o
    i                  
   @   s  d dl Z d dlZz`d dlZd dlmZmZmZm	Z
mZmZmZ dZe jjdu r.ejd neeeee jjdZedk rKejd ed ejd e  D ]\ZZed	e d
e  qWW n: ey Z  z.e!de  d dZG dd dZG dd dZG dd dZ
dd Zdd ZW Y dZ [ ndZ [ ww dZ"erz
d dlm#Z$ dZ"W n ey   ed Y nw e"sG dd dZ$d dl%Z&G dd deZ'G dd de$Z#G d d! d!e
Z	G d"d# d#e'Z(G d$d% d%e'Z)e(Zed&e ed#e( ed%e) ed!e	 e"rede# e j*d'd(hd#d)e j+d'd(hd%d)e j,h d*d!d+d,d-Z-e"r>e j*d'd(hdd.d,e-d/< g d0Z.dS )1    N)QuantizedTensorQuantizedLayoutTensorCoreFP8LayoutTensorCoreNVFP4Layoutregister_layout_opregister_layout_classget_layout_classTcuda.)   zPWARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.tritonzFound comfy_kitchen backend z: z'Failed to import comfy_kitchen, Error: z,, fp8 and fp4 support will not be available.Fc                   @      e Zd ZdS )r   N__name__
__module____qualname__ r   r   -/mnt/c/Users/fbmor/ComfyUI/comfy/quant_ops.pyr          r   c                   @   r   )_CKFp8LayoutNr   r   r   r   r   r   "   r   r   c                   @   r   )_CKNvfp4LayoutNr   r   r   r   r   r   %   r   r   c                 C      d S Nr   )nameclsr   r   r   r   (      r   c                 C   r   r   r   )r   r   r   r   r   +   r   r   )TensorCoreMXFP8LayoutzBcomfy_kitchen does not support MXFP8, please update comfy_kitchen.c                   @   r   )_CKMxfp8LayoutNr   r   r   r   r   r   7   r   r   c                   @   s   e Zd ZdZedddZdS )_TensorCoreFP8LayoutBaseNr   Fc           
      C   sP  | j d u rt| j d|j}t|j}t|trN|dkrNt	|
 jtjdt| j j }|jtjtjfvrNt|j}dtjd| |j|jd }|d u r\tjd|jtjd}t|tjsltj||jtjd}|dkr|r}|d| |j9 }n
|d| |j }tjj|| j |d	}nt||| j }| j| ||d
}	||	fS )Nz must define FP8_DTYPErecalculate)dtypeg      ?)minmaxr   devicer    r   )r    seedscale
orig_dtype
orig_shape)	FP8_DTYPENotImplementedErrorr   r    tupleshape
isinstancestrtorchamaxabstofloat32finfor"   bfloat16clampr!   onesr$   Tensortensorcomfyfloatstochastic_roundingckquantize_per_tensor_fp8Params)
r   r:   r'   r=   inplace_opsr(   r)   tensor_infoqdataparamsr   r   r   quantizeC   s*   

&z!_TensorCoreFP8LayoutBase.quantizeNr   F)r   r   r   r*   classmethodrE   r   r   r   r   r   @   s    r   c                   @      e Zd ZedddZdS )r   Nr   Fc                 C   s   |  dkrtd|   d|j}t|j}| |}||k}|dkr1tjj|||d\}	}
n	t	j
||d\}	}
| j|
||d}|	|fS )N   zMXFP8 requires 2D tensor, got Dr   )pad_32xr%   )rK   r&   )dim
ValueErrorr    r,   r-   get_padded_shaper;   r<   (stochastic_round_quantize_mxfp8_by_blockr>   quantize_mxfp8r@   r   r:   r'   r=   rA   r(   r)   padded_shapeneeds_paddingrC   block_scalerD   r   r   r   rE   d   s   

zTensorCoreMXFP8Layout.quantizerF   r   r   r   rG   rE   r   r   r   r   r   c       r   c                   @   rH   )r   Nr   Fc                 C   s   |  dkrtd|   d|j}t|j}|d u s%t|tr4|dkr4t|	 t
jjt
jj  }t|tjs?t|}|j|jtjd}| |}||k}|dkrbtjj||||d\}	}
n
t
j|||d\}	}
| j||||
d	}|	|fS )
NrI   zNVFP4 requires 2D tensor, got rJ   r   r#   r   )pad_16xr%   )rW   )r'   r(   r)   rT   )rL   rM   r    r,   r-   r.   r/   r0   r1   r2   r>   float_utilsF8_E4M3_MAXF4_E2M1_MAXr9   r:   r3   r$   r4   rN   r;   r<   (stochastic_round_quantize_nvfp4_by_blockquantize_nvfp4r@   rQ   r   r   r   rE   }   s*   


zTensorCoreNVFP4Layout.quantizerF   rU   r   r   r   r   r   |   rV   r   c                   @      e Zd ZejZdS )TensorCoreFP8E4M3LayoutN)r   r   r   r0   float8_e4m3fnr*   r   r   r   r   r^          
r^   c                   @   r]   )TensorCoreFP8E5M2LayoutN)r   r   r   r0   float8_e5m2r*   r   r   r   r   ra      r`   ra   r   weight_scaleinput_scale)	storage_t
parameterscomfy_tensor_layout>   rd   rc   weight_scale_2   )re   rf   rg   
group_size)r_   rb   nvfp4    mxfp8)r   r   r   r^   ra   r   QUANT_ALGOSr   )/r0   loggingcomfy_kitchenr>   comfy_kitchen.tensorr   r   r   r   r   r   r   r   r   _CK_AVAILABLEversionr	   registrydisabler,   mapintr/   splitcuda_versionwarninglist_backendsitemskvinfoImportErroreerror_CK_MXFP8_AVAILABLEr   r   comfy.floatr;   r   r^   ra   r_   rb   uint8rn   __all__r   r   r   r   <module>   s    $	
#!





