o
    i                     @   s  d Z ddlZddlZddlmZ ddlmZmZ ddlZddl	Z	ddl
Z
ddlZddlZddlZddlZddlmZ ddlZddlZddlZG dd deZG dd	 d	eZejaejZejadZd
Zd
Zdd Ze Z d
a!dZ"ze	j#j$Z"e"%dZ&e'e&d e'e&d fZ(W n   Y dZ)ej*re+d e	j,ddd d
a-ej.dure/d ddl0Z0da-ej.Z1e1dk re02 a3ne02e1a3e+d4e05e1 d
Z)zddl6Z7W n   Y ze	j89 Z:e	j8; a!W n   d
a!Y ze	j<j=; rej>addl?Z	W n   Y zddl@Z@e	jA9 Z:e	jA; aBW n   d
aBY zddlCZCe	jD9 Z:e	jD; aEW n   d
aEY zeFe	daGW n   d
aGY ejHrBejIadd ZJdd ZKdd ZLdd ZMdd ZNd d! ZOdd"d#ZPd$d% ZQePeO d& ZeR jSd& ZTe+d'4eeT ze+d(4e" eQ ZUeUdure+d)4eU W n   Y ze	jVjWZXW n   eYZXY ze	jZZ[W n e\y   e]Z[Y nw d*d+ Z^d,d- Z_dZ`dZaejbrd
ZcnBz;ddldZdddleZddZczedjfZcW n   Y z edj#j$Z`e+d.4e` e`gd/r e/d0 e/d1 d
ZaW n   Y W n   d
ZcY d2d3 Zhd4d5 Zidd6d7Zjd8Zkeh r#d9Zkd
alejmr-dald
Zcz>eh rLe(d d:krLtld
krLejnd
krLejod
krLdaleJ s\eK s\eL s\eM rjejnd
krjejod
krjdalW n   Y ejpZqg d;Zrd<Zszei re	jVteO ju%d=d Zvewd>d? erD sexesd@krd
e	j<jy_ze+dA ze{e|e'e}e	j#j~%ddd: ZW n   dBZY dCdD Ze+dE4ev e+dF4e ejnd
kr	ejod
kr	eevr	e(dGkrewdHd? dID rdaledJkr	ewdKd? dLD r	dale(dGkredMkrewdNd? dOD rdZqW n   Y tlr=e	j<jVd e	j<jVd e	j<jVd d
Zzeh sHei r\ejejv r\de	j<jVj_dZe+dP W n   Y e	jV; r{e	j<jy; r{ejejv r{de	j<jy_ze(dQkre	j<jVd W n	   e/dR Y ejrejZdZ)nejrejZnejsejrejad
Zejre+dS dZe)reejejfv reatejkrejatej>krejae+dTtj  ejZere+dU dVdW Zze+dX4eeO  W n	   e/dY Y g ZdZd[ Zdd\d]ZG d^d_ d_Zd`da Zdbdc Zewe ZddZerFddlZdeZedfkrAedg7 Zdhdi Zndjdi Zejdurcejdk dk dk Zedl4ed&  dmdn Zdodp Zg d
ddfdqdrZddsdtZdudv ZddwdxZdydz Zd{d| Zd}d~ Zdd Zdd Zdd Zd	ddZdde	je	je	jgdfddZe	je	je	jgfddZdd Zdd Zd
ddZd	ddZdd Zdd Zdd Zdd Zdg fddZdd Zdd Zdd ZŐd	ddZdd Zdd Zi ZdZejdurejZn
eh sei rd:Zej̐r dZedkr-e+d4eʡ dd Zi Zi Zdadd Zdd Zdd Zdd ZԐdddZՐdddZ֐dddZi ZdadZejېseh skei rerxePe	2dd8 Zn	ePe	2dd Ze+d4ed&  eg dZdd ZddÄ Zddń ZddǄ ZddɄ Zdd˄ Zdd̈́ Zddτ Zddф Zddӄ ZddՄ ZdddׄZddل Zddۄ Zdd݄ Zdd߄ Zdd Zdd Zdd Zdd ZdddZdddZd	ddZpd	ddZd	ddZdd Zi Zdd Zdd ZdddZdd Zdd ZG dd deYZe ad
a dd dZdd Zdd ZdS (  a  
    This file is part of ComfyUI.
    Copyright (C) 2024 Comfy

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
    N)Enum)argsPerformanceFeature)nullcontextc                   @   s$   e Zd ZdZdZdZdZdZdZdS )	VRAMStater                  N)	__name__
__module____qualname__DISABLEDNO_VRAMLOW_VRAMNORMAL_VRAM	HIGH_VRAMSHARED r   r   4/mnt/c/Users/fbmor/ComfyUI/comfy/model_management.pyr   #   s    r   c                   @   s   e Zd ZdZdZdZdS )CPUStater   r   r   N)r   r   r   GPUCPUMPSr   r   r   r   r   +   s    r   Fc                  C   s   g } z|  tj W n   Y z|  tj W n   Y z|  tj W n   Y z|  tj W n   Y z	|  tj W | S    Y | S N)appendtorchfloat8_e4m3fnfloat8_e4m3fnuzfloat8_e5m2float8_e5m2fnuzfloat8_e8m0fnu)float8_typesr   r   r   get_supported_float8_types=   s.   r$    .r   Tz*Using deterministic algorithms for pytorch)	warn_onlyzWARNING: torch-directml barely works, is very slow, has not been updated in over 1 year and might be removed soon, please don't use it, there are better options.zUsing directml with device: {}corexc                   C   s   t tjkr	tr	dS dS NTF)	cpu_stater   r   xpu_availabler   r   r   r   is_intel_xpu   s   
r,   c                   C      t rdS dS r)   )npu_availabler   r   r   r   is_ascend_npu      r/   c                   C   r-   r)   )mlu_availabler   r   r   r   is_mlu   r0   r2   c                   C   r-   r)   )ixuca_availabler   r   r   r   is_ixuca   r0   r4   c                  C   s*   t  j} | drdS | drdS dS )Nz
-MicrosoftTzmicrosoft-standard-WSL2F)platformunamereleaseendswith)versionr   r   r   is_wsl   s   


r:   c                   C   s   t rtS ttjkrtdS ttjkrtdS t r$tdtj	
 S t r0tdtj
 S t r<tdtj
 S ttj
 S )Nmpscpuxpunpumlu)directml_enableddirectml_devicer*   r   r   r   devicer   r,   r=   current_devicer/   r>   r2   r?   cudar   r   r   r   get_torch_device   s   



rE   c                 C   s   | d u rt  } t| dr| jdks| jdkrt j}|}njtr%d}|}nct r>tj	
| }|d }tj	| j}|}|}nJt rXtj
| }|d }tj| \}}|}|}n0t rrtj
| }|d }tj| \}}	|}|	}ntj
| }|d }tj| \}}
|}|
}|r||fS |S )Ntyper<   r;      @reserved_bytes.all.current)rE   hasattrrF   psutilvirtual_memorytotalr@   r,   r   r=   memory_statsget_device_propertiestotal_memoryr/   r>   mem_get_infor2   r?   rD   )devtorch_total_too	mem_totalmem_total_torchstatsmem_reservedmem_total_xpu_mem_total_npumem_total_mlumem_total_cudar   r   r   get_total_memory   sD   
r\   c                   C   s0   zt dd t d dD W S    Y d S )Nc                 s   s    | ]}t |V  qd S r   )int).0nr   r   r   	<genexpr>       zmac_version.<locals>.<genexpr>r   r&   )tupler5   mac_versplitr   r   r   r   mac_version   s   "re   i   z+Total VRAM {:0.0f} MB, total RAM {:0.0f} MBzpytorch version: {}zMac Version {}c                 C   sF   t | trdS t | tr!t| dd dksdt|  v r!t  dS dS )NT
error_coder   zout of memoryF)
isinstanceOOM_EXCEPTIONACCELERATOR_ERRORgetattrstrlowerdiscard_cuda_async_errorer   r   r   is_oom  s   
*rp   c                 C   s   t | s| d S r   )rp   rn   r   r   r   raise_non_oom  s   rq   zxformers version: {}z0.0.18zz
WARNING: This version of xformers has a major bug where you will get black images when generating high resolution images.z=Please downgrade or upgrade xformers to a different version.
c                   C      t tjkrtjjrdS dS r)   )r*   r   r   r   r9   rD   r   r   r   r   	is_nvidia<     
rs   c                   C   rr   r)   )r*   r   r   r   r9   hipr   r   r   r   is_amdC  rt   rv   c                 C   sl   t  sdS t| rdS tj| j}|dr4t|dkr4z
t|d d }W n   d}Y ||kr4dS dS )NFgfx   r
   r   r   T)	rv   is_device_cpur   rD   rN   gcnArchName
startswithlenr]   )rB   min_rdna_versionarchcmp_rdna_versionr   r   r   amd_min_versionJ  s   r   g?g        r   )gfx1030gfx1031gfx1010gfx1011gfx1012gfx906gfx900gfx803COMFYUI_ENABLE_MIOPEN:c                 c       | ]}|t v V  qd S r   r~   r^   ar   r   r   r`   y  ra   r`   1zESet: torch.backends.cudnn.enabled = False for better AMD performance.)   c                 C   s   t jd }tjtj|dd}ttdd tdd t|}| |v r(dS d	| d d	 |v r5dS d
	| d d |v rBdS dS )Nr   libzaotriton.imagesc                 S   s   | dd  S )Nr
   r   r   r   r   r   <lambda>  s    z$aotriton_supported.<locals>.<lambda>c                 S   s
   |  dS )Nzamd-gfx)r{   r   r   r   r   r     s   
 Tz{}xr   z{}xxF)
r   __path__ospathjoinsetmapfilterlistdirformat)gpu_archr   rw   r   r   r   aotriton_supported  s   
"r   zAMD arch: {}zROCm version: {}r   rx   c                 c   r   r   r   r   r   r   r   r`     ra   )gfx90agfx942gfx950gfx1100gfx1101gfx1150gfx1151)rx   r   c                 c   r   r   r   r   r   r   r   r`     ra   )gfx1200gfx1201)r   r
   c                 c   r   r   r   r   r   r   r   r`     ra   )r   r   r   zEnabled fp16 accumulation.)r   r   z9Warning, could not set allow_fp16_bf16_reduction_math_sdpz7Forcing FP32, if this improves things please report it.zSet vram state to: z!Disabling smart memory managementc                 C   s   t | dr8| jdkr#ztj }W n   d}Y d| tj| |S | jdkr2d| tj| S d| jS t rEd| tj| S t	 rRd| tj
| S t r_d| tj| S d| tj| S )	NrF   rD   r%   z
{} {} : {}r=   z{} {}z{}zCUDA {}: {})rI   rF   r   rD   get_allocator_backendr   get_device_namer=   r,   r/   r>   r2   r?   )rB   allocator_backendr   r   r   get_torch_device_name  s"   


r   z
Device: {}zCould not pick default device.c                 C   s,   d}|   }|D ]}|| }||j7 }q|S Nr   )
state_dictnbytes)module
module_memsdktr   r   r   module_size  s   r   c           
      C   s   d}d}t  }|  }|D ]@}|| }||j7 }t|tjjr$|j n| }t	|dds/q||j7 }|s7qd|_
|jd }	|	|v rDq|	  ||	 q||fS )Nr   _comfy_tensor_mmap_touchedF)r   r   r   rg   comfy	quant_opsQuantizedTensor_qdatauntyped_storagerj   r   _comfy_tensor_mmap_refsbounceadd)
r   freemmap_touched_memr   bounced_mmapsr   r   r   storagemmap_objr   r   r   module_mmap_residency  s(   
 

r   c                   @   s   e Zd Zdd Zdd Zdd Zedd Zd	d
 Zd%ddZ	dd Z
dd Zdd Zd&ddZd%ddZd'ddZd%ddZdd  Zd!d" Zd#d$ ZdS )(LoadedModelc                 C   s.   |  | |j| _d | _d| _d | _d | _d S NT)
_set_modelload_devicerB   
real_modelcurrently_usedmodel_finalizer_patcher_finalizerselfmodelr   r   r   __init__  s   

zLoadedModel.__init__c                 C   sD   t || _|jd ur t |j| _t || j| _d| j_d S d S NF)	weakrefref_modelparent_parent_modelfinalize_switch_parentr   atexitr   r   r   r   r     s   
zLoadedModel._set_modelc                 C   s"   |   }|d ur| | d S d S r   )r   r   r   r   r   r   r   #  s   zLoadedModel._switch_parentc                 C   s   |   S r   )r   r   r   r   r   r   (  s   zLoadedModel.modelc                 C   
   | j  S r   )r   
model_sizer   r   r   r   model_memory,     
zLoadedModel.model_memoryFc                 C   s   | j j|dS )Nr   )r   model_mmap_residency)r   r   r   r   r   r   /  s   z LoadedModel.model_mmap_residencyc                 C   r   r   )r   loaded_sizer   r   r   r   model_loaded_memory2  r   zLoadedModel.model_loaded_memoryc                 C   s   | j  | j   S r   )r   r   r   r   r   r   r   model_offloaded_memory5     z"LoadedModel.model_offloaded_memoryc                 C   s   || j  kr|  S |  S r   )r   current_loaded_devicer   r   )r   rB   r   r   r   model_memory_required8  s   z!LoadedModel.model_memory_requiredr   c                 C   s   | j | j | j | j   |}|dkrd}| j||d | j j }t rQtjsQdt v rQ|d urQt	
  tj| dddd}W d    n1 sLw   Y  t|| _t|t| _d| j_|S )Nr   nFforce_patch_weightsipexT)inplace
graph_modeconcat_linearF)r   model_patches_torB   model_dtypemodel_use_more_vramr,   r   disable_ipex_optimizeglobalsr   no_gradr   optimizeevalr   r   r   r   cleanup_modelsr   r   )r   lowvram_model_memoryr   use_more_vramr   r   r   r   
model_load>  s   
zLoadedModel.model_loadc                 C   s   |r| j  dkrdS dS Nr   TF)r   lowvram_patch_counter)r   r   r   r   r   should_reload_modelS  s   zLoadedModel.should_reload_modelNTc                 C   sZ   |d ur|| j  k r| j | j j|}||krdS | j | | j  d | _d | _dS )NFT)r   r   partially_unloadoffload_devicedetachr   r   )r   memory_to_freeunpatch_weightsfreedr   r   r   model_unloadX  s   
zLoadedModel.model_unloadc                 C   s   | j j| j||dS )Nr   )r   partially_loadrB   )r   extra_memoryr   r   r   r   r   d  r   zLoadedModel.model_use_more_vramc                 C   s   | j |j u S r   r   )r   otherr   r   r   __eq__g  s   zLoadedModel.__eq__c                 C   s   | j d ur| j   d S d S r   )r   r  r   r   r   r   __del__j  s   
zLoadedModel.__del__c                 C   s   |   d uo
| jd u S r   )r   r   r   r   r   r   is_deadn  s   zLoadedModel.is_deadF)r   Fr   )r   r   r   r   r   r   propertyr   r   r   r   r   r   r   r   r  r   r
  r  r  r   r   r   r   r     s$    





r   c                 C   s4   |D ]}|j |kr| || 8 } | dkr d S qd S r   )rB   r   )r  loaded_modelsrB   mr   r   r   use_more_memoryr  s   
r  c                 C   s(   d}| D ]}|j |kr|| 7 }q|S r   )rB   r   )r  rB   offloaded_memr  r   r   r   offloaded_memoryy  s   
r  i   i  %i <  i  @c                   C   s
   t j S r   )r   windowsget_free_ramr   r   r   r   r    r   r  c                   C   s
   t  jS r   )rJ   rK   	availabler   r   r   r   r    r   i   z+Reserving {}MB vram for other applications.c                   C      t S r   )EXTRA_RESERVED_VRAMr   r   r   r   extra_reserved_memory  s   r  c                   C   s
   dt   S )NgA)r  r   r   r   r   minimum_inference_memory  r   r  c                 C   sJ  t   g }g }g }tttd ddD ],}	t|	 }
|d u s"|
j|kr?|
|vr?|
 s?||
  t	|
j
|
 |	f d|
_qt|}|D ]m}|d }	d}d}trV|d u r{|d u r\dn| t| }|t  }t|	 j
 r{|r{| t|	 j
 8 } d}|dkrt|	 |rtdt|	 j
j
jj  ||	 |dkrtdt|	 j
j
jj  t|	 j
| qF|D ]2}|d }	|t j }|dkr|	|vrqt|	 jdd	\}}|dkrtd
t|	 j
j
jj  qt|ddD ]
}	|t|	 qt|dkrt  |S |d ur#tt j!kr#t|dd\}}||d kr#t  |S )Nr   r   Fr   r   z
Unloading zPIN Unloading Tr   zRAM Unloading )reverse)torch_free_toog      ?)"cleanup_models_gcranger|   current_loaded_modelsrB   r  r   r   sysgetrefcountr   r   r   sortedDISABLE_SMART_MEMORYget_free_memoryr  
is_dynamicr   r  loggingdebug	__class__r   partially_unload_ramrJ   rK   r  r   popsoft_empty_cache
vram_stater   r   )memory_requiredrB   keep_loadedfor_dynamicpins_requiredram_requiredunloaded_model
can_unloadunloaded_modelsishift_modelcan_unload_sortedxr  pins_to_freeram_to_freeresident_memoryrX   mem_free_totalmem_free_torchr   r   r   free_memory  sb   $


r>  c           $      C   sH  t   t }t||t  }|d u r|}nt||t  }t }| D ]}|| | D ]}	||	 q-q"|} g }
d}| D ]@}| sFd}t|}zt	
|}W n   d }Y |d urit	| }d|_|
| q>t|drytd|jjj  |
| q>|
D ]1}g }ttt	D ]}|jt	| jr|g| }q|D ]}t	|}|jjdd |j  qqi }i }i }|
D ]9}|j}||d|| ||< | \}}|j }|| }|| }||d| ||< ||d| ||< q|D ]}|tdkrt|| d | |||| || d	 q|D ]%}|tdkr;t |}||k r;t|||d
}td!t| q|
D ]b}|j}|j"}t#|rPt$j%} nt&} d}!t'r| t$j(ksc| t$j)kr|s|* }"t ||" }#td|#| t+|#t, |#t  }!|!|" }!|!dkrd}!| t$j-krd}!|j.|!|d t	/d| q?d S )NTFr   zRequested to load )unpatch_allr   r<   g?)r/  r0  r1  )r/  z{} models unloaded.g?r   )0r  r  maxr  r   r   model_patches_modelsr%  r   r  indexr   r   rI   r&  infor   r(  r   r  r|   is_cloner*  r  r   rB   getr   r   pinned_memory_sizer   r>  r$  r   r   ry   r   r   r,  lowvram_availabler   r   r   minMIN_WEIGHT_MEMORY_RATIOr   r   insert)$modelsr-  r   minimum_memory_requiredforce_full_loadinference_memory	extra_memmodels_tempr  mmmodels_to_loadfree_for_dynamicr8  loaded_modelloaded_model_indexloaded	to_unloadr5  model_to_unloadtotal_memory_requiredtotal_pins_requiredtotal_ram_requiredrB   r;  r   pinned_memoryr0  r1  free_memmodels_lr   	torch_devvram_set_stater   loaded_memorycurrent_free_memr   r   r   load_models_gpu  s   






$ 
rc  c                 C   s
   t | gS r   )rc  r  r   r   r   load_model_gpu8  r   rd  c                 C   s*   g }t D ]}| r|jsq||j q|S r   )r  r   r   r   )only_currently_usedoutputr  r   r   r   r  ;  s   r  c                  C   s   d} t   tttD ]}t| }| r%td| j	j
 d}  nq| rLt  t  tttD ]}t| }| rKtd| j	j
 q5d S d S )NFzPotential memory leak detected with model {}, doing a full garbage collect, for maximum performance avoid circular references in the model code.Tz_WARNING, memory leak with model {}. Please make sure it is not being referenced from somewhere.)reset_cast_buffersr  r|   r  r  r&  rC  r   r   r(  r   gccollectr+  warning)do_gcr5  curr   r   r   r  F  s&   r  c                 C   sj   |   D ].\}}|jddD ]\}}t|| d|j q|jddD ]\}}t|| d|j q#qd S )NF)recurse_comfy_model_dtype)named_modulesnamed_parameterssetattrdtypenamed_buffers)r   namer   
param_nameparambuf_namebufr   r   r   archive_model_dtypes\  s   ry  c                  C   sJ   g } t ttD ]}t|  d u r|g|  } q| D ]}t|}~qd S r   )r  r|   r  r   r*  )	to_deleter5  r8  r   r   r   r   d  s   

r   c                 C   sL   d}| t jks| t jkrd}|S | t jkrd}|S z| j}W |S    Y |S )Nr
   r   )r   float16bfloat16float32itemsize)rr  
dtype_sizer   r   r   r  n  s   
r  c                   C   s   t tjkrt S tdS Nr<   )r,  r   r   rE   r   rB   r   r   r   r   unet_offload_device{  s   

r  c                 C   sz   t d}tjjr|S t }ttjksttj	kr|S t
s!ttjkr#|S t||  }t|}t|}||kr;||k r;|S |S r  )r   rB   r   memory_managementaimdo_enabledrE   r,  r   r   r   r#  r   r  r$  )
parametersrr  cpu_devr_  r   mem_devmem_cpur   r   r   unet_inital_load_device  s   
r  c                 C   s   t | d t  S )Ng)\(?)r\   r  rB   r   r   r   maximum_vram_for_weights  s   r  c                 C   s  |dk rd}t jrtjS t jrtjS t jrtjS t jrtj	S t j
r$tjS t jr*tjS t jr0tjS d }|tv r8|}|d urNt| rB|S t| }|d |krN|S tsU|tj	krctj	|v rct| |drctj	S |D ],}|tj	kr|t| |dr|tj	|v r|tj	  S |tjkrt| |drtj|v rtj  S qe|D ].}|tj	krt| |ddrtj	|v rtj	  S |tjkrt| |ddrtj|v rtj  S qtjS )	Nr   l     @=7M.cr   )rB   model_params)r  T)rB   r  manual_cast)r  r  )r   	fp32_unetr   r}  	fp64_unetfloat64	bf16_unetr|  	fp16_unetr{  fp8_e4m3fn_unetr   fp8_e5m2_unetr    fp8_e8m0fnu_unetr"   FLOAT8_TYPESsupports_fp8_computer  PRIORITIZE_FP16should_use_fp16should_use_bf16)rB   r  supported_dtypesweight_dtype	fp8_dtypefree_model_memorydtr   r   r   
unet_dtype  sZ   







r  c                 C   s   | t jks
| t jkrd S t|dd}|r| t jkrd S t|}|r(| t jkr(d S t|dd}tr:|r:t j|v r:t jS |D ]}|t jkrJ|rJt j  S |t jkrV|rVt j  S q<t jS )NFprioritize_performanceT)r   r}  r  r  r{  r  r|  r  )r  inference_devicer  fp16_supportedbf16_supportedr  r   r   r   unet_manual_cast  s$   

r  c                   C      t jrt S tdS r  r   gpu_onlyrE   r   rB   r   r   r   r   text_encoder_offload_device     
r  c                   C   sH   t jrt S ttjtjfv stjj	rt
ddrt S tdS tdS )NFr  r<   )r   r  rE   r,  r   r   r   r   r  r  r  r   rB   r   r   r   r   text_encoder_device  s   


r  c                 C   s\   t jjr|S | |ks|dkr|S t| r| S t| }t|}||d kr,|d |k r,| S |S )NrG   g      ?g333333?)r   r  r  is_device_mpsr$  )r   r   r   mem_lmem_or   r   r   text_encoder_initial_device  s   r  c                 C   sP   t jrtjS t jrtjS t jrtjS t jrtj	S t j
rtjS t| r%tjS tjS r   )r   fp8_e4m3fn_text_encr   r   fp8_e5m2_text_encr    fp16_text_encr{  bf16_text_encr|  fp32_text_encr}  ry   r  r   r   r   text_encoder_dtype  s   r  c                   C   r  r  r  r   r   r   r   intermediate_device  r  r  c                   C   s   t jrtjS tjS r   )r   fp16_intermediatesr   r{  r}  r   r   r   r   intermediate_dtype   s   r  c                   C   s   t jrtdS t S r  )r   cpu_vaer   rB   rE   r   r   r   r   
vae_device&  s   
r  c                   C   r  r  r  r   r   r   r   vae_offload_device+  r  r  c                 C   sh   t jrtjS t jrtjS t jrtjS |D ]}|tjkr#t| r#|  S |tjkr0t	| r0|  S qtjS r   )
r   fp16_vaer   r{  bf16_vaer|  fp32_vaer}  r  r  )rB   allowed_dtypesdr   r   r   	vae_dtype1  s   r  c                 C   s   t | dr| jS dS )NrF   rD   rI   rF   )rQ   r   r   r   get_autocast_deviceB  s   
r  c                 C   s:   |t jkrdS t| rdS |t jkrdS |t jkrdS dS r)   )r   r}  ry   r{  r|  rB   rr  r   r   r   supports_dtypeG  s   


r  c                 C   s^   |t jkrdS |t jkrdS trdS |t jkrdS t| rdS |t jkr&dS |t jkr-dS dS r)   )r   r}  r{  r@   r|  r  r   r    r  r   r   r   supports_castR  s   




r  c                 C   s4   | d u r|} n
t | t |kr|} t|| s|} | S r   )r  r  )rr  fallback_dtyperB   r   r   r   pick_weight_dtypec  s   
r  c                 C   s6   t jrdS t| rdS t rdS t jrdS trdS dS r)   )r   force_non_blockingr  r,   deterministicr@   r  r   r   r   device_supports_non_blockingn  s   r  c                   C   s   t jrdS dS r)   )r   force_channels_lastr   r   r   r   r  {  s   r  z-Using async weight offloading with {} streamsc                 C   s4   | d u rd S t | rtj S t| rtj S d S r   )is_device_cudar   rD   current_streamis_device_xpur=   r  r   r   r   r    s   

r  r   c                 C   s   | d ur| }t |dr|| }nt }t| d }|d u s$| |k ri|td u r,d S |d ur@| dkr@t  t| = ~t  | t	j
|t	j|d}|t| < W d    n1 sZw   Y  |td kri||fa|S )N
as_contextr   i   rr  rB   r   )rI   r  r   STREAM_CAST_BUFFERSrE  numelLARGEST_CASTED_WEIGHTsynchronizer+  r   emptyint8)offload_streamrB   sizer   
wf_contextcast_bufferr   r   r   get_cast_buffer  s,   


r  c                  C   s.   da tD ]} |   qt  t  t  d S )Nr   )r  r  r  clearr+  )r  r   r   r   rg    s   

rg  c                 C   s   t | d}tdkrd S tj rd S | tv r4t|  }|| t|  |d t	| }|t | < || S t
| rag }ttD ]}tjj| dd}tjj|_|| q>|t| < || }|t | < |S t| rg }ttD ]}tjj| dd}tjj|_|| qk|t| < || }|t | < |S d S )Nr   r   )rB   priority)stream_countersrE  NUM_STREAMSr   compileris_compilingSTREAMSwait_streamr  r|   r  r  rD   Streamstreamr  r   r  r=   )rB   stream_counterssr   s1sr   r   r   get_offload_stream  s@   


r  c                 C   s*   |d u s
t | d u rd S t | | d S r   )r  r  )rB   r  r   r   r   sync_stream  s   r  c           	      C   s   t  }|d ur|}t|dr||}tj| |}|? | D ]3}|d}|d u r+qtj||r3qt|tj	j
r?|j n| }t|drKd|_|j||d qW d    d S 1 s^w   Y  d S )Nr  r   r   Tnon_blocking)r   rI   r  r   r  interpret_gathered_liker*  read_tensor_file_slice_intorg   r   r   r   r   r   copy_)	tensorsrr  r  r  
dest_viewstensor	dest_viewr   r   r   r   cast_to_gathered  s&   


 
"r  c                 C   s&  |d u s	| j |krF|s|d u s| j|kr| S |d ur?|}t|dr&||}| | j||dW  d    S 1 s:w   Y  | j||dS |d ur~|}t|drV||}| |d u retj| ||d}|j| |d W d    |S 1 sww   Y  |S |d u rtj| ||d}|j| |d |S )Nr  )rr  copyr  r  )rB   rr  rI   r  tor   
empty_liker  )weightrr  rB   r  r  r  r  r  r   r   r   cast_to
  s8   

 


r  c                 C   s   t |}t| ||||dS )N)rr  rB   r  r  )r  r  )r  rB   rr  r  r  r   r   r   cast_to_device&  s   r  r   r<   g?zEnabled pinned memory {})Tensor	Parameterr   c                  C   sV   z t jdgt jt d} t jdgt jt d}| | }t  W d S  ty*   Y d S w )Nr   r  )r   r  uint8rE   r  RuntimeError)r   brX   r   r   r   rm   8  s   rm   c                 C   s   t dkrdS t| jtvrdS t| jsdS |  rdS |  s"dS | j}t	| t kr-dS | 
 }|dkr7dS tj ||ddkrM|t|< t	|7 a	dS td t  dS )Nr   Fr   Tz
Pin error.)MAX_PINNED_MEMORYrF   r   PINNING_ALLOWED_TYPESry   rB   	is_pinnedis_contiguousr   TOTAL_PINNED_MEMORYdata_ptrr   rD   cudartcudaHostRegisterPINNED_MEMORYr&  rj  rm   )r  r  ptrr   r   r   
pin_memoryB  s.   

r  c                 C   s   t dkrdS t| jsdS |  }| j}t|d }|d u r%td dS ||kr0td dS t	j
 |dkrCtt|8 adS td t  dS )Nr   Fz+Tried to unpin tensor not pinned by ComfyUIzSize of pinned tensor changedTzUnpin error.)r  ry   rB   r  r   r  rE  r&  rj  r   rD   r	  cudaHostUnregisterr  r*  rm   )r  r  r  size_storedr   r   r   unpin_memoryh  s&   



r  c                   C      t jS r   )r   use_sage_attentionr   r   r   r   sage_attention_enabled     r  c                   C   r  r   )r   use_flash_attentionr   r   r   r   flash_attention_enabled  r  r  c                   C   sB   t tjkrdS t rdS t rdS t rdS t rdS trdS tS r   )	r*   r   r   r,   r/   r2   r4   r@   XFORMERS_IS_AVAILABLEr   r   r   r   xformers_enabled  s   
r  c                  C   s   t  } | sdS tS r   )r  XFORMERS_ENABLED_VAE)enabledr   r   r   xformers_enabled_vae  s   r  c                   C   r  r   )ENABLE_PYTORCH_ATTENTIONr   r   r   r   pytorch_attention_enabled  s   r  c                   C   s   t  rdS t S r   )rv   r  r   r   r   r   pytorch_attention_enabled_vae  s   r  c                   C   sD   t r t rdS t rdS t rdS t rdS t rdS t r dS dS r)   )r  rs   r,   r/   r2   rv   r4   r   r   r   r   !pytorch_attention_flash_attention  s   r  c                  C   s4   t j} t }|d urd|krd} | rtjtjiS d S )N)   r   T)r   force_upcast_attentionre   r   r{  r}  )upcastmacos_versionr   r   r   force_upcast_attention_dtype  s   r$  c                 C   sd  | d u rt  } t| dr| jdks| jdkrt j}|}ntr%d}|}nt rHtj	
| }|d }|d }tj	| j| }|| }|| }nbt rjtj
| }|d }|d }tj| \}}	|| }|| }n@t rtj
| }|d }|d }tj| \}
}	|| }|
| }ntj
| }|d }|d }tj| \}}	|| }|| }|r||fS |S )NrF   r<   r;   rG   zactive_bytes.all.currentrH   )rE   rI   rF   rJ   rK   r  r@   r,   r   r=   rM   rN   rO   r/   r>   rP   r2   r?   rD   )rQ   r  r<  r=  rU   
mem_activerV   mem_free_xpumem_free_npurX   mem_free_mlumem_free_cudar   r   r   r$    sL   



r$  c                   C   
   t tjkS r   )r*   r   r   r   r   r   r   cpu_mode     
r+  c                   C   r*  r   )r*   r   r   r   r   r   r   mps_mode  r,  r-  c                 C   s   t | dr| j|krdS dS )NrF   TFr  )rB   rF   r   r   r   is_device_type  s   

r.  c                 C   
   t | dS r  r.  r  r   r   r   ry     r   ry   c                 C   r/  )Nr;   r0  r  r   r   r   r  
  r   r  c                 C   r/  )Nr=   r0  r  r   r   r   r    r   r  c                 C   r/  )NrD   r0  r  r   r   r   r    r   r  c                   C   r-   r)   )r@   r   r   r   r   is_directml_enabled  s   r1  c           	      C   sN  | d ur
t | r
dS tjrdS trdS t rdS | d ur t| s#t r%dS t r*dS t r:t	dk r3dS t
j| jS t r?dS t rDdS t rIdS t
jjrOdS t
j| }|jdkr\dS |jdk rcdS g d}|D ]}||j v r|tsv|ry dS  dS qi|rt| }|r|d |krdS |jdk rdS g d	}|D ]
}||jv r dS qdS )
NFTr   r	      r   )10801070ztitan xp3000p3200p4000p4200p5000p5200p600010601050p40p100p6p4r
   rx   )166016501630T500T550T600MX550MX450zCMP 30HXT2000T1000T1200)ry   r   
force_fp16
FORCE_FP32r1  r  r-  r+  r,   torch_version_numericr   r=   rN   has_fp16r/   r2   r4   r9   ru   rD   majorrt  rl   WINDOWSr  )	rB   r  r  r  propsnvidia_10_seriesr8  r  nvidia_16_seriesr   r   r   r    sb   



r  c                    s  | d ur
t | r
dS trdS trdS | d urt| st r&t dk r$dS dS t r+dS t r9tdk r4dS t	j
 S t r>dS t rCdS t r^t	j| j t fddtD r^|r\dS dS t	j| }t rn|jdkrndS |jdkrudS t	j }|r|rt| }|r|d	 |krdS dS )
NF)r   Tr2  c                 3   s    | ]}| v V  qd S r   r   r   r   r   r   r`     ra   z"should_use_bf16.<locals>.<genexpr>r	   r3  r
   )ry   rO  r@   r  r-  re   r+  r,   rP  r   r=   is_bf16_supportedr/   r4   rv   rD   rN   rz   anyAMD_RDNA2_AND_OLDER_ARCHr2   rR  r  )rB   r  r  r  rT  
bf16_worksr  r   r   r   r  `  sN   




r  c                 C   sh   t rdS t s	dS tj| }|jdkrdS |jdk rdS |jdk r$dS tdk r*dS tr2tdk r2dS dS )NTF	   r3  r2  )r   r
   )	SUPPORT_FP8_OPSrs   r   rD   rN   rR  minorrP  rS  rB   rT  r   r   r   r    s"   


r  c                 C   s(   t  sdS tj| }|jdk rdS dS )NF
   T)rs   r   rD   rN   rR  r^  r   r   r   supports_nvfp4_compute  s   
r`  c                 C   s4   t  sdS tdk rdS tj| }|jdk rdS dS )NF)r   r_  r_  T)rs   rP  r   rD   rN   rR  r^  r   r   r   supports_mxfp8_compute  s   
ra  c                   C   s   t dk rdS dS )Nr   FT)rP  r   r   r   r   extended_fp16_support  s   rb  c                 C   s:   t | d }|d ur|S t| rtj}ntj}|t | < |S r   )LORA_COMPUTE_DTYPESrE  r  r   r{  r}  r  r   r   r   lora_compute_dtype  s   rd  c                   C   s:   t  rd S t rtj  d S tj rtj  d S d S r   )r+  r,   r   r=   r  rD   is_availabler   r   r   r   r    s   
r  c                 C   s   t  rd S ttjkrtj  d S t rtj  d S t	 r%tj
  d S t r/tj  d S tj rEtj  tj  tj  d S d S r   )r+  r*   r   r   r   r;   empty_cacher,   r=   r/   r>   r2   r?   rD   re  r  ipc_collect)forcer   r   r   r+    s   



r+  c                   C   s   t dt  d S )Ngꌠ9Y>)F)r>  rE   r   r   r   r   unload_all_models  s   ri  c                   C   s   t  st rtjj S dS )Nr%   )rv   rs   r   rD   memorymemory_summaryr   r   r   r   debug_memory_summary  s   rl  c                   @   s   e Zd ZdS )InterruptProcessingExceptionN)r   r   r   r   r   r   r   rm    s    rm  c                 C   s.   t  | aW d    d S 1 sw   Y  d S r   interrupt_processing_mutexinterrupt_processing)valuer   r   r   interrupt_current_processing  s   "rr  c                   C   s,   t 
 tW  d    S 1 sw   Y  d S r   rn  r   r   r   r   processing_interrupted	  s   $rs  c                   C   s8   t  tr
dat W d    d S 1 sw   Y  d S r   )ro  rp  rm  r   r   r   r   )throw_exception_if_processing_interrupted  s   "rt  r   r  )r   FNFr   )r   )FN)NNFFNNr   )T(  __doc__rJ   r&  enumr   comfy.cli_argsr   r   	threadingr   r   r5   r   rh  r   
contextlibr   comfy.memory_managementr   comfy.utilscomfy.quant_opsr   r   r   r,  set_vram_tor   r*   
total_vramin_trainingtraining_fp8_bwdr$   r  r+   torch_versionr9   __version__rd   tempr]   rP  rG  r  rC  use_deterministic_algorithmsr@   directmlrj  torch_directmldevice_indexrB   rA   r   device_nameintel_extension_for_pytorchr   r=   device_countrX   re  backendsr;   r   	torch.mps	torch_npur>   r.   	torch_mlur?   r1   rI   r3   r<   r   r,   r/   r2   r4   r:   rE   r\   re   rK   rL   	total_ramrc   rD   OutOfMemoryErrorrh   	ExceptionAcceleratorErrorri   AttributeErrorr  rp   rq   XFORMERS_VERSIONr  disable_xformersr  xformersxformers.ops_has_cpp_libraryr{   rs   rv   r   rI  r  use_pytorch_cross_attentionuse_split_cross_attentionuse_quad_cross_attentionr  r\  rY  AMD_ENABLE_MIOPEN_ENVrN   rz   r~   rX  getenvcudnnr  rb   r   rk   ru   rocm_versionr   enable_math_sdpenable_flash_sdpenable_mem_efficient_sdpr  Fp16Accumulationfastmatmulallow_fp16_accumulationAutoTune	benchmark"allow_fp16_bf16_reduction_math_sdplowvramr   novramr   highvramr  r   rO  
force_fp32r   r   rt  disable_smart_memoryr#  r   r  r   r   r   r  r  	win32_verrS  r  comfy.windowsr  reserve_vramr'  r  r  r>  rc  rd  r  r  ry  r   r  r  r  r  r{  r|  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  async_offloaddisable_async_offloadr  r  r  r  r  rg  r  r  r  r  r  r  r  r  disable_pinned_memoryr   r  rm   r  r  r  r  r  r  r  r  r  r$  r$  r+  r-  r.  ry   r  r  r  r1  r  r  r`  ra  rb  rc  rd  r  r+  ri  rl  rm  RLockro  rp  rr  rs  rt  r   r   r   r   <module>   s   








*





" 

(



(



_

6j
5

 	%
&.F8
