o
    i&                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z
d dlZd dlmZ d dlmZ d dlmZmZ d dlmZmZ G dd dejZG dd	 d	ejZG d
d dejZd5ddZd6ddZdd Zdd ZG dd dejZG dd dejZG dd dejZG dd dejZ G dd dejZ!de
j"fd d!Z#d"d# Z$d7d%ej%fd&d'Z&G d(d) d)ejZ'd dl(ZG d*d+ d+ejZ)G d,d- d-ejZ*G d.d/ d/ejZ+G d0d1 d1eZ,d2e,fd3d4Z-dS )8    N)BytesIO)override)SymmetricPatchifierlatent_to_pixel_coords)ComfyExtensionioc                   @   2   e Zd Zedd ZeddejfddZeZdS )	EmptyLTXVLatentVideoc                 C   sn   t jddt jjdddtjddt jjdd	dtjddt jjd
ddtjddt jjdddddgt j gdS )Nr	   latent/video/ltxvwidth   @       defaultminmaxstepheight   lengtha         
batch_size   r   r   r   node_idcategoryinputsoutputs)r   SchemaIntInputnodesMAX_RESOLUTIONLatentOutputcls r+   3/mnt/c/Users/fbmor/ComfyUI/comfy_extras/nodes_lt.pydefine_schema   s   z"EmptyLTXVLatentVideo.define_schemar   returnc                 C   s@   t j|d|d d d |d |d gtj d}td|iS )N   r   r   r   devicesamples)torchzeroscomfymodel_managementintermediate_devicer   
NodeOutput)r*   r   r   r   r   latentr+   r+   r,   execute!   s   2zEmptyLTXVLatentVideo.executeN)r   	__name__
__module____qualname__classmethodr-   r   r8   r:   generater+   r+   r+   r,   r	      s    
r	   c                   @   0   e Zd Zedd ZedejfddZeZdS )LTXVImgToVideoc                 C   s   t jddt jdt jdt jdt jdt jjddd	tjd
dt jjddd	tjd
dt jjdddtjddt jjdddddt j	jdddddg	t jj
ddt jj
ddt jj
ddgdS )NrB   conditioning/video_modelspositivenegativevaeimager   r   r   r   r   r   r   r   r   	   r   r   r   r   r   strength      ?        display_namer9   r   )r   r"   Conditioningr$   VaeImager#   r%   r&   Floatr(   r'   r)   r+   r+   r,   r-   )   s$   



zLTXVImgToVideo.define_schemar.   c
                 C   s   t j|dd||dddd}
|
d d d d d d d df }||}tj|d|d d d |d |d gt j d	}||d d d d d |j	d
 f< tj
|d|j	d
 ddftj|jd}d|	 |d d d d d |j	d
 f< t||||dS )Nr   bilinearcenter   r/   r   r   r0      dtyper1   rJ   r2   
noise_mask)r5   utilscommon_upscalemovedimencoder3   r4   r6   r7   shapeonesfloat32r1   r   r8   )r*   rD   rE   rG   rF   r   r   r   r   rI   pixelsencode_pixelstr9   conditioning_latent_frames_maskr+   r+   r,   r:   @   s   $ 
2 $zLTXVImgToVideo.executeNr;   r+   r+   r+   r,   rB   (   s    
rB   c                   @   r   )	LTXVImgToVideoInplacec                 C   s^   t jddt jdt jdt jdt jjddddd	t jjd
dddgt jjddgdS )Nrf   rC   rF   rG   r9   rI   rJ   rK   r   bypassFzBypass the conditioning.)r   tooltiprL   r   )	r   r"   rO   r$   rP   r'   rQ   Booleanr(   r)   r+   r+   r,   r-   V   s   


z#LTXVImgToVideoInplace.define_schemaFr.   c                 C   s   |r|fS |d }|j \}}}	|j\}
}}}}||	 }|| }|jd |ks-|jd |kr@tj|dd||dddd}n|}|d d d d d d d df }||}||d d d d d |jd f< tj|
d|ddftj	|j
d}d	| |d d d d d |jd f< t||d
S )Nr2   r   rV   rR   rS   rT   rU   rW   rJ   rY   )downscale_index_formular_   r5   r[   r\   r]   r^   r3   r`   ra   r1   r   r8   )r*   rF   rG   r9   rI   rg   r2   _height_scale_factorwidth_scale_factorbatchlatent_frameslatent_heightlatent_widthr   r   rb   rc   rd   re   r+   r+   r,   r:   g   s*   & 
 $zLTXVImgToVideoInplace.executeN)Fr;   r+   r+   r+   r,   rf   U   s    
rf   rJ   c                 C   s   ||d|d}g }| |fD ]*}g }|D ]}	|	d  dd}
|
dur%|
} nqg ||}|t|d|i q|d |d fS )zAppend a guide_attention_entry to both positive and negative conditioning.

    Each entry tracks one guide reference for per-reference attention control.
    Entries are derived independently from each conditioning to avoid cross-contamination.
    N)pre_filter_countrI   
pixel_masklatent_shaper   guide_attention_entriesr   )getappendnode_helpersconditioning_set_values)rD   rE   rr   rt   rI   	new_entryresultscondexistingrd   foundentriesr+   r+   r,   _append_guide_attention_entry   s&   r   c                 C   s*   | D ]}||d v r|d |   S q|S )Nr   r+   )conditioningkeyr   rd   r+   r+   r,   conditioning_get_any_value   s
   r   c                 C   sZ   |  dd }| d }|d u r'|j\}}}}}tj|d|ddftj|jd}|S | }|S )NrZ   r2   r   rW   )rv   r_   r3   r`   ra   r1   clone)r9   rZ   latent_imager   rk   latent_lengthr+   r+   r,   get_noise_mask   s   r   c                 C   sD   t | dd }|d u rdS t|d d dd d df jd }||fS )Nkeyframe_idxsNr   r   )r   r3   uniquer_   )r|   r   num_keyframesr+   r+   r,   get_keyframe_idxs   s
   $r   c                   @   s|   e Zd ZedddZedd Zedd Zedd	 ZedddZ	edddZ
edd ZedejfddZeZd
S )LTXVAddGuider   T)	start_endc                 C   s   t jddt jdt jdt jdt jdt jjddd	t jjd
dddddt jjddddddgt jj	ddt jj	ddt jj	ddgdS )Nr   rC   rD   rE   rF   r9   rG   zImage or video to condition the latent video on. Must be 8*n + 1 frames. If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames.rh   	frame_idxr   ii'  a3  Frame index to start the conditioning at. For single-frame images or videos with 1-8 frames, any frame_idx value is acceptable. For videos with 9+ frames, frame_idx must be divisible by 8, otherwise it will be rounded down to the nearest multiple of 8. Negative values are counted from the end of the video.)r   r   r   rh   rI   rJ   rK   {Gz?r   rL   r   )
r   r"   rN   r$   rO   r'   rP   r#   rQ   r(   r)   r+   r+   r,   r-      s2   




zLTXVAddGuide.define_schemac                 C   s   |\}}}|d |j d d | | d  }tjj|dd|| || ddddd}	|	d d d d d d d df }
||
}|
|fS )Nr   r   rR   rS   disabled)croprU   )r_   r5   r[   r\   r]   r^   )r*   rF   rq   rp   imagesscale_factorstime_scale_factorrm   rl   rb   rc   rd   r+   r+   r,   r^      s   
". 
zLTXVAddGuide.encodec                 C   s   |\}}}t |\}}|| }	|dkr|nt|	d | d | d}|dkr4|dkr4|d | | d }|| d | }
||
fS )Nr   r   )r   r   )r*   r|   r   guide_lengthr   r   r   rk   r   latent_count
latent_idxr+   r+   r,   get_latent_index   s   
&zLTXVAddGuide.get_latent_indexNc                 C   s   t |\}}| j|\}}	|d u r|dkp|jd dk}t|	||d}
|
d d df  |7  < |d tj|dd  |
jddddd }|
d d dd d d dd f  |	|
j
7  < |d u re|
}n	tj||
gdd}t|d|iS )	Nr   rV   r   
causal_fixr0   rR   dimr   )r   
PATCHIFIERpatchifyr_   r   r3   tensorr1   viewtorX   catrx   ry   )r*   r|   r   guiding_latentr   latent_downscale_factorr   r   rk   latent_coordspixel_coordsspatial_end_offsetr+   r+   r,   add_keyframe_index   s"   

0zLTXVAddGuide.add_keyframe_indexr/   c              
   C   s  |j d |
ks|j d |
krtd| j||||||d}| j||||||d}|	d urut|j d |	j d }t|j d |	j d }|j d dksP|j d dkrY|ddd||}|	j d dksg|	j d dkrp|	ddd||}	|	| }ntj|j d d|j d |j d |j d fd	| |j|jd
}|j d |j d kr|j d |j d  }tj	j
j|ddddddd|fdd}tj||gdd}tj||gdd}||||fS )Nr   z6Adding guide to a combined AV latent is not supported.r   rU      rR   r   rV   rJ   rW   )padvaluer   )r_   
ValueErrorr   r   expandr3   fullrX   r1   nn
functionalr   r   )r*   rD   rE   r   r   rZ   r   rI   r   
guide_maskin_channelsr   r   target_htarget_wmaskpad_lenr+   r+   r,   append_keyframe  s0   
$$zLTXVAddGuide.append_keyframec                 C   s   |j d }|j d || ksJ dtj|j d d|ddfd| |j|jd}| }| }||d d d d ||| f< ||d d d d ||| f< ||fS )NrV   =Conditioning frames exceed the length of the latent sequence.r   r   rJ   rW   )r_   r3   r   rX   r1   r   )r*   r   rZ   r   r   rI   cond_lengthr   r+   r+   r,   replace_latent_frames=  s   
z"LTXVAddGuide.replace_latent_framesr.   c              
   C   s   |j }|d }	t|}
|	j\}}}}}| |||||\}}| ||t|||\}}||jd  |ks8J d| ||||	|
|||\}}}	}
|jd |jd  |jd  }t|jdd  }t|||||d\}}t	
|||	|
dS )Nr2   rV   r   rU   r   )rI   rY   )rj   r   r_   r^   r   lenr   listr   r   r8   )r*   rD   rE   rF   r9   rG   r   rI   r   r   rZ   rk   r   rp   rq   rd   r   rr   guide_latent_shaper+   r+   r,   r:   Q  s.   

zLTXVAddGuide.execute)r   N)Nr/   r   N)r<   r=   r>   r   r   r?   r-   r^   r   r   r   r   r   r8   r:   r@   r+   r+   r+   r,   r      s"    
!

!
r   c                   @   rA   )LTXVCropGuidesc              	   C   sT   t jddt jdt jdt jdgt jjddt jjddt jjddgdS )Nr   rC   rD   rE   r9   rL   r   )r   r"   rN   r$   r'   r(   r)   r+   r+   r,   r-   u  s   


zLTXVCropGuides.define_schemar.   c                 C   s   |d   }t|}t|\}}|dkrt||||dS |d d d d d | f }|d d d d d | f }t|d d d}t|d d d}t||||dS )Nr2   r   rY   )r   ru   )r   r   r   r   r8   rx   ry   )r*   rD   rE   r9   r   rZ   rk   r   r+   r+   r,   r:     s    zLTXVCropGuides.executeN)	r<   r=   r>   r?   r-   r   r8   r:   r   r+   r+   r+   r,   r   t  s    
r   c                   @   ,   e Zd Zedd ZedejfddZdS )LTXVConditioningc                 C   sR   t jddt jdt jdt jjddddd	d
gt jjddt jjddgdS )Nr   rC   rD   rE   
frame_rateg      9@rK   g     @@r   r   rL   r   )r   r"   rN   r$   rQ   r(   r)   r+   r+   r,   r-     s   

zLTXVConditioning.define_schemar.   c                 C   s,   t |d|i}t |d|i}t||S )Nr   )rx   ry   r   r8   )r*   rD   rE   r   r+   r+   r,   r:     s   zLTXVConditioning.executeNr<   r=   r>   r?   r-   r   r8   r:   r+   r+   r+   r,   r     s
    
r   c                   @   .   e Zd Zedd ZeddejfddZdS )ModelSamplingLTXVc                 C   sZ   t jddt jdt jjdddddd	t jjd
ddddd	t jjdddgt j gdS )Nr   zadvanced/modelmodel	max_shiftffffff @rK         Y@r   r   
base_shiftffffff?r9   Toptionalr   )r   r"   Modelr$   rQ   r'   r(   r)   r+   r+   r,   r-     s   
zModelSamplingLTXV.define_schemaNr.   c                 C   s   |  }|d u rd}nt|d jdd  }d}d}|| ||  }	||	|  }
||	 |
 }tjj}tjj}G dd d||}||jj	}|j
|d |d| t|S )	Nr   r2   rV      c                   @   s   e Zd ZdS )z8ModelSamplingLTXV.execute.<locals>.ModelSamplingAdvancedN)r<   r=   r>   r+   r+   r+   r,   ModelSamplingAdvanced  s    r   )shiftmodel_sampling)r   mathprodr_   r5   r   ModelSamplingFluxCONSTr   model_configset_parametersadd_object_patchr   r8   )r*   r   r   r   r9   mtokensx1x2mmbr   sampling_basesampling_typer   r   r+   r+   r,   r:     s    
zModelSamplingLTXV.executeNr   r+   r+   r+   r,   r     s
    
r   c                   @   r   )LTXVSchedulerc                 C   s   t jddt jjdddddt jjdd	d
dddt jjddd
dddt jjdddddt jjddd
dddddt jjdddgt j gdS )Nr   z#sampling/custom_sampling/schedulerssteps   r   i'  r   r   r   rK   r   r   r   r   r   stretchTz4Stretch the sigmas to be in the range [terminal, 1].)idr   rh   advancedterminalg?gGz?z2The terminal value of the sigmas after stretching.)r   r   r   r   r   rh   r   r9   r   r   )	r   r"   r#   r$   rQ   ri   r'   Sigmasr(   r)   r+   r+   r,   r-     s4   	zLTXVScheduler.define_schemaNr.   c                 C   s   |d u rd}nt |d jdd  }tdd|d }d}	d}
|| |
|	  }|||	  }|| | }d}t|dkt |t |d| d |   d}|ro|dk}|| }d| }|d	 d|  }d||  }|||< t|S )
Nr   r2   rV   rJ   rK   r   r   r   rR   )	r   r   r_   r3   linspacewhereexpr   r8   )r*   r   r   r   r   r   r9   r   sigmasr   r   r   r   sigma_shiftpowernon_zero_masknon_zero_sigmasone_minus_zscale_factor	stretchedr+   r+   r,   r:     s.   "
zLTXVScheduler.executer   r   r+   r+   r+   r,   r     s
    
r   image_arrayc                 C   s   t j| ddd}z;|jddt|ddd}|jd	 |_|jd |_t jj|d
dj	dd}|
|| |
|  W |  d S |  w )Nwmp4formatlibx264r   veryfast)crfpreset)rateoptionsr   rgb24yuv420p)avopen
add_streamstrr_   r   r   
VideoFramefrom_ndarrayreformatmuxr^   close)output_filer   r   	containerstreamav_framer+   r+   r,   encode_single_frame&  s   r  c                 C   sP   t | }ztdd |jD }t||}W |  n|  w |jddS )Nc                 s   s    | ]
}|j d kr|V  qdS )videoN)type).0sr+   r+   r,   	<genexpr>:  s    z&decode_single_frame.<locals>.<genexpr>r  r   )r  r  nextstreamsdecoder  
to_ndarray)
video_filer  r  framer+   r+   r,   decode_single_frame7  s   
r     rG   c                 C   s   |dkr| S | d | j d d d d | j d d d f d    }t }t||| | }W d    n1 s>w   Y  t|}t|}W d    n1 sVw   Y  tj	|| j
| jdd }|S )Nr   rV   r   g     o@rW   )r_   bytecpunumpyr   r  getvaluer  r3   r   rX   r1   )rG   r   r   r  video_bytesr  r   r+   r+   r,   
preprocessA  s   @


r$  c                   @   rA   )LTXVPreprocessc                 C   s<   t jddt jdt jjddddddgt jjd	d
gdS )Nr%  rG   img_compression#   r   d   z(Amount of compression to apply on image.)r   r   r   r   rh   output_imagerL   r   )r   r"   rP   r$   r#   r(   r)   r+   r+   r,   r-   P  s   

zLTXVPreprocess.define_schemar.   c                 C   s<   g }t |jd D ]}|t|| | q	tt|S r   )ranger_   rw   r$  r   r8   r3   stack)r*   rG   r&  output_imagesir+   r+   r,   r:   `  s   zLTXVPreprocess.executeN)	r<   r=   r>   r?   r-   r   r8   r:   r$  r+   r+   r+   r,   r%  O  s    
r%  c                   @   r   )LTXVConcatAVLatentc                 C   s2   t jddt jdt jdgt jjddgdS )Nr.  r
   video_latentaudio_latentr9   rL   r   r   r"   r'   r$   r(   r)   r+   r+   r,   r-   l  s   

z LTXVConcatAVLatent.define_schemar.   c                 C   s   i }| | | | |dd }|dd }|d us |d ur@|d u r+t|d }|d u r6t|d }tj||f|d< tj|d |d f|d< t|S )NrZ   r2   )	updaterv   r3   	ones_liker5   nested_tensorNestedTensorr   r8   )r*   r/  r0  outputvideo_noise_maskaudio_noise_maskr+   r+   r,   r:   z  s   


zLTXVConcatAVLatent.executeNr   r+   r+   r+   r,   r.  k  s
    
r.  c                   @   r   )LTXVSeparateAVLatentc              	   C   s6   t jdddt jdgt jjddt jjddgdS )	Nr9  r
   zLTXV Separate AV Latent	av_latentr/  rL   r0  )r   r   descriptionr    r!   r1  r)   r+   r+   r,   r-     s   
z"LTXVSeparateAVLatent.define_schemar.   c                 C   sx   |d   }| }|d |d< | }|d |d< d|v r6|d }|d ur6|  }|d |d< |d |d< t||S )Nr2   r   r   rZ   )unbindcopyr   r8   )r*   r:  latentsr/  r0  masksr+   r+   r,   r:     s   zLTXVSeparateAVLatent.executeNr   r+   r+   r+   r,   r9    s
    
r9  c                   @   s4   e Zd ZedejfddZedejfddZdS )LTXVReferenceAudior.   c                 C   s   t jddddt jdt jdt jdt jjdd	d
t jjddddt jjddddddddt jjddddddddt jjddddddddgt j t jjddt jjddgdS ) Nr@  zLTXV Reference Audio (ID-LoRA)zconditioning/audiozSet reference audio for ID-LoRA speaker identity transfer. Encodes a reference audio clip into the conditioning and optionally patches the model with identity guidance (extra forward pass without reference, amplifying the speaker identity effect).r   rD   rE   reference_audiozReference audio clip whose speaker identity to transfer. ~5 seconds recommended (training duration). Shorter or longer clips may degrade voice identity transfer.r   	audio_vaez	Audio VAEzLTXV Audio VAE for encoding.)r   rM   rh   identity_guidance_scaleg      @rK   r   r   zStrength of identity guidance. Runs an extra forward pass without reference each step to amplify speaker identity. Set to 0 to disable (no extra pass).)r   r   r   r   roundrh   start_percentrJ   gMbP?Tz;Start of the sigma range where identity guidance is active.)r   r   r   r   r   rh   end_percentz9End of the sigma range where identity guidance is active.rL   )r   rM   r   r;  r    r!   )	r   r"   r   r$   rN   AudiorO   rQ   r(   r)   r+   r+   r,   r-     s&   


z LTXVReferenceAudio.define_schemac	                    s   | |}	|	j\}
}}}|	dddd|
||| }d|i}t|d|i}t|d|i}| }| |d}|||| fdd	}|	| t
|||S )
Nr   rV   r   rU   r   	ref_audior   c                    s    dkr| d S | d }|d   }|ks|k r| d S | d }| d }| d }| d  }| d }g }|D ]}	|	 }
|
di  }|d	d  ||
d< ||
 q8tj| d
 |g|||\}|||    S )Nr   denoisedsigmacond_denoisedr|   model_optionsinputmodel_condsrH  r   )itemr=  rv   poprw   r5   samplerscalc_cond_batch)argsrJ  sigma_	cond_predr|   
cfg_resultrL  x
noref_condentryrz   mc
pred_norefscale	sigma_endsigma_startr+   r,   post_cfg_function  s,   z5LTXVReferenceAudio.execute.<locals>.post_cfg_function)r^   r_   permutereshaperx   ry   r   get_model_objectpercent_to_sigma#set_model_sampler_post_cfg_functionr   r8   )r*   r   rD   rE   rA  rB  rC  rE  rF  audio_latentsr   crd   f
ref_tokensrH  r   r   r`  r+   r\  r,   r:     s   




zLTXVReferenceAudio.executeN)	r<   r=   r>   r?   r   r"   r-   r8   r:   r+   r+   r+   r,   r@    s
    r@  c                   @   s(   e Zd Zedeeej  fddZdS )LtxvExtensionr.   c                    s   t ttttttttt	t
tgS r   )r	   rB   rf   r   r   r   r   r%  r   r.  r9  r@  )selfr+   r+   r,   get_node_list  s   zLtxvExtension.get_node_listN)	r<   r=   r>   r   r   r  r   	ComfyNoderl  r+   r+   r+   r,   rj    s    rj  r.   c                      s   t  S r   )rj  r+   r+   r+   r,   comfy_entrypoint  s   rn  )rJ   r   )r  ).r%   rx   r3   comfy.model_managementr5   comfy.model_samplingcomfy.samplerscomfy.utilsr   r!  npr  r   r   typing_extensionsr   )comfy.ldm.lightricks.symmetric_patchifierr   r   comfy_api.latestr   rm  r	   rB   rf   r   r   r   r   r   r   r   r   r   ndarrayr  r  Tensorr$  r%  comfy.nested_tensorr.  r9  r@  rj  rn  r+   r+   r+   r,   <module>   sJ    -
5
 0,-A
# N