o
    i;                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Zd dlmZmZ d dlmZ d dlmZmZ d dlZG dd dejZG dd dejZG d	d
 d
ejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZdd Zd\dej dee!e!f de!fddZ"dZ#d d! Z$d]d#ej%d$ej%d%e!fd&d'Z&d(ej%d)ej%d*ej%fd+d,Z'd-ej(d.ej(d/e)d0e*d1e!f
d2d3Z+	4	5	6d^d-ej(d.ej(d/e)d0e*d1e!f
d7d8Z,G d9d: d:ejZ-d_d;d<Z.	d_d=d>Z/d`dBdCZ0dadDdEZ1G dFdG dGejZ2G dHdI dIejZ3dbdJdKZ4G dLdM dMejZ5G dNdO dOejZ6G dPdQ dQejZ7d dRl8m9Z9m:Z:m;Z;m<Z< G dSdT dTejZ=G dUdV dVejZ>G dWdX dXeZ?dYe?fdZd[Z@dS )c    N)Tuple	TypedDict)override)ComfyExtensionioc                   @   .   e Zd Zedd ZeddejfddZdS )WanImageToVideoc                 C   s   t jddt jdt jdt jdt jjdddtjdd	t jjd
ddtjdd	t jjdddtjdd	t jjdddddt jjdddt j	jdddg	t jj
ddt jj
ddt jj
ddgdS )Nr   conditioning/video_modelspositivenegativevaewidth@     defaultminmaxstepheight  lengthQ         
batch_size   r   r   r   clip_vision_outputToptionalstart_imagedisplay_namelatentnode_idcategoryinputsoutputsr   SchemaConditioningInputVaeIntnodesMAX_RESOLUTIONClipVisionOutputImageOutputLatentcls r8   4/mnt/c/Users/fbmor/ComfyUI/comfy_extras/nodes_wan.pydefine_schema   $   


zWanImageToVideo.define_schemaNreturnc
                 C   s  t j|d|d d d |d |d gtj d}
|d urtj|d | dd||dddd}t j||||j	d f|j
|jd	d
 }||d |j	d < ||d d d d d d d df }t jdd|
j	d |j	d |j	d f|j
|jd	}d|d d d d d |j	d d d d f< t|||d}t|||d}|	d urt|d|	i}t|d|	i}i }|
|d< t|||S )Nr   r   r      devicebilinearcenterr?   dtype      ?r                 concat_latent_imageconcat_maskr   samples)torchzeroscomfymodel_managementintermediate_deviceutilscommon_upscalemovedimonesshaper?   rD   encodenode_helpersconditioning_set_valuesr   
NodeOutput)r7   r
   r   r   r   r   r   r   r!   r   r$   imagerK   mask
out_latentr8   r8   r9   execute(   s    2,&&0,zWanImageToVideo.executeNN__name__
__module____qualname__classmethodr:   r   r[   r_   r8   r8   r8   r9   r      
    
r   c                   @   r   )WanFunControlToVideoc                 C      t jddt jdt jdt jdt jjdddtjdd	t jjd
ddtjdd	t jjdddtjdd	t jjdddddt jjdddt j	jdddt j	jdddg
t jj
ddt jj
ddt jj
ddgdS )Nrg   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Tr   r!   control_videor"   r$   r%   r*   r6   r8   r8   r9   r:   A   &   


z"WanFunControlToVideo.define_schemaNr<   c                 C   s  t j|d|d d d |d |d gtj d}t j|d|d d d |d |d gtj d}tj |}|ddddd}|d urtj	
|d | dd||dd	dd}||d d d d d d d d
f }|d d d d d |jd f |d d dd d |jd f< |
d urtj	
|
d | dd||dd	dd}
||
d d d d d d d d
f }|d d d d d |jd f |d d d dd |jd f< t|d|i}t|d|i}|	d urt|d|	i}t|d|	i}i }||d< t|||S )Nr   r   r   r=   r>   rG   r@   rA   rB   rF   rK   r   rM   )rN   rO   rP   rQ   rR   latent_formatsWan21process_outrepeatrS   rT   rU   rX   rW   rY   rZ   r   r[   )r7   r
   r   r   r   r   r   r   r!   r   ri   r$   concat_latentrK   r^   r8   r8   r9   r_   Y   s(   22,&<,&<zWanFunControlToVideo.executeNNNra   r8   r8   r8   r9   rg   @   
    
rg   c                   @   r   )Wan22FunControlToVideoc                 C   s   t jddt jdt jdt jdt jjdddtjdd	t jjd
ddtjdd	t jjdddtjdd	t jjdddddt jjdddt jjdddg	t jj	ddt jj	ddt j
j	ddgdS )Nrr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   	ref_imageTr   ri   r"   r$   r%   r   r+   r,   r-   r.   r/   r0   r1   r3   r4   r5   r6   r8   r8   r9   r:   v   r;   z$Wan22FunControlToVideo.define_schemaNr<   c                 C   s0  |  }|j}tj|||d d d || || gtj d}tj|||d d d || || gtj d}|dkrFtj 	|}ntj
 	|}|ddddd}tdd|jd d |jd |jd f}|	d urtj|	d | dd||dd	dd}	||	d d d d d d d d
f }|d d d d d |jd f |d d |d d |jd f< d|d d d d d |	jd d
 f< d }|d urtj|d d dd||dd	dd}||d d d d d d d d
f }|
d urDtj|
d | dd||dd	dd}
||
d d d d d d d d
f }|d d d d d |jd f |d d d |d |jd f< |d|jd d d|jd
 |jd dd}t||||d}t||||d}|d urtj|d|gidd}tj|d|gidd}i }||d< t|||S )Nr   r   r>   0   rG   rH   r@   rA   rB   rF   rI   r   )rK   rL   concat_mask_indexreference_latentsTappendrM   )spacial_compression_encodelatent_channelsrN   rO   rP   rQ   rR   rk   Wan22rm   rl   rn   rV   rW   rS   rT   rU   rX   view	transposerY   rZ   r   r[   )r7   r
   r   r   r   r   r   r   rs   r!   ri   spacial_scaler{   r$   ro   r]   rK   
ref_latentr^   r8   r8   r9   r_      s>   22*,&<$,&
,&<0
zWan22FunControlToVideo.executerp   ra   r8   r8   r8   r9   rr   u   rf   rr   c                   @   r   )WanFirstLastFrameToVideoc                 C   s   t jddt jdt jdt jdt jjdddtjdd	t jjd
ddtjdd	t jjdddtjdd	t jjdddddt jjdddt jjdddt j	jdddt j	jdddgt jj
ddt jj
ddt jj
ddgdS )Nr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   clip_vision_start_imageTr   clip_vision_end_imager!   	end_imager"   r$   r%   r*   r6   r8   r8   r9   r:      (   


z&WanFirstLastFrameToVideo.define_schemaNr<   c                 C   sp  |  }tj||j|d d d || || gtj d}|d ur8tj|d | 	dd||dd	dd}|	d urStj|	| d  	dd||dd	dd}	t
|||dfd }t
dd|jd	 d |jd
 |jd f}|d ur||d |jd < d|d d d d d |jd d f< |	d ur|	||	jd  d < d|d d d d |	jd  d f< ||d d d d d d d df }|d|jd	 d d|jd |jd dd	}t|||d}t|||d}d }|
d ur|
}|d ur|d urtj|j|jgd
d}tj }||_n|}|d ur+t|d|i}t|d|i}i }||d< t|||S )Nr   r   r>   r@   rA   rB   rF   rE   rG   rH   r   rI   rJ   dimr   rM   )rz   rN   rO   r{   rP   rQ   rR   rS   rT   rU   rV   rW   rX   r}   r~   rY   rZ   catpenultimate_hidden_statesclip_visionr4   r   r[   )r7   r
   r   r   r   r   r   r   r!   r   r   r   r   r$   r\   r]   rK   r   statesr^   r8   r8   r9   r_      sB   4,.*$"&0



z WanFirstLastFrameToVideo.executeNNNNra   r8   r8   r8   r9   r      
    
r   c                   @   r   )WanFunInpaintToVideoc                 C   rh   )Nr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Tr   r!   r   r"   r$   r%   r*   r6   r8   r8   r9   r:      rj   z"WanFunInpaintToVideo.define_schemaNr<   c                 C   s$   t  }|j|||||||||	|
d
S )N)r!   r   r   )r   r_   )r7   r
   r   r   r   r   r   r   r!   r   r   flfvr8   r8   r9   r_     s   zWanFunInpaintToVideo.executerp   ra   r8   r8   r8   r9   r      rq   r   c                   @   r   )WanVaceToVideoc                 C   s   t jdddgdt jdt jdt jdt jjdd	d
tjd
dt jjddd
tjd
dt jjdddtjddt jjdddddt jjddddddt j	jdddt j
jdddt j	jdddgt jjddt jjddt jjd dt jjd!dgd"S )#Nr   zvideo conditioningzvideo controlr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   strength      ?rI        @@{Gz?ri   Tr   control_masksreference_imager"   r$   trim_latentr&   search_aliasesr'   r(   r)   )r   r+   r,   r-   r.   r/   r0   r1   Floatr3   Maskr4   r5   r6   r8   r8   r9   r:     s,   


zWanVaceToVideo.define_schemaNr<   c                 C   s  |d d d }|	d urAt j|	d | dd||dddd}	|	jd |k r@tjjj|	ddddddd||	jd  fdd}	nt	|||d	fd }	|d urt j|d d dd||dddd}|
|d d d d d d d d	f }tj|t j t|gdd
}|
d u rt	|||df}n<|
}|jd	kr|d}t j|d | ||dddd}|jd |k rtjjj|ddddddd||jd  fdd}|	d }	|	d|  d }|	| d }|
|d d d d d d d d	f }|
|d d d d d d d d	f }tj||fdd
}|d ur$tj||fdd
}d}|| }|| }||||||}|ddddd	}||| |||}tjjj|d|||fddd}d}|d urt|d d d |jd d d d d f }tj||fdd
}||jd 7 }|jd }|d}tj||g|g|gddd}tj||g|g|gddd}tj|d||d |d gt j d}i }||d< t||||S )Nr   r   r@   rA   rB   r   rE   )valuerF   r   r   rG   r=   nearest-exactsizemode)vace_frames	vace_maskvace_strengthTrx   r   r>   rM   )rP   rS   rT   rU   rW   rN   nn
functionalpadrV   rX   r   rk   rl   rm   
zeros_likendim	unsqueezer}   permutereshapeinterpolatesqueezerY   rZ   rO   rQ   rR   r   r[   )r7   r
   r   r   r   r   r   r   r   ri   r   r   latent_lengthr]   inactivereactivecontrol_video_latent
vae_strideheight_mask
width_maskr   mask_padr$   r^   r8   r8   r9   r_   9  s^   ,.,&$

$.&&
&
,

&zWanVaceToVideo.executerp   ra   r8   r8   r8   r9   r     s
    
r   c                   @   ,   e Zd Zedd ZedejfddZdS )TrimVideoLatentc              
   C   s6   t jddt jdt jjdddddgt j gdS )	Nr   zlatent/videorM   trim_amountr   i r   r%   )r   r+   r5   r-   r/   r4   r6   r8   r8   r9   r:   v  s   
zTrimVideoLatent.define_schemar<   c                 C   s8   |  }|d }|d d d d |d f |d< t|S )NrM   )copyr   r[   )r7   rM   r   samples_outs1r8   r8   r9   r_     s   
zTrimVideoLatent.executeNra   r8   r8   r8   r9   r   u  s
    
r   c                   @   r   )WanCameraImageToVideoc                 C   s   t jddt jdt jdt jdt jjdddtjdd	t jjd
ddtjdd	t jjdddtjdd	t jjdddddt jjdddt j	jdddt j
jdddg
t jjddt jjddt jjddgdS )Nr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Tr   r!   camera_conditionsr"   r$   r%   )r   r+   r,   r-   r.   r/   r0   r1   r2   r3   WanCameraEmbeddingr4   r5   r6   r8   r8   r9   r:     rj   z#WanCameraImageToVideo.define_schemaNr<   c                 C   s  t j|d|d d d |d |d gtj d}t j|d|d d d |d |d gtj d}tj |}|d urtj	|d | 
dd||dd
dd}||d d d d d d d d	f }|d d d d d |jd
 f |d d d d d |jd
 f< t dd|jd
 d |jd |jd f}d|d d d d d |jd d	 f< |d|jd
 d d|jd	 |jd dd
}t|||d}t|||d}|
d urt|d|
i}t|d|
i}|	d urt|d|	i}t|d|	i}i }||d< t|||S )Nr   r   r   r=   r>   r@   rA   rB   rF   rG   rH   rI   r   rJ   r   r   rM   )rN   rO   rP   rQ   rR   rk   rl   rm   rS   rT   rU   rX   rW   rV   r}   r~   rY   rZ   r   r[   )r7   r
   r   r   r   r   r   r   r!   r   r   r$   ro   rK   r]   r^   r8   r8   r9   r_     s*   22,&<*$0zWanCameraImageToVideo.executerp   ra   r8   r8   r8   r9   r     rq   r   c                   @   r   )WanPhantomSubjectToVideoc                 C   s   t jddt jdt jdt jdt jjdddtjdd	t jjd
ddtjdd	t jjdddtjdd	t jjdddddt jjdddgt jj	ddt jj	ddt jj	ddt j
j	ddgdS )Nr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   imagesTr   r"   negative_textnegative_img_textr$   r%   rt   r6   r8   r8   r9   r:     s$   


z&WanPhantomSubjectToVideo.define_schemar<   c	              
   C   s  t j|d|d d d |d |d gtj d}	|}
|d ur}tj|d | dd||dddd}g }|D ]}|||	d	d d d d d d d d
f g7 }q9t j
|dd}t|d|i}t|d|i}
t|dtj t |i}i }|	|d< t||
||S )Nr   r   r   r=   r>   r@   rA   rB   r   rF   rG   r   time_dim_concatrM   )rN   rO   rP   rQ   rR   rS   rT   rU   rX   r   r   rY   rZ   rk   rl   rm   r   r   r[   )r7   r
   r   r   r   r   r   r   r   r$   cond2latent_imagesirK   r^   r8   r8   r9   r_     s   2,4"z WanPhantomSubjectToVideo.executeNra   r8   r8   r8   r9   r     s
    
r   c                 C   s   g }zat | trt| dd}|| n| D ]}t|dd}|| q|r?t |d tr?d|d v r?|g}W |S |r`t |d tr`|d r`t |d d tr`d|d d v r`W |S 	 W |S  tj	yp   g }Y |S w )z0Parse JSON track data into a standardized format'"r   x)

isinstancestrjsonloadsreplaceextendry   dictlistJSONDecodeError)trackstracks_dataparsed	track_strr8   r8   r9   parse_json_tracks  s(   

<r   r=   	tracks_np
frame_sizequant_multic                 K   s8  t |  }|jd dkrt |d}|dd df |dddf }}t| }t g ||d }|| }|| d }|d d }t dd|jd 	ddddj
|j }	t j|	||gdd		ddd
}
|
d d }|
dd  }dtd| }|td| }t j||dd	dd | }t j||gdd	}|S )Nr   y   )r   r   rG   rF   .rG   rF   r@   r   r   r   x   )rN   
from_numpyfloatrW   r   r   tensortype_aslinspacer}   expandr   mathgcdrepeat_interleave)r   r   
num_framesr   kwargsr   visibles
short_edgeframe_centertrangeout_out_0out_labfinal_resultr8   r8   r9   process_tracks
  s$   "(r   r   c                 C   sn   t jdd | D t jd}|jd }|tk r*t jt| dft jd}t ||f}n|dt }|tddS )zFConvert list of {x,y} to (FIXED_LENGTH,1,3) array, padding/truncating.c                 S   s   g | ]}|d  |d dgqS )r   yr   r8   ).0pr8   r8   r9   
<listcomp>/  s    zpad_pts.<locals>.<listcomp>)rD   r   rF   Nr   )nparrayfloat32rW   FIXED_LENGTHrO   vstackr   )trptsnr   r8   r8   r9   pad_pts-  s   
r   r   targetindr   c                    s   t  j|ksJ d|t jf jt fddt|D dgt j|     }t j|d kr`tt j|d  D ]}|d}qC|jg d|d  j|d d R  }tj||dS )	z Index selection utility functionz>Index must have the target dim, but get dim: %d, ind shape: %sc                    s(   g | ]}j | d kr j | ndqS )r   r@   rW   )r   kr  r   r8   r9   r   @  s   ( zind_sel.<locals>.<listcomp>r@   r   )r@   N)r   index)	lenrW   r   r   tupleranger   rN   gather)r   r  r   ind_pad_r8   r  r9   ind_sel8  s&   
*r  	vert_attrweightvert_assignc                 C   s   t |jd }t | jdkr4| jd | ksJ dg| t| j }| |}t||tj|d}n1| jd | ks?J | jd gdg|d   t| jdd  }| |}t||tj|d}tj	||
d dd}|S )z$Merge vertex attributes with weightsr   rG   r   r   Nr@   rH   )r  rW   r   r   r   r  typerN   longsumr   )r  r  r  
target_dim	new_shaper   sel_attr
final_attrr8   r8   r9   merge_finalQ  s   
,
r  r   vidtemperature
vae_dividetopkc                 C   s  |j \}}}}| j d }	tj| g ddd\}}
}|
tj|t|| |t|| g|
jd }|dd}|dd}t| t|| |t|| |}t| t|| |t|| |}tjtj	||dd	d
d
d dd
|
j}|
d
d
dd
f }|d
d
dd
f }|j|d dg|j dd
 R  d}|| j|d dg|j dd
 R  d|d  }|d
d
d
d
f |d
d
d
d
d
d
f  dd}t| | |dd|d dd|	 }tj|t||j d dd\}}d}tjjj|ddddd
d |d
d
d
df |j|ddd}|dddd}t|||dddd}|d}||d
d
dd
f d|dd   }tj|d
d
d
df |gdd}tjt|d
d |gdd}|d
 |d ddd|fS )z%Apply motion patching based on tracksrG   )r   rG   r   r@   r   r>   r   r   ij)indexingNr   gh㈵>)r  r   rA   rF   rO   F)r   padding_modealign_corners)rW   rN   splitr   r   r?   clampr   stackmeshgridtor}   r  powexpr  r   r   grid_sampler   r  rD   r   r  r   	ones_liker   )r   r  r  r  r  r  THWN	tracks_xyvisibletracks_nxxyygrid
tracks_padvisible_padvisible_aligntracks_aligndist_r  vert_weight
vert_index	grid_modepoint_featureout_feature
out_weightmix_featureout_feature_fullout_mask_fullr8   r8   r9   _patch_motion_singlec  sX   

,$$$(&4

(" rA       k@r   r   rG   c                 C   sr   t | }g }g }t|D ]}t| | || |||\}	}
||	 ||
 qtj|dd}tj|dd}||fS )Nr   r   )r  r  rA  ry   rN   r"  )r   r  r  r  r  B	out_masksout_featuresr   r]   featurer@  r?  r8   r8   r9   patch_motion  s    
rH  c                   @   s0   e Zd Zedd Ze	ddejfddZdS )WanTrackToVideoc                 C   s   t jdg ddt jdt jdt jdt jjddd	d
t jjdddtjddt jjdddtjddt jjdddtjddt jjdddddt j	jdddddddt jjdddd dd!t j
d"t jjd#dd$gt jjdd%t jjdd%t jjd&d%gd'S )(NrI  )zmotion trackingztrajectory videozpoint trackingzkeypoint animationr	   r
   r   r   r   Tz[])	multiliner   r   r   r   r   r   r   r   r   r   r   r   r   r   r  rB  r   r   g?)r   r   r   r   advancedr  rG   
   )r   r   r   rK  r!   r   r   r"   r$   r   )r   r+   r,   r-   r.   Stringr/   r0   r1   r   r3   r2   r4   r5   r6   r8   r8   r9   r:     s,   



zWanTrackToVideo.define_schemaNr<   c                 C   sn  t |}|st j|||||||||d	S tj|d|d d d |d |d gtj d}t|d d t	r:|g}g }|D ])}g }|D ]}t
|}|| qDtj|dd}|t|||f|d d q>|d urtj|d | d	d||d
ddd	}tj|jd ||||jd	 f|j|jdd }t|jd D ]
}|| ||df< qg }tj||}t|D ]}||||d d d d d d d df g7 }qtj|dd}tj |}tj||}t|||	|
dd}|\}}tj  |}| d }t!"|||d}t!"|||d}|d ur*t!"|d|i}t!"|d|i}i }||d< t#$|||S )N)r!   r   r   r   r   r=   r>   r   )axisr@   rA   rB   rC   rE   rF   r   rC  )r  r  r  r   )rL   rK   r   rM   )%r   r   r_   rN   rO   rP   rQ   rR   r   r   r   ry   r   r"  r   r   rS   rT   rU   rV   rW   r?   rD   r  resize_to_batch_sizerX   r   rk   rl   
process_inresize_list_to_batch_sizerH  rm   rY   rZ   r   r[   )r7   r
   r   r   r   r   r   r   r   r  r  r!   r   r   r$   processed_tracksbatcharrstrackr   r   videosr   latent_videosr   resr]   rK   r^   r8   r8   r9   r_     sd   $"
,.0


zWanTrackToVideo.executer`   ra   r8   r8   r8   r9   rI    s    
rI  c                 C   sT   |  dd} | jd t| }|du rt|| }tjjj| |ddd}| ddS )z
    features: shape=[1, T, 512]
    input_fps: fps for audio, f_a
    output_fps: fps for video, f_m
    output_len: video length
    r   rG   NTlinear)r   r  r   )r~   rW   r   intrN   r   r   r   )features	input_fps
output_fps
output_lenseq_lenoutput_featuresr8   r8   r9   linear_interpolation  s   ra  c                 C   s   || }t t||  }|||  krtd|d ur"|dkr"|}n|| }|dk r.tdtjd|d }||  }	|	| }
tj|	|
|dd}tt||  	t }t
|d|d }|S )Nz0required_duration must be less than video lengthr   zvideo length is too shortr   F)endpoint)rZ  r   ceil
ValueErrorrandomrandintr   roundr   astypeclip)original_fpstotal_frames
target_fps
num_samplefixed_startrequired_durationrequired_origin_framesstart_frame	max_start
start_timeend_timetime_pointsframe_indicesr8   r8   r9   get_sample_indices-  s    rw  r   r      c                    sv  | j \} }|dkrd}nd}|| }t ||  d }	|	| }
t|	| | |   }t| | ||
dd}g }t|| }|D ]g}| k rtt|||  ||d |  |}dd |D } fdd|D }|r{| d d |f jd	d
d}n*| d |  }n!|stj	|d| d  g| j
dntj	||d| d  g| j
d}|| qCtjdd |D dd}||	fS )Nr   TFr   )rj  rk  rl  rm  rn  c                 S   s   g | ]
}|d k r
d n|qS r   r8   r   cr8   r8   r9   r   c  s    z.get_audio_embed_bucket_fps.<locals>.<listcomp>c                    s    g | ]}| kr d  n|qS r   r8   rz  audio_frame_numr8   r9   r   d  s    rH   r@   )	start_dimend_dimrG   r>   c                 S   s   g | ]}| d qS ry  )r   rz  r8   r8   r9   r   r  s    r   )rW   rZ  r   rc  rw  r   r  flattenrN   rO   r?   ry   r   )audio_embedfpsbatch_framesm
video_rate
num_layers	audio_dimreturn_all_layersscalemin_batch_num
bucket_numpadd_audio_num	batch_idxbatch_audio_ebaudio_sample_stridebi
chosen_idxframe_audio_embedr8   r}  r9   get_audio_embed_bucket_fpsH  sJ   
"r  c                 C   sd  |d d d }|	d urt |	d }d}d}t|d|d}|d }t|||d|d	\}}|d}t|jd
krA|ddd}nt|jdkrP|ddd
d}|d d d d d d ||| f }|jd
 dkrt	| d|i} t	|d|d i}||7 }|d urt
j|d d dd||dddd}||d d d d d d d d
f }tj	| d|gidd} tj	|d|gidd}|d ur|jd dkr|dd  }t
j|dd||dddd}|jd dk rt d||d
gd }|||jd  d < |}||d d d d d d d d
f }|d ur:|d d d d dd f }t	| d|i} t	|d|i}t j|d||d |d gt
j d}t
j t |}|
d urt
j|
d | dd||dddd}
||
d d d d d d d d
f }
|
|d d d d d |
jd f< t	| d|i} t	|d|i}i }||d< | |||fS )Nr   r   encoded_audio_all_layersrx  r   2   r\  r]  r   )r  r  r  r  rF   rG   r  rI   r@   rA   rB   rw   Trx   I   irE   ireference_motionr=   r>   ri   rM   )rN   r   ra  r  r   r  rW   r   rY   rZ   rP   rS   rT   rU   rX   rV   rO   rQ   rR   rk   rl   rm   r   )r
   r   r   r   r   r   r   frame_offsetrs   audio_encoder_outputri   
ref_motionref_motion_latentlatent_tfeatr  r  r  audio_embed_bucket
num_repeatr   rr$   control_video_outr^   r8   r8   r9   wan_sound_to_videow  s^   
$,&
$&
&
,& r  c                   @   r   )WanSoundImageToVideoc                 C   s   t jddt jdt jdt jdt jjdddtjdd	t jjd
ddtjdd	t jjdddtjdd	t jjdddddt jjdddt j	jdddt j	jdddt j	jdddgt jj
ddt jj
ddt jj
ddgdS )Nr  r	   r
   r   r   r   r   r   r   r   r   r   M   r   r   r   r   r   r  Tr   rs   ri   r  r"   r$   r%   r   r+   r,   r-   r.   r/   r0   r1   AudioEncoderOutputr3   r4   r5   r6   r8   r8   r9   r:     r   z"WanSoundImageToVideo.define_schemaNr<   c                 C   s4   t |||||||||	|
|d\}}}}t|||S )N)rs   r  ri   r  )r  r   r[   )r7   r
   r   r   r   r   r   r   rs   r  ri   r  r^   r  r8   r8   r9   r_     s   zWanSoundImageToVideo.executer   ra   r8   r8   r8   r9   r    r   r  c                   @   r   )WanSoundImageToVideoExtendc                 C   s   t jddt jdt jdt jdt jjdddtjd	d
t jdt j	jdddt j
jdddt j
jdddgt jjddt jjddt jjddgdS )Nr  r	   r
   r   r   r   r  r   r   r   video_latentr  Tr   rs   ri   r"   r$   r%   )r   r+   r,   r-   r.   r/   r0   r1   r5   r  r3   r4   r6   r8   r8   r9   r:     s"   



z(WanSoundImageToVideoExtend.define_schemaNr<   c	                 C   st   |d }|j d d }	|j d d }
|j d }|j d d }t||||	|
||||||d |d\}}}}t|||S )	NrM   r@   r=   rH   r   r   )r  rs   r  ri   r  r  )rW   r  r   r[   )r7   r
   r   r   r   r  rs   r  ri   r   r   r   r  r^   r8   r8   r9   r_     s   
z"WanSoundImageToVideoExtend.executerp   ra   r8   r8   r8   r9   r    s
    
r  c                    s8  t j jd  jd f j jdt jd jd  jd f j jd}d|d d  }g }t|D ][}|dkra|| d }|| d }	t j fddt||	D dd	}
t j||
fdd	}
n(|d d|d   | }|d d|  | }	t j fd
dt||	D dd	}
||
 q3t j|dd	}||	| fS )Nr   rG   )rD   r?   rF   r   r   c                    6   g | ]}d |  kr j d  k rn n | nqS ry  r  r   r   	audio_embzero_audio_embedr8   r9   r         (z(get_audio_emb_window.<locals>.<listcomp>r   c                    r  ry  r  r  r  r8   r9   r   
  r  )	rN   rO   rW   rD   r?   r  r"  r   ry   )r  	frame_num
frame0_idxaudio_shiftzero_audio_embed_3iter_audio_emb_windlt_isted	wind_featr8   r  r9   get_audio_emb_window  s0   $&r  c                   @   r   )WanHuMoImageToVideoc                 C   s   t jddt jdt jdt jdt jjdddtjdd	t jjd
ddtjdd	t jjdddtjdd	t jjdddddt jjdddt j	jdddg	t jj
ddt jj
ddt jj
ddgddS )Nr  r	   r
   r   r   r   r   r   r   r   r   r   a   r   r   r   r   r   r  Tr   rs   r"   r$   r&   r'   r(   r)   is_experimentalr  r6   r8   r8   r9   r:     s&   


z!WanHuMoImageToVideo.define_schemaNr<   c
                 C   s  |d d d }
t j|d|
|d |d gtj d}|d urbtj|d d dd||dddd}||d d d d d d d d	f }t	j
|d
|gidd}t	j
|d
t |gidd}n)t j|dd|d |d gtj d}t	j
|d
|gidd}t	j
|d
|gidd}|	d ur@t j|	d dd}|	d d }|d d d |d f }t|d d d d ddf jdddd}t|d d d d ddf jdddd}t|d d d d ddf jdddd}t|d d d d ddf jdddd}t|d d d d df dd}t j|||||gddd }t||dd\}}|d}t |}t	
|d|i}t	
|d|i}n!t j||
d dddgtj d}t	
|d|i}t	
|d|i}i }||d< t|||S )Nr   r   r   r=   r>   r@   rA   rB   rF   rw   Trx   r  rG   r   audio_samplesi  r   r            )r  r        rM   )rN   rO   rP   rQ   rR   rS   rT   rU   rX   rY   rZ   r   r"  ra  meanr  r   r   r[   )r7   r
   r   r   r   r   r   r   rs   r  r  r$   r   zero_latentr  	audio_lenfeat0feat1feat2feat3feat4r  audio_emb_neg
zero_audior^   r8   r8   r9   r_   -  s>   &,&&
****

"zWanHuMoImageToVideo.executer`   ra   r8   r8   r8   r9   r    rq   r  c                   @   r   )WanAnimateToVideoc                 C   s^  t jddt jdt jdt jdt jjdddtjdd	t jjd
ddtjdd	t jjdddtjdd	t jjdddddt jjdddt j	jdddt j	jdddt j	jdddt jjdddtjdd	t j	jdddt j
jdddt j	jdddt jjdddtjdd d!gt jjdd"t jjdd"t jjd#d"t jjd$d"t jjd%d"t jjdd"gdd&S )'Nr  r	   r
   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r   Tr   r   
face_video
pose_videocontinue_motion_max_framesr  background_videocharacter_maskcontinue_motionvideo_frame_offsetr   zThe amount of frames to seek in all the input videos. Used for generating longer videos by chunk. Connect to the video_frame_offset output of the previous node for extending a video.r   r   r   r   tooltipr"   r$   r   
trim_imager  )r   r+   r,   r-   r.   r/   r0   r1   r2   r3   r   r4   r5   r6   r8   r8   r9   r:   W  s:   


zWanAnimateToVideo.define_schemaNr<   c              
   C   sV  d}|d d d }|d }|d }d}|
d u r!t d||df}
tj|
d | dd||dd	dd}||d d d d d d d df }t jdd|jd
 |jd |jd f|j|j	d}||jd 7 }d}|d u r{t 
|||dfd }nS|| d  }|	|jd 8 }	td|	}	tj|| d  dd||dd	dd}t j
||||jd f|j|j	dd }||d |jd < ||jd d d d 7 }|d urt|d|i}t|d|i}|d ur|jd |	krd }n||	d  }|d uritj|d | dd||dd	dd}|s2|jd |k r2t j|f|dd  f||jd    dd}||d d d d d d d df }t|d|i}t|d|i}|ri|jd }|d d }|d | }|d ur|jd |	kryd }n||	d  }|d urtj|d | dddddd	d d }|ddd}t|d|i}t|d|d d i}td|d d }|d ur|jd |	kr||	d  }tj|d | dd||dd	dd}|jd |kr||d  |||jd < t j
dd|d |jd |jd f|j|j	d}|d ur)d|d d d d d |d f< |d ur|jd |	ks>|jd dkr|jd dkrT||fd|jd   }n||	d  }|jdkrk|d}|dd}|jdkrv|d}tj|d d d d d |f |jd |jd dd	}|jd |kr|d d d d |d f |d d d d ||jd f< t j|||d d d d d d d df fdd}|d|jd d d|jd |jd dd}t j||fdd}t|||d}t|||d}t j|d|| ||gtj d}i }||d< t||||td|d d |	| S )NFr   r   r=   r   rF   r@   arearB   r  rH   rC   rG   rE   r   r   pose_video_latent   g       @r   face_video_pixelsrI   r|  r   rJ   r   r>   rM   )rN   rO   rP   rS   rT   rU   rX   rW   r?   rD   rV   r   rY   rZ   r   r   rn   r   r}   r~   rQ   rR   r   r[   )r7   r
   r   r   r   r   r   r   r  r  r   r   r  r  r  r  r  trim_to_pose_videor   latent_widthlatent_heightr   r\   rK   r]   ref_motion_latent_lengthr  ref_images_nummask_refmotionr$   r^   r8   r8   r9   r_   y  s   ,&0
.&
,,&


,
,.

 

6640"&zWanAnimateToVideo.executeNNNNNNNra   r8   r8   r8   r9   r  V  s
    
!r  c                   @   r   )Wan22ImageToVideoLatentc                 C   s   t jddt jdt jjdddtjddt jjdd	dtjddt jjd
ddtjddt jjdddddt jjdddgt j	 gdS )Nr  zconditioning/inpaintr   r   r  r  r   r   i  r   1   r   r   r   r   r   r!   Tr   r%   )
r   r+   r.   r-   r/   r0   r1   r3   r5   r4   r6   r8   r8   r9   r:     s   
	z%Wan22ImageToVideoLatent.define_schemaNr<   c                 C   s  t jdd|d d d |d |d gtj d}|d u r(i }||d< t|S t j|jd d|d d d |jd |jd	 gtj d}	|d urtj	
|d | d	d||d
ddd	}||}
|
|d d d d d |
jd f< |	d d d d d |
jd f  d9  < i }tj }|||	 |d|	   }||fd|jd   |d< |	|fd|	jd   |d< t|S )Nr   ru   r   r   r>   rM   r   rH   r@   rA   rB   r  rI   r   r|  
noise_mask)rN   rO   rP   rQ   rR   r   r[   rV   rW   rS   rT   rU   rX   rk   r|   rm   rn   r   )r7   r   r   r   r   r   r!   r$   r^   r]   latent_templatent_formatr8   r8   r9   r_     s"   2
<,
 (

zWan22ImageToVideoLatent.executeNra   r8   r8   r8   r9   r    s
    
r  )InfiniteTalkOuterSampleWrapperMultiTalkCrossAttnPatchMultiTalkGetAttnMapPatchproject_audio_featuresc                   @   sD   e Zd ZG dd deZedd Ze	d
dedejfdd	Z	dS )WanInfiniteTalkToVideoc                   @   s.   e Zd ZU eed< ejjed< ejjed< dS )zWanInfiniteTalkToVideo.DCValuesr   audio_encoder_output_2r]   N)	rb   rc   rd   r   __annotations__r   r  Typer   r8   r8   r8   r9   DCValues  s   
 r  c                 C   sv  t jddt jjdt jdg t jdt jjdddt jjd	dd
dt jjddddggdt jdt jdt j	dt j	dt j
dt jjdddtjddt jjdddtjddt jjdddtjddt jjdddt jjdddt jd t jjd!d"dd#dd$dd%t jjd&d'd(d)d*dt jjd+ddgt jjdd,t j	jdd,t j	jdd,t jjd-d,t jjd.d,gd/S )0Nr  r	   r   single_speakertwo_speakersr  Tr   mask_1z?Mask for the first speaker, required if using two audio inputs.r    r  mask_2z@Mask for the second speaker, required if using two audio inputs.)optionsmodelmodel_patchr
   r   r   r   r   r   r   r   r   r   r   r   r   r   r!   audio_encoder_output_1motion_frame_count	   !   z3Number of previous frames to use as motion context.)r   r   r   r   r  rK  audio_scaler   g      $      $@r   previous_framesr"   r$   r  r%   )r   r+   DynamicCombor-   Optionr  r   Model
ModelPatchr,   r.   r/   r0   r1   r2   r3   r   r4   r5   r6   r8   r8   r9   r:     sB   





z$WanInfiniteTalkToVideo.define_schemaNr   r<   c           1   	   C   s<  |d ur|j d |k rtd|d dkr!|d }|d }|d }|d ur1|d u s-|d u r1tdd }|d urJ|d urJ|d u rCtd	t||g}tjd
d|	d
 d d
 |d |d gtj d}|d urtj	|d |	 
dd
||dd
d
d}tj|	|||j d f|j|jdd }||d |j d < ||d d d d d d d df }tjd
d
|j d |j d |j d f|j|jd}d|d d d d d |j d d
 d d
 f< t|||d}t|||d}|d urt|d|i}t|d|i}| }g }g }|
|fD ]4}|d u rq|d }tj|ddd
d
d  }t|ddd
dd
}|| ||j d  qd}t|d
kr|d krt|}g } |D ]0}!|!j d |k rtj||!j d  g|!j d
d  R d!|!ji}"tj|!|"gdd}!| |! qX| }n=|dkrt|}#g }$d}%t||D ](\}!}&tj|#g|!j d
d  R d!|!ji}'|!|'|%|%|& < |$|' |%|&7 }%q|$}d }(|d urtjjj|d|j d d |j d d fd"d#d }(|(dk|(j d d}(|d ur@tj	|| d  
dd
||dd
d
d})|j d | }*|*}+|+|	 },t d$|+ d%|,  ||)d d d d d d d df }-|}.nd }+}.|	},|d d d d d d
f }-t!|j"j#||+|,$|% }/|/|j&d& d'< |'tj(j)j*d(t+|-||d ud) |,t-||d* |(d ur|,t.|(d+ i }0||0d,< t/0||||0|.S )-Nr   z$Not enough previous frames provided.r   r  r  r  r  z=Masks must be provided if two audio encoder outputs are used.zCSecond audio encoder output must be provided if two masks are used.r   r   r   r=   r>   r@   rA   rB   rC   rE   rF   rG   rH   rI   rJ   r   r  r   r  r  r  addpararD   nearestr   z&InfiniteTalk: Processing audio frames z - transformer_optionsaudio_embedsinfinite_talk_outer_sample)	is_extendattn2_patchattn1_patchrM   )1rW   rd  rN   r   rO   rP   rQ   rR   rS   rT   rU   rV   r?   rD   rX   rY   rZ   cloner"  r   ra  ry   r  r   r  zipr   r   r   r   r}   logginginfor  r  
audio_projr$  model_dtypemodel_optionsadd_wrapper_with_keypatcher_extension
WrappersMPOUTER_SAMPLEr  set_model_patchr  r  r   r[   )1r7   r   r  r  r
   r   r   r   r   r   r  r  r!   r  r  r   r  r  r  	ref_masksr$   r\   rK   rL   model_patchedencoded_audio_listseq_lengthsr  
all_layersencoded_audiomulti_audio_typemax_lenpaddedembr   	total_len	full_listoffsetr_  fulltoken_ref_target_masksmotion_framesr  audio_start	audio_endmotion_frames_latentr  r  r^   r8   r8   r9   r_   ;  s   2,&&0,



.
$

$
.&	
zWanInfiniteTalkToVideo.executer  )
rb   rc   rd   r   r  re   r:   r   r[   r_   r8   r8   r8   r9   r    s    
%r  c                   @   r   )WanSCAILToVideoc                 C   s  t jddt jdt jdt jdt jjdddtjdd	t jjd
ddtjdd	t jjdddtjdd	t jjdddddt jjdddt j	jdddt j	jddddt j
jddddddd t j
jd!ddddd"d t j
jd#ddddd$d gt jjdd%t jjdd%t jjd&d'd(gdd)S )*Nr1  r	   r
   r   r   r   r  r  r   r   i  r   r   r   r   r   r   r   r   Tr   r   r  z^Video used for pose conditioning. Will be downscaled to half the resolution of the main video.r  pose_strengthr   rI   r  r   zStrength of the pose latent.r  
pose_startz$Start step to use pose conditioning.pose_endz"End step to use pose conditioning.r"   r$   z$Empty latent of the generation size.)r#   r  r  )r   r+   r,   r-   r.   r/   r0   r1   r2   r3   r   r4   r5   r6   r8   r8   r9   r:     s.   


zWanSCAILToVideo.define_schemaNr<   c                 C   s  t j|d|d d d |d |d gtj d}d }|d urHtj|d d dd||dddd}||d d d d d d d d	f }|d uret	j
|d
|gidd}t	j
|d
t |gidd}|d uryt	
|d|i}t	
|d|i}|d urtj|d | dd|d |d dddd}||d d d d d d d d	f | }t	|d|i|	|
}t	|d|i|	|
}i }||d< t|||S )Nr   r   r   r=   r>   r@   rA   rB   rF   rw   Trx   r   rG   r  r  rM   )rN   rO   rP   rQ   rR   rS   rT   rU   rX   rY   rZ   r   +conditioning_set_values_with_timestep_ranger   r[   )r7   r
   r   r   r   r   r   r   r2  r3  r4  r   r   r  r$   r   r  r^   r8   r8   r9   r_     s&   2,&4*zWanSCAILToVideo.executerp   ra   r8   r8   r8   r9   r1    s
    
r1  c                   @   s(   e Zd Zedeeej  fddZdS )WanExtensionr<   c                    s(   t ttttttttt	t
ttttttgS r  )rI  r   rg   rr   r   r   r   r   r   r   r  r  r  r  r  r  r1  )selfr8   r8   r9   get_node_list  s&   zWanExtension.get_node_listN)	rb   rc   rd   r   r   r  r   	ComfyNoder8  r8   r8   r8   r9   r6    s    r6  r<   c                      s   t  S r  )r6  r8   r8   r8   r9   comfy_entrypoint  s   r:  )r=   r|  )rB  rC  rG   r  )r   r   r   rx  )r   NNNNN)rG   )Ar   r0   rY   rN   comfy.model_managementrP   comfy.utilscomfy.latent_formatscomfy.clip_visionr   numpyr   typingr   r   typing_extensionsr   comfy_api.latestr   r   r  r9  r   rg   rr   r   r   r   r   r   r   r   ndarrayrZ  r   r   r   Tensorr  r  FloatTensorr   r  rA  rH  rI  ra  rw  r  r  r  r  r  r  r  r  comfy.ldm.wan.model_multitalkr  r  r  r  r  r1  r6  r:  r8   r8   r8   r9   <module>   s    05AHX6+""
B

\


/>!
#B , %9