o
    i                     @   s\   d dl mZ d dlmZ ddlZddlZddlZddlZddl	Z	G dd dZ
d
dd	ZdS )   )Wav2Vec2Model)WhisperLargeV3    Nc                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
AudioEncoderModelc                 C   s   t j | _t j }t j| j| _|d}t|}|	| j|t j
jd |dkr5tdi || _n|dkrAtdi || _| j  t jj| j| j|d| _d| _t j| j d S )N
model_type)dtypedevice
operationswav2vec2whisper3)load_deviceoffload_devicei>   )comfymodel_managementtext_encoder_devicer   text_encoder_offload_devicetext_encoder_dtyper   popdictupdateopsmanual_castr   modelr   evalmodel_patcherCoreModelPatcherpatchermodel_sample_ratearchive_model_dtypes)selfconfigr   r   model_configr   r   A/mnt/c/Users/fbmor/ComfyUI/comfy/audio_encoders/audio_encoders.py__init__   s$   


zAudioEncoderModel.__init__c                 C   s   | j j|d| j dS )NF)strictassign)r   load_state_dictr   
is_dynamic)r    sdr   r   r#   load_sd    s   zAudioEncoderModel.load_sdc                 C   s
   | j  S )N)r   
state_dict)r    r   r   r#   get_sd#   s   
zAudioEncoderModel.get_sdc                 C   s\   t j| j tj||| j}| |	| j
\}}i }||d< ||d< |jd |d< |S )Nencoded_audioencoded_audio_all_layers   audio_samples)r   r   load_model_gpur   
torchaudio
functionalresampler   r   tor   shape)r    audiosample_rateout
all_layersoutputsr   r   r#   encode_audio&   s   zAudioEncoderModel.encode_audioN)__name__
__module____qualname__r$   r*   r,   r<   r   r   r   r#   r   
   s
    r    c              	   C   s   t j| ddi} d| v r;| d jd }|dkr$ddddd	d	d	d	d
}n-|dkr4ddddddddd
}ntd|d| v rMt j| ddi} ddi}ntdt|}|| \}}t|dkrjt	
d| t|dkrxt	
d| |S )Nz	wav2vec2.r@   zencoder.layer_norm.biasr   i   r
         T)r   	embed_dim	num_heads
num_layers	conv_norm	conv_biasdo_normalizedo_stable_layer_normi      FzAERROR: audio encoder file is invalid or unsupported embed_dim: {}z$model.encoder.embed_positions.weightzmodel.r   r   z#ERROR: audio encoder not supported.zmissing audio encoder: {}zunexpected audio encoder: {})r   utilsstate_dict_prefix_replacer6   RuntimeErrorformatr   r*   lenloggingwarning)r)   prefixrC   r!   audio_encodermur   r   r#   load_audio_encoder_from_sd1   sH   
rV   )r@   )r
   r   whisperr   comfy.model_managementr   	comfy.opscomfy.utilsrP   r2   r   rV   r   r   r   r#   <module>   s    '