o
    i]                     @   s  d dl Z d dlZd dlmZ d dlmZmZmZ d dlm	Z	m
Z
mZmZmZmZmZmZmZmZmZ d dlmZmZmZmZmZmZmZmZ dZdZdZg d	Z d
d e D Z!dd e D Z"G dd dej#Z$G dd dej#Z%G dd dej#Z&G dd dej#Z'G dd dej#Z(G dd dej#Z)ej*jdddddej+j,dd ej*jd!d"d#ddej+j,d$d ej-jd%d&d'd(ej*jd)d#d#d*dej+j,d+d gZ.G d,d- d-ej#Z/d.e0d/e1fd0d1Z2G d2d3 d3ej#Z3G d4d5 d5eZ4d/e4fd6d7Z5dS )8    N)override)IOComfyExtensionInput)AddVoiceRequestAddVoiceResponseDialogueInputDialogueSettingsSpeechToSpeechRequestSpeechToTextRequestSpeechToTextResponseTextToDialogueRequestTextToSoundEffectsRequestTextToSpeechRequestTextToSpeechVoiceSettings)ApiEndpointaudio_bytes_to_audio_inputaudio_ndarray_to_bytesio"audio_tensor_to_contiguous_ndarraysync_opsync_op_rawupload_audio_to_comfyapivalidate_stringELEVENLABS_MUSIC_SECTIONSELEVENLABS_COMPOSITION_PLANELEVENLABS_VOICE))CwhRBWXzGAHq8TQ4Fs17Rogermaleamerican)EXAVITQu4vr4xnSDxMaLSarahfemaler   )FGY2WhTYpPnrIDTdsKH5Laurar"   r   )IKne3meq5aSn9XLyUdCDCharlier   
australian)JBFqnCBsd6RMkjVDRZzbGeorger   british)N2lVS1w4EtoT3dr4eOWOCallumr   r   )SAz9YHcvj6GT2YYXdXwwRiverneutralr   )SOYHLrjzK2X1ezoPC6crHarryr   r   )TX3LPaxmHKxFdv7VOQHJLiamr   r   )Xb7hH8MSUJpSbSDYk0k2Alicer"   r*   )XrExE9yKIg1WjnnlVkGXMatildar"   r   )bIHbv24MWmeRgasZH58oWillr   r   )cgSgspJ2msm6clMCkdW9Jessicar"   r   )cjVigY5qzO86Huf0OWalEricr   r   )hpp4J3VqNfWAUOO0d1UsBellar"   r   )iP95p4xoKVk53GoZ742BChrisr   r   )nPczCjzI2devNBz1zQrbBrianr   r   )onwK4e9ZLuTAKqWW03F9Danielr   r*   )pFZP5JQG7iQjIQuC4BkuLilyr"   r*   )pNInz6obpgDQGcFmaJgBAdamr   r   )pqHfZKP75CvOlQylNhV4Billr   r   c                 C   s*   g | ]\}}}}| d | d| dqS z (z, ) ).0_namegenderaccentrN   rN   >/mnt/c/Users/fbmor/ComfyUI/comfy_api_nodes/nodes_elevenlabs.py
<listcomp><   s   * rU   c                 C   s,   i | ]\}}}}| d | d| d|qS rL   rN   )rO   voice_idrQ   rR   rS   rN   rN   rT   
<dictcomp>=   s     rW   c                   @   sJ   e Zd ZedejfddZedejde	de
dededejfd	d
ZdS )ElevenLabsSpeechToTextreturnc                 C   s0  t jddddt jjdddt jjdt jd	t jjd
dddt jjddddt jjdddddt jj	ddt jjdddddt jj	ddt j
jdg ddddggd d!t jjd"d#d$dt jjd%d&d&d't jj	d(d)t jjd*d+d&d,d-d.gt jjd/d0t jjd"d0t jjd1d0gt jjt jjt jjgd2t jd3d4d5	S )6NrX   zElevenLabs Speech to Textapi node/audio/ElevenLabsznTranscribe audio to text. Supports automatic language detection, speaker diarization, and audio event tagging.audiozAudio to transcribe.tooltipmodel	scribe_v2tag_audio_eventsFz=Annotate sounds like (laughter), (music), etc. in transcript.defaultr]   diarizez"Annotate which speaker is talking.diarization_thresholdg)\(?皙?g?{Gz?zSSpeaker separation sensitivity. Lower values are more sensitive to speaker changes.rb   minmaxstepdisplay_moder]   temperature        g       @zNRandomness control. 0.0 uses model default. Higher values increase randomness.timestamps_granularity)word	characternonero   z&Timing precision for transcript words.)optionsrb   r]   zModel to use for transcription.rr   r]   language_code dISO-639-1 or ISO-639-3 language code (e.g., 'en', 'es', 'fra'). Leave empty for automatic detection.num_speakersr       zHMaximum number of speakers to predict. Set to 0 for automatic detection.)rb   rh   ri   rk   r]   seed   6Seed for reproducibility (determinism not guaranteed).rb   rh   ri   r]   textdisplay_name
words_jsonTzL{"type":"usd","usd":0.0073,"format":{"approximate":true,"suffix":"/minute"}}expr	node_idr   categorydescriptioninputsoutputshiddenis_api_nodeprice_badge)r   SchemaAudior   DynamicComboOptionBooleanFloatNumberDisplaysliderComboStringIntOutputHiddenauth_token_comfy_orgapi_key_comfy_org	unique_id
PriceBadgeclsrN   rN   rT   define_schemaC   s   

+/Kz$ElevenLabsSpeechToText.define_schemar[   r^   rt   rw   ry   c           	         s   |d r|rt dt|d t| |ddddI d H | r |nd |d |d	kr*|nd |d
 |d |d r9|d nd ||d d
}t| tdddt|ddI d H }tj|j	r^dd |j	D ng dd}t
|j|j|S )Nrc   zxNumber of speakers cannot be specified when diarization is enabled. Either disable diarization or set num_speakers to 0.r^   mp4aac	audio/mp4)container_format
codec_name	mime_typer`   r   rn   rd   rl   )
model_idcloud_storage_urlrt   r`   rw   rn   rc   rd   ry   rl   z#/proxy/elevenlabs/v1/speech-to-textPOSTpathmethodmultipart/form-data)response_modeldatacontent_typec                 S   s   g | ]}|j d dqS )Texclude_none)
model_dump)rO   wrN   rN   rT   rU      s    z2ElevenLabsSpeechToText.execute.<locals>.<listcomp>   )indent)
ValueErrorr   r   stripr   r   r   jsondumpswordsr   
NodeOutputr~   rt   )	r   r[   r^   rt   rw   ry   requestresponser   rN   rN   rT   execute   s>   	


zElevenLabsSpeechToText.executeN)__name__
__module____qualname__classmethodr   r   r   r   r   dictstrintr   r   rN   rN   rN   rT   rX   B   s"    arX   c                   @   s8   e Zd ZedejfddZededejfddZ	dS )ElevenLabsVoiceSelectorrY   c              
   C   s8   t jddddt jjdtddgt tjddgd	d
S )Nr   zElevenLabs Voice SelectorrZ   zCSelect a predefined ElevenLabs voice for text-to-speech generation.voicez5Choose a voice from the predefined ElevenLabs voices.rs   r   F)r   r   r   r   r   r   r   )r   r   r   r   ELEVENLABS_VOICE_OPTIONSCustomr   r   r   rN   rN   rT   r      s   z%ElevenLabsVoiceSelector.define_schemar   c                 C   s&   t |}|std| t|S )NzUnknown voice: )ELEVENLABS_VOICE_MAPgetr   r   r   )r   r   rV   rN   rN   rT   r      s   

zElevenLabsVoiceSelector.executeN)
r   r   r   r   r   r   r   r   r   r   rN   rN   rN   rT   r      s
    r   c                   @   sT   e Zd ZedejfddZededededede	d	ed
e
dedejfddZdS )ElevenLabsTextToSpeechrY   c                 C   s  t jddddt tjdddt jjdd	d
ddt jjdddddt jjddt j	jdg dddt j
jdt j
dt jjdddddt jjddt jjdddddt jjd dt jjd!d"d#d$t jjd%ddd&dt jjd'dgt j
d(t jjdddddt jjddt jjdddddt jjd dggd)dt jjd*d
d+d$t jjd,d-d.d/d0d1t j	jd2d3d4gd5dgt j gt jjt jjt jjgd	t jd6d7d8	S )9Nr   zElevenLabs Text to SpeechrZ   zConvert text to speech.r   zVVoice to use for speech synthesis. Connect from Voice Selector or Instant Voice Clone.r\   r~   Tru   zThe text to convert to speech.	multilinerb   r]   	stability      ?rm         ?rf   Voice stability. Lower values give broader emotional range, higher values produce more consistent but potentially monotonous speech.rg   apply_text_normalizationautoonoffjText normalization mode. 'auto' lets the system decide, 'on' always applies normalization, 'off' skips it.rs   r^   eleven_multilingual_v2speedffffff??6Speech speed. 1.0 is normal, <1.0 slower, >1.0 faster.similarity_boost      ?LSimilarity boost. Higher values make the voice more similar to the original.use_speaker_boostF/Boost similarity to the original speaker voice.ra   style皙?YStyle exaggeration. Higher values increase stylistic expression but may reduce stability.	eleven_v3z Model to use for text-to-speech.rt   rv   ry   rz   r   r{   r|   r}   output_formatmp3_44100_192opus_48000_192Audio output format.L{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/1K chars"}}r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rN   rN   rT   r      s   

		&	?Crz$ElevenLabsTextToSpeech.define_schemar   r~   r   r   r^   rt   ry   r   c	                    s   t |dd t||d | r|nd t||d |d |dd |dd d||d	}	t| td
| dd|id|	ddI d H }
tt	|
S )Nrz   
min_lengthr^   r   r   r   r   )r   r   r   r   r   )r~   r   rt   voice_settingsry   r   z$/proxy/elevenlabs/v1/text-to-speech/r   r   r   r   query_paramsTr   	as_binary)
r   r   r   r   r   r   r   r   r   r   )r   r   r~   r   r   r^   rt   ry   r   r   r   rN   rN   rT   r   s  s6   


zElevenLabsTextToSpeech.executeN)r   r   r   r   r   r   r   r   floatr   r   r   r   rN   rN   rN   rT   r      s0     	
r   c                   @   s:   e Zd ZedejfddZedejdej	fddZ
dS )ElevenLabsAudioIsolationrY   c                 C   sL   t jddddt jjdddgt j gt jjt jjt jjgdt j	d	d
d	S )Nr   zElevenLabs Voice IsolationrZ   z?Remove background noise from audio, isolating vocals or speech.r[   z.Audio to process for background noise removal.r\   TJ{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/minute"}}r   r   )
r   r   r   r   r   r   r   r   r   r   r   rN   rN   rT   r     s*   z&ElevenLabsAudioIsolation.define_schemar[   c                    sX   t |d }t||d dd}t| tddddd	|d
fidddI d H }tt|S )Nwaveformsample_rater   r   z$/proxy/elevenlabs/v1/audio-isolationr   r   r[   	audio.mp4r   r   T)filesr   r   )r   r   r   r   r   r   r   )r   r[   audio_data_npaudio_bytes_ior   rN   rN   rT   r     s   
z ElevenLabsAudioIsolation.executeN)r   r   r   r   r   r   r   r   r   r   r   rN   rN   rN   rT   r     s    r   c                	   @   s@   e Zd ZedejfddZededededej	fddZ
d	S )
ElevenLabsTextToSoundEffectsrY   c                 C   s   t jddddt jjddddd	t jjd
t jdt jjdddddt jjddt j	jddddt jjdddddt jjddggddt j
jdd d!gd"dgt j gt jjt jjt jjgdt jd#d$d%	S )&Nr   z ElevenLabs Text to Sound EffectsrZ   z.Generate sound effects from text descriptions.r~   Tru   z1Text description of the sound effect to generate.r   r^   eleven_sfx_v2durationg      @r   g      >@re   z'Duration of generated sound in seconds.rg   loopFz'Create a smoothly looping sound effect.ra   prompt_influenceg333333?rm   r   rf   zeHow closely generation follows the prompt. Higher values make the sound follow the text more closely.z)Model to use for sound effect generation.rs   r   r   r   r   zJ{"type":"usd","usd":0.14,"format":{"approximate":true,"suffix":"/minute"}}r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rN   rN   rT   r     sx   	#1z*ElevenLabsTextToSoundEffects.define_schemar~   r^   r   c                    s\   t |dd t| tddd|idt||d |d |d	d d
ddI d H }tt|S )Nrz   r   z%/proxy/elevenlabs/v1/sound-generationr   r   r   r   r  r   )r~   duration_secondsr  r   Tr   )r   r   r   r   r   r   r   r   )r   r~   r^   r   r   rN   rN   rT   r     s$   
z$ElevenLabsTextToSoundEffects.executeN)r   r   r   r   r   r   r   r   r   r   r   rN   rN   rN   rT   r     s    Dr   c                   @   s@   e Zd ZedejfddZedejjde	dej
fddZdS )	ElevenLabsInstantVoiceClonerY   c                 C   s~   t jddddt jjdt jjt jddddd	d
dt jjddddgt tj	ddgt j
jt j
jt j
jgdt jddd	S )Nr  zElevenLabs Instant Voice ClonerZ   z]Create a cloned voice from audio samples. Provide 1-8 audio recordings of the voice to clone.r   r[   rz      )prefixrh   ri   z#Audio recordings for voice cloning.)templater]   remove_background_noiseFzARemove background noise from voice samples using audio isolation.ra   r   r   Tz{"type":"usd","usd":0.15}r   r   )r   r   Autogrowr   TemplatePrefixr   r   r   r   r   r   r   r   r   r   r   rN   rN   rT   r   )  s<   


z)ElevenLabsInstantVoiceClone.define_schemar   r  c                    s   g }|D ](}|| }|d }|d }t |}t||dd}	|d| d|	 dff qt| tdd	d
tttt	
 |d|ddI d H }
t|
jS )Nr   r   r   r   r   z.mp4r   z/proxy/elevenlabs/v1/voices/addr   r   )rQ   r  r   )r   r   r   r   )r   r   appendgetvaluer   r   r   r   r   uuiduuid4r   r   rV   )r   r   r  file_tupleskeyr[   r   r   r   r   r   rN   rN   rT   r   N  s*    

z#ElevenLabsInstantVoiceClone.executeN)r   r   r   r   r   r   r   r  Typeboolr   r   rN   rN   rN   rT   r  (  s    $r  r   r   r   r   rf   r   rg   r   r   rm   r   r   Fr   ra   r   r   r   c                   @   sR   e Zd ZedejfddZededej	de
deded	ed
edejfddZdS )ElevenLabsSpeechToSpeechrY   c                 C   s   t jddddt tjdddt jjdd	dt jjd
ddddt jjddt j	jdt j	
dtt j	
dtgddt jjdddgddt jjddddddt jjdd d!d"gt j gt jjt jjt jjgd#t jd$d%d&	S )'Nr  zElevenLabs Speech to SpeechrZ   z]Transform speech from one voice to another while preserving the original content and emotion.r   zXTarget voice for the transformation. Connect from Voice Selector or Instant Voice Clone.r\   r[   zSource audio to transform.r   r   rm   r   rf   r   rg   r^   eleven_multilingual_sts_v2eleven_english_sts_v2z1Model to use for speech-to-speech transformation.rs   r   r   r   r   ry   r       Seed for reproducibility.r}   r  Fz?Remove background noise from input audio using audio isolation.ra   Tr   r   r   )r   r   r   r   r   r   r   r   r   r   r   ELEVENLABS_STS_VOICE_SETTINGSr   r   r   r   r   r   r   r   r   r   rN   rN   rT   r     s   


5z&ElevenLabsSpeechToSpeech.define_schemar   r[   r   r^   r   ry   r  c              	      s   t |d }t||d dd}	t||d |d |d |d d	}
t| td
| dd|idt|d |
jdd||ddd|	 dfidddI d H }t	t
|S )Nr   r   r   r   r   r   r   r   )r   r   r   r   r   z&/proxy/elevenlabs/v1/speech-to-speech/r   r   r   r^   Tr   )r   r   ry   r  r[   r   r   r   )r   r   r   r   )r   r   r   r   r   r
   model_dump_jsonr  r   r   r   )r   r   r[   r   r^   r   ry   r  r   r   r   r   rN   rN   rT   r     s8   
z ElevenLabsSpeechToSpeech.executeN)r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   rN   rN   rN   rT   r    s*    H	r  countrY   c              
   C   sf   g }t d| d D ]'}|tjjd| ddd| ddttjd| d	| d
dg q	|S )z>Generate input widgets for a given number of dialogue entries.rz   r~   Tru   z Text content for dialogue entry .r   r   zVoice for dialogue entry z5. Connect from Voice Selector or Instant Voice Clone.r\   )rangeextendr   r   r   r   r   )r  r   irN   rN   rT   _generate_dialogue_inputs  s    


r  c                   @   sP   e Zd ZedejfddZedededede	ded	e
d
edejfddZdS )ElevenLabsTextToDialoguerY   c                 C   sh  t jddddt jjdddddt jjd	d
t jjdg dddt jjddgddt jjdt jdt	dt jdt	dt jdt	dt jdt	dt jdt	dt jdt	dt jdt	d t jd!t	d"t jd#t	d$t jd%t	d&g
d'dt j
jd(d)d*d+t jjd,dd-d.d/d0t jjd1d2d3gd4dgt j gt jjt jjt jjgd5t jd6d7d8	S )9Nr  zElevenLabs Text to DialoguerZ   zZGenerate multi-speaker dialogue from text. Each dialogue entry has its own text and voice.r   r   rm   r   r   rg   r   r   r   rs   r^   r   z%Model to use for dialogue generation.r   1rz   2r   3   4   5   6   7   8r  9	   10
   zNumber of dialogue entries.rt   ru   rv   ra   ry   r   r  r  r}   r   r   r   r   Tr   r   r   )r   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r   r   r   r   r   rN   rN   rT   r     s   
:z&ElevenLabsTextToDialogue.define_schemar   r   r^   r   rt   ry   r   c                    s   t |d }g }	td|d D ]}
|d|
  }|d|
  }t|dd |	t||d qt|	|| r9|nd t|d||d}t| t	d	d
d|id|ddI d H }t
t|S )Nr   rz   r~   r   r   )r~   rV   )r   )r   r   rt   settingsry   r   z%/proxy/elevenlabs/v1/text-to-dialoguer   r   r   Tr   )r   r  r   r
  r   r   r   r	   r   r   r   r   r   )r   r   r   r^   r   rt   ry   r   num_entriesdialogue_inputsr  r~   rV   r   r   rN   rN   rT   r   e  s6   
z ElevenLabsTextToDialogue.executeN)r   r   r   r   r   r   r   r   r   r   r   r   r   rN   rN   rN   rT   r    s*    M	r  c                   @   s(   e Zd Zedeeej  fddZdS )ElevenLabsExtensionrY   c                    s   t tttttttgS N)rX   r   r   r   r   r  r  r  )selfrN   rN   rT   get_node_list  s   z!ElevenLabsExtension.get_node_listN)	r   r   r   r   listtyper   	ComfyNoder7  rN   rN   rN   rT   r4    s    r4  c                      s   t  S r5  )r4  rN   rN   rN   rT   comfy_entrypoint  s   r;  )6r   r  typing_extensionsr   comfy_api.latestr   r   r   comfy_api_nodes.apis.elevenlabsr   r   r   r	   r
   r   r   r   r   r   r   comfy_api_nodes.utilr   r   r   r   r   r   r   r   r   r   r   ELEVENLABS_VOICESr   r   r:  rX   r   r   r   r   r  r   r   r   r   r  r  r   r8  r  r  r4  r;  rN   rN   rN   rT   <module>   sx    4(  0-`D		$rv