o
    i                  
   @   sH  d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
Z
ddlmZ ddlZddlmZmZmZmZ dd	lmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z" dd
l#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z- dZ.dZ/dZ0ej1ej2ddgdddZ3G dd de4eZ5	d:de6ej7 dej8de9de:e fddZ;dedB de4de<fddZ=ded e	d! e4B de:e fd"d#Z>dede4fd$d%Z?d;ded'e<dej8fd(d)Z@dedeAdB fd*d+ZBG d,d- d-ej7ZCG d.d/ d/ej7ZDG d0d1 d1ej7ZEG d2d3 d3ej7ZFG d4d5 d5ej7ZGG d6d7 d7eZHdeHfd8d9ZIdS )<z
API Nodes for Gemini Multimodal LLM Usage via Remote API
See: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
    N)Enum)fnmatch)BytesIO)Literal)override)IOComfyExtensionInputTypes)GeminiContentGeminiFileDataGeminiGenerateContentRequestGeminiGenerateContentResponseGeminiImageConfig!GeminiImageGenerateContentRequestGeminiImageGenerationConfigGeminiInlineDataGeminiMimeType
GeminiPart
GeminiRoleGeminiSystemInstructionContentGeminiTextPartGeminiThinkingConfigModality)
ApiEndpointaudio_to_base64_stringbytesio_to_image_tensordownload_url_to_image_tensorget_number_of_imagessync_optensor_to_base64_stringupload_images_to_comfyapivalidate_stringvideo_to_base64_stringz/proxy/vertexai/geminii  @u  You are an expert image-generation engine. You must ALWAYS produce an image.
Interpret all user input—regardless of format, intent, or abstraction—as literal visual directives for image composition.
If a prompt is conversational or lacks specific visual details, you must creatively invent a concrete visual scenario that depicts the concept.
Prioritize generating the visual representation above any text, formatting, or conversational requests.model
resolutionwidgetsa  
    (
      $m := widgets.model;
      $r := widgets.resolution;
      $isFlash := $contains($m, "nano banana 2");
      $flashPrices := {"1k": 0.0696, "2k": 0.1014, "4k": 0.154};
      $proPrices := {"1k": 0.134, "2k": 0.134, "4k": 0.24};
      $prices := $isFlash ? $flashPrices : $proPrices;
      {"type":"usd","usd": $lookup($prices, $r), "format":{"suffix":"/Image","approximate":true}}
    )
    
depends_onexprc                   @   s   e Zd ZdZdZdZdS )GeminiImageModelz7
    Gemini Image Model Names allowed by comfy-api
    gemini-2.5-flash-image-previewgemini-2.5-flash-imageN)__name__
__module____qualname____doc__gemini_2_5_flash_image_previewgemini_2_5_flash_image r4   r4   :/mnt/c/Users/fbmor/ComfyUI/comfy_api_nodes/nodes_gemini.pyr+   K   s    r+   clsimagesimage_limitreturnc           
   	      s   g }|dk rt dt|}|dkrt d|dkr|nt||}t|d}t| ||dI d H }|D ]}|tttj|dd q3t	||D ]}	|tt
tjt||	 dd	 qH|S )
Nr   zPimage_limit must be greater than or equal to 0 when creating Gemini image parts.zINo images provided to create_image_parts; at least one image is required.
   )
max_images)mimeTypefileUri)fileDatar<   data
inlineData)
ValueErrorr   minr!   appendr   r   r   	image_pngranger   r    )
r6   r7   r8   image_partstotal_imageseffective_maxnum_url_imagesreference_images_urlsreference_image_urlidxr4   r4   r5   create_image_partsT   sB   

rO   mimepatternc                 C   s   | du rdS t | j|S )zPCheck if a MIME type matches a pattern. Supports fnmatch globs (e.g. 'image/*').NF)r   value)rP   rQ   r4   r4   r5   _mime_matches   s   rS   response	part_typetextc                 C   s
  | j s| jr| jjr| j}td|j d|j dtdg }g }| j D ]R}|jr8|j dkr8||j q%|jdu sC|jj	du rDq%|jj	D ].}|dkrW|j
rW|| qH|jrgt|jj|rg|| qH|jrvt|jj|rv|| qHq%|s|rtd| |S )	z
    Filter response parts by their type.

    Args:
        response: The API response from Gemini.
        part_type: Type of parts to extract ("text" or a MIME type).

    Returns:
        List of response parts matching the requested type.
    z(Gemini API blocked the request. Reason: z ()zGemini API returned no response candidates. If you are using the `IMAGE` modality, try changing it to `IMAGE+TEXT` to view the model's reasoning and understand why image generation failed.IMAGE_PROHIBITED_CONTENTNrV   z)Gemini API blocked the request. Reasons: )
candidatespromptFeedbackblockReasonrC   blockReasonMessagefinishReasonupperrE   contentpartsrV   rB   rS   r<   r>   )rT   rU   feedbackr`   blocked_reasons	candidatepartr4   r4   r5   get_parts_by_type   s:   

re   c                 C   s   t | d}ddd |D S )z
    Extract and concatenate all text parts from the response.

    Args:
        response: The API response from Gemini.

    Returns:
        Combined text from all text parts in the response.
    rV   
c                 S      g | ]}|j qS r4   rV   ).0rd   r4   r4   r5   
<listcomp>       z*get_text_from_response.<locals>.<listcomp>)re   join)rT   r`   r4   r4   r5   get_text_from_response   s   

rm   Fthoughtc                    s   g }t | d}|D ])}|jdu |krq
|jr%t|jj}tt|}n	t|j	j
I d H }|| q
t|dkrT|sOt|  }|rKtd| tdtdS tj|ddS )Nzimage/*Tr   z2Gemini did not generate an image. Model response: zGemini did not generate an image. Try rephrasing your prompt or changing the response modality to 'IMAGE+TEXT' to see the model's reasoning.)      rp      )dim)re   rn   rB   base64	b64decoder@   r   r   r   r>   r=   rE   lenrm   striprC   torchzeroscat)rT   rn   image_tensorsr`   rd   
image_datareturned_imagemodel_messager4   r4   r5   get_image_from_response   s*   

r~   c                 C   s"  | j sd S | j dv rd}d}d}nJ| j dv rd}d}d}n>| j dv r)d}d}d	}n2| j d
v r5d}d}d}n&| j dkrAd}d}d}n| j dkrMd}d}d}n| j dkrYd}d}d}nd S | jj| }| jjr| jjD ]}|jtjkry|||j 7 }qi|||j 7 }qi| jjr||| jj 7 }|d S )N)gemini-2.5-pro-preview-05-06gemini-2.5-prog      ?g      $@g        )gemini-2.5-flash-preview-04-17gemini-2.5-flashg333333?g      @)r,   r-   g      >@)gemini-3-pro-previewgemini-3.1-pro-preview   g      (@gemini-3.1-flash-lite-previewg      ?g      ?gemini-3-pro-image-previewg      ^@gemini-3.1-flash-image-previewg      ?g      @g      N@g    .A)	modelVersionusageMetadatapromptTokenCountcandidatesTokensDetailsmodalityr   IMAGE
tokenCountthoughtsTokenCount)rT   input_tokens_priceoutput_text_tokens_priceoutput_image_tokens_pricefinal_priceir4   r4   r5   calculate_tokens_price   sP   






r   c                   @   s   e Zd ZdZedd Zedejdee	 fddZ
edejdee	 fd	d
Ze					ddedededejdB dejdB dejdB dee	 dB dedejfddZdS )
GeminiNodeaZ  
    Node to generate text responses from a Gemini model.

    This node allows users to interact with Google's Gemini AI models, providing
    multimodal inputs (text, images, audio, video, files) to generate coherent
    text responses. The node works with the latest Gemini models, handling the
    API communication and response parsing.
    c                 C   s   t jddddt jjddddd	t jjd
g ddddt jjdddddddt jjddddt jjddddt jjddddt 	djddddt jjddddd dd!gt j
 gt jjt jjt jjgdt jt jd
gd"d#d$d%	S )&Nr   zGoogle Geminiapi node/text/GeminizGenerate text responses with Google's Gemini AI model. You can provide multiple types of inputs (text, images, audio, video) as context for generating more relevant and meaningful responses.promptT zText inputs to the model, used to generate a response. You can include detailed instructions, questions, or context for the model.)	multilinedefaulttooltipr$   )r   r   r   r   r   gemini-3-1-progemini-3-1-flash-liter   1The Gemini model to use for generating responses.optionsr   r   seed*   r       ^  When seed is fixed to a specific value, the model makes a best effort to provide the same response for repeated requests. Deterministic output isn't guaranteed. Also, changing the model or parameter settings, such as the temperature, can cause variations in the response even when you use the same seed value. By default, a random seed value is used.r   rD   maxcontrol_after_generater   r7   qOptional image(s) to use as context for the model. To include multiple images, you can use the Batch Images node.optionalr   audioz/Optional audio to use as context for the model.videoz/Optional video to use as context for the model.GEMINI_INPUT_FILESfilessOptional file(s) to use as context for the model. Accepts inputs from the Gemini Generate Content Input Files node.system_prompt8Foundational instructions that dictate an AI's behavior.r   r   r   r   advancedr&   a  
                (
                  $m := widgets.model;
                  $contains($m, "gemini-2.5-flash") ? {
                    "type": "list_usd",
                    "usd": [0.0003, 0.0025],
                    "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens"}
                  }
                  : $contains($m, "gemini-2.5-pro") ? {
                    "type": "list_usd",
                    "usd": [0.00125, 0.01],
                    "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
                  }
                  : ($contains($m, "gemini-3-pro-preview") or $contains($m, "gemini-3-1-pro")) ? {
                    "type": "list_usd",
                    "usd": [0.002, 0.012],
                    "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
                  }
                  : $contains($m, "gemini-3-1-flash-lite") ? {
                    "type": "list_usd",
                    "usd": [0.00025, 0.0015],
                    "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
                  }
                  : {"type":"text", "text":"Token-based"}
                )
                r(   	node_iddisplay_namecategorydescriptioninputsoutputshiddenis_api_nodeprice_badge)r   SchemaStringr	   ComboIntImageAudioVideoCustomOutputHiddenauth_token_comfy_orgapi_key_comfy_org	unique_id
PriceBadgePriceBadgeDependsr6   r4   r4   r5   define_schema  s   	
BzGeminiNode.define_schemavideo_inputr9   c                 C   s,   t |tjjtjjd}tttj	|ddgS )z3Convert video input to Gemini API compatible parts.)container_formatcodecr?   rA   )
r#   r
   VideoContainerMP4
VideoCodecH264r   r   r   	video_mp4)r6   r   base_64_stringr4   r4   r5   create_video_parts  s   zGeminiNode.create_video_partsaudio_inputc                 C   sh   g }t |d jd D ]&}tj|d | d|d d}t|ddd}|ttt	j
|dd	 q|S )
a
  
        Convert audio input to Gemini API compatible parts.

        Args:
            audio_input: Audio input from ComfyUI, containing waveform tensor and sample rate.

        Returns:
            List of GeminiPart objects containing the encoded audio.
        waveformr   sample_rate)r   r   mp3
libmp3lame)r   
codec_namer?   rA   )rG   shaper	   r   	unsqueezer   rE   r   r   r   	audio_mp3)r6   r   audio_partsbatch_indexaudio_at_indexaudio_bytesr4   r4   r5   create_audio_parts  s(   zGeminiNode.create_audio_partsNr   r   r$   r   r7   r   r   r   r   c	                    s  |dkrd}n|dkrd}n|dkrd}t |dg}	|d ur*|	t| |I d H  |d ur6|	| | |d urB|	| | |d urK|	| d }
|rYtt|dgd d}
t| tt	 d| d	d
t
ttj|	dg|
dttdI d H }t|}t|pdS )Nr   r   r   r   r   rh   r`   role/POSTpathmethodr   r`   )contentssystemInstruction)endpointr@   response_modelprice_extractorz#Empty response from Gemini model...)r   extendrO   r   r   r   r   r   r   GEMINI_BASE_ENDPOINTr   r   r   userr   r   rm   r   
NodeOutput)r6   r   r$   r   r7   r   r   r   r   r`   gemini_system_promptrT   output_textr4   r4   r5   execute  sF   
	zGeminiNode.execute)NNNNr   )r.   r/   r0   r1   classmethodr   r	   r   listr   r   r   r   strintr   r   r   r   r4   r4   r4   r5   r     sB    	
q!
	
r   c                   @   sX   e Zd ZdZedd ZededefddZedd	ed
e	e dB de
jfddZdS )GeminiInputFilesaL  
    Loads and formats input files for use with the Gemini API.

    This node allows users to include text (.txt) and PDF (.pdf) files as input
    context for the Gemini model. Files are converted to the appropriate format
    required by the API and can be chained together to include multiple files
    in a single request.
    c                 C   s   t  }dd t|D }t|dd d}dd |D }tjddd	d
tjjd||r.|d ndddt	djddddgt	d
 gdS )z
        For details about the supported file input types, see:
        https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
        c                 S   s>   g | ]}|  r|jd s|jdr| jtk r|qS )z.txt.pdf)is_filenameendswithstatst_sizeGEMINI_MAX_INPUT_FILE_SIZEri   fr4   r4   r5   rj     s    

z2GeminiInputFiles.define_schema.<locals>.<listcomp>c                 S   s   | j S Nr  )xr4   r4   r5   <lambda>	  s    z0GeminiInputFiles.define_schema.<locals>.<lambda>)keyc                 S   rg   r4   r  r	  r4   r4   r5   rj   
  rk   r  zGemini Input Filesr   u  Loads and prepares input files to include as inputs for Gemini LLM nodes. The files will be read by the Gemini model when generating a response. The contents of the text file count toward the token limit. 🛈 TIP: Can be chained together with other Gemini Input File nodes.filer   NzgInput files to include as context for the model. Only accepts text (.txt) and PDF (.pdf) files for now.r   r   TzAn optional additional file(s) to batch together with the file loaded from this node. Allows chaining of input files so that a single message can include multiple input files.r   )r   r   r   r   r   r   )folder_pathsget_input_directoryosscandirsortedr   r   r   r	   r   r   )r6   	input_dirinput_filesr4   r4   r5   r     s4   
zGeminiInputFiles.define_schema	file_pathr9   c                 C   sj   | drtjntj}t|d}| }W d    n1 sw   Y  t|d}t	t
||ddS )Nr  rbzutf-8r?   rA   )r  r   application_pdf
text_plainopenreadrs   	b64encodedecoder   r   )r6   r  	mime_typer
  file_content
base64_strr4   r4   r5   create_file_part'  s   
z!GeminiInputFiles.create_file_partNr  r   c                 C   s0   |du rg }t |}| |}t|g| S )z-Loads and formats input files for Gemini API.N)r  get_annotated_filepathr#  r   r   )r6   r  r   r  input_file_contentr4   r4   r5   r   6  s
   

zGeminiInputFiles.executer  )r.   r/   r0   r1   r   r   r   r   r#  r   r   r   r   r4   r4   r4   r5   r    s    	
+(r  c                   @   sf   e Zd Zedd Ze					ddeded	ed
ejdB de	e
 dB dedededejfddZdS )GeminiImagec                 C   s   t jddddt jjddddd	t jjd
ttjddt jjdddddddt jjddddt 	djddddt jjdg dddddt jjddd gd!ddd"t jjd#dt
dd$dd%gt j t j gt jjt jjt jjgdt jd&d'd(	S ))NGeminiImageNodez!Nano Banana (Google Gemini Image)api node/image/Geminiz)Edit images synchronously via Google API.r   TzText prompt for generationr   r   r   r   r$   r   r   r   r   r   r   r   r   r7   r   r   r   r   r   aspect_ratioautoz1:1z2:3z3:2z3:4z4:3z4:5z5:4z9:16z16:9z21:9r,  zkDefaults to matching the output image size to that of your input image, or otherwise generates 1:1 squares.)r   r   r   r   response_modalities
IMAGE+TEXTr   mChoose 'IMAGE' for image-only output, or 'IMAGE+TEXT' to return both the generated image and a text response.)r   r   r   r   r   r   r   zO{"type":"usd","usd":0.039,"format":{"suffix":"/Image (1K)","approximate":true}})r*   r   )r   r   r   r	   r   r+   r3   r   r   r   GEMINI_IMAGE_SYS_PROMPTr   r   r   r   r   r   r   r4   r4   r5   r   B  s   
?zGeminiImage.define_schemaNr,  r.  r   r   r$   r   r7   r   r*  r-  r   r9   c	              	      s   t |ddd t|dg}	|sd}|dkrt nt|d}
|d ur-|	t| |I d H  |d ur6|	| d }|rDtt|dgd d}t| td| d	d
t	t
tj|	dgt|dkr^dgnddg|
d|dttdI d H }tt|I d H t|S )NTro   strip_whitespace
min_lengthrh   r,  )aspectRatior   /proxy/vertexai/gemini/r   r   r   r   TEXTresponseModalitiesimageConfigr   generationConfigr   r@   r   r   )r"   r   r   r   rO   r   r   r   r   r   r   r   r   r   r   r   r   r   r~   rm   )r6   r   r$   r   r7   r   r*  r-  r   r`   image_configr   rT   r4   r4   r5   r     s:   

zGeminiImage.execute)NNr,  r.  r   r.   r/   r0   r   r   r   r   r	   r   r   r   r   r   r   r4   r4   r4   r5   r&  @  s8    
S
	
r&  c                   @   sf   e Zd Zedd Ze			ddedededed	ed
edejdB de	e
 dB dedejfddZdS )GeminiImage2c                 C   s   t jddddt jjddddd	t jjd
ddgdt jjdddddddt jjdg ddddt jjdg dddt jjdddgd dd!t jjd"dd#d$t d%jd&dd'd$t jjd(dtdd)dd*g	t j	 t j	 gt j
jt j
jt j
jgdtd+	S ),NGeminiImage2Nodez%Nano Banana Pro (Google Gemini Image)r(  <Generate or edit images synchronously via Google Vertex API.r   TText prompt describing the image to generate or the edits to apply. Include any constraints, styles, or details the model should follow.r   r)  r$   r   &Nano Banana 2 (Gemini 3.1 Flash Image)r   r   r   r   r   b  When the seed is fixed to a specific value, the model makes a best effort to provide the same response for repeated requests. Deterministic output isn't guaranteed. Also, changing the model or parameter settings, such as the temperature, can cause variations in the response even when you use the same seed value. By default, a random seed value is used.r   r*  r+  r,  wIf set to 'auto', matches your input image's aspect ratio; if no image is provided, a 16:9 square is usually generated.r   r%   1K2K4KGTarget output resolution. For 2K/4K the native Gemini upscaler is used.r   r   r-  r.  r   r/  )r   r   r   r7   ^Optional reference image(s). To include multiple images, use the Batch Images node (up to 14).r   r   r   r   r   r   r   r   r   r   r   r	   r   r   r   r   r0  r   r   r   r   r   GEMINI_IMAGE_2_PRICE_BADGEr   r4   r4   r5   r     s   
AzGeminiImage2.define_schemaNr   r   r$   r   r*  r%   r-  r7   r   r   r9   c
              	      s  t |ddd |dkrd}t|dg}
|d ur-t|dkr"td|
t| |I d H  |d ur6|
| t|d	}|d
krB||_d }|	rPtt	|	dgd d}t
| td| ddtttj|
dgt|dkrjdgnddg|d|dttdI d H }tt|I d H t|S )NTro   r1  rC  r   rh      5The current maximum number of supported images is 14.	imageSizer,  r   r5  r   r   r   r   r6  r7  r:  r<  )r"   r   r   rC   r   rO   r   r4  r   r   r   r   r   r   r   r   r   r   r   r   r   r~   rm   )r6   r   r$   r   r*  r%   r-  r7   r   r   r`   r=  r   rT   r4   r4   r5   r     sB   


zGeminiImage2.executeNNr   r>  r4   r4   r4   r5   r?    s8    
S	
	
r?  c                   @   sj   e Zd Zedd Ze			ddedededed	ed
ededejdB de	e
 dB dedejfddZdS )GeminiNanoBanana2c                 C   s  t jddddt jjddddd	t jjd
dgdt jjdddddddt jjdg ddddt jjdg dddt jjdddgddt jjd d!d"gdt jjd#dd$d%t d&jd'dd(d%t jjd)dtdd*dd+g
t j	 t j	 t jj	d,d-d.gt j
jt j
jt j
jgdtd/	S )0NrU  zNano Banana 2r(  rA  r   TrB  r   r)  r$   rC  rD  r   r   r   r   rE  r   r*  r+  r,  rF  r   r%   rG  rK  rL  r-  r   r.  )r   r   thinking_levelMINIMALHIGHr7   rM  r   r   r   r   r   r   r   thought_imagezoFirst image from the model's thinking process. Only available with thinking_level HIGH and IMAGE+TEXT modality.)r   r   r   rN  r   r4   r4   r5   r   Q  s   

X
zGeminiNanoBanana2.define_schemaNr   r   r$   r   r*  r%   r-  rV  r7   r   r   r9   c                    s(  t |ddd |dkrd}t|dg}|d ur-t|dkr"td|t| |I d H  |	d ur6||	 t|d	}|d
krB||_d }|
rPtt	|
dgd d}t
| td| ddtttj|dgt|dkrjdgnddg|t|dd|dttdI d H }tt|I d H t|t|ddI d H S )NTro   r1  rC  r   rh   rP  rQ  rR  r,  r   r5  r   r   r   r   r6  )thinkingLevel)r8  r9  thinkingConfigr:  r<  )rn   )r"   r   r   rC   r   rO   r   r4  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r~   rm   )r6   r   r$   r   r*  r%   r-  rV  r7   r   r   r`   r=  r   rT   r4   r4   r5   r     sL   

zGeminiNanoBanana2.executerT  r>  r4   r4   r4   r5   rU  O  s<    
o
	

rU  c                   @   s(   e Zd Zedeeej  fddZdS )GeminiExtensionr9   c                    s   t ttttgS r  )r   r&  r?  rU  r  )selfr4   r4   r5   get_node_list  s   zGeminiExtension.get_node_listN)	r.   r/   r0   r   r   typer   	ComfyNoder^  r4   r4   r4   r5   r\    s    r\  c                      s   t  S r  )r\  r4   r4   r4   r5   comfy_entrypoint  s   ra  )r   )F)Jr1   rs   r  enumr   r   ior   typingr   rw   typing_extensionsr   r  comfy_api.latestr   r   r	   r
   comfy_api_nodes.apis.geminir   r   r   r   r   r   r   r   r   r   r   r   r   r   r   comfy_api_nodes.utilr   r   r   r   r   r   r    r!   r"   r#   r   r  r0  r   r   rO  r   r+   r_  r`  r   r   r   rO   boolrS   re   rm   r~   floatr   r   r  r&  r?  rU  r\  ra  r4   r4   r4   r5   <module>   s`    D0	
+"+4 dO   -