feat: support structured output with json schema
zuisong committed Feb 19, 2025
1 parent a55a568 commit 09bac6a
Showing 4 changed files with 57 additions and 24 deletions.
2 changes: 2 additions & 0 deletions src/gemini-api-client/types.ts
@@ -14,6 +14,8 @@ export type FinishReason = Candidate["finishReason"]

export type Part = components["schemas"]["Part"]

+export type JsonSchema = components["schemas"]["Schema"]

export type EmbedContentRequest = components["schemas"]["EmbedContentRequest"]

export type EmbedContentResponse = components["schemas"]["EmbedContentResponse"]
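For reference, the new `JsonSchema` alias is just Gemini's `Schema` object. A value might look like this (a minimal sketch; it assumes the generated `Type` enum uses the uppercase names from the Gemini REST API, and `citySchema` is an illustrative name):

```ts
import type { JsonSchema } from "./gemini-api-client/types.ts"

// Hypothetical schema for an object like { city: "Paris", population: 2100000 }.
const citySchema: JsonSchema = {
  type: "OBJECT",
  properties: {
    city: { type: "STRING", description: "City name" },
    population: { type: "INTEGER", format: "int32" },
  },
  required: ["city", "population"],
}
```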
11 changes: 9 additions & 2 deletions src/generated-types/gemini-types.ts
@@ -2038,7 +2038,7 @@ export interface components {
GenerationConfig: {
/**
* Format: int32
- * @description Optional. Number of generated responses to return. Currently, this value can only be set to 1. If unset, this will default to 1.
+ * @description Optional. Number of generated responses to return. If unset, this will default to 1. Please note that this doesn't work for previous generation models (Gemini 1.0 family)
*/
candidateCount?: number;
/** @description Optional. Enables enhanced civic answers. It may not be available for all models. */
@@ -2644,7 +2644,7 @@ export interface components {
description?: string;
/** @description Optional. Possible values of the element of Type.STRING with enum format. For example we can define an Enum Direction as : {type:STRING, format:enum, enum:["EAST", "NORTH", "SOUTH", "WEST"]} */
enum?: string[];
-/** @description Optional. The format of the data. This is used only for primitive datatypes. Supported formats: for NUMBER type: float, double for INTEGER type: int32, int64 for STRING type: enum */
+/** @description Optional. The format of the data. This is used only for primitive datatypes. Supported formats: for NUMBER type: float, double for INTEGER type: int32, int64 for STRING type: enum, date-time */
format?: string;
/** @description Optional. Schema of the elements of Type.ARRAY. */
items?: components["schemas"]["Schema"];
@@ -3013,6 +3013,13 @@ export interface components {
promptTokenCount?: number;
/** @description Output only. List of modalities that were processed in the request input. */
readonly promptTokensDetails?: components["schemas"]["ModalityTokenCount"][];
+/**
+ * Format: int32
+ * @description Output only. Number of tokens present in tool-use prompt(s).
+ */
+readonly toolUsePromptTokenCount?: number;
+/** @description Output only. List of modalities that were processed for tool-use request inputs. */
+readonly toolUsePromptTokensDetails?: components["schemas"]["ModalityTokenCount"][];
/**
* Format: int32
* @description Total token count for the generation request (prompt + response candidates).
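The new tool-use token counts surface in a response's usage metadata. Reading them might look like this (a sketch; the minimal response shape below is an assumption standing in for the full generated response type):

```ts
// Stand-in for the relevant slice of a generateContent response.
interface UsageMetadata {
  promptTokenCount?: number
  toolUsePromptTokenCount?: number
  totalTokenCount?: number
}

declare const response: { usageMetadata?: UsageMetadata }

// Output-only field; absent when the request used no tools.
const toolTokens = response.usageMetadata?.toolUsePromptTokenCount ?? 0
console.log(`tool-use prompt tokens: ${toolTokens}`)
```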
47 changes: 27 additions & 20 deletions src/generated-types/openai-types.ts
@@ -1590,6 +1590,8 @@ export interface components {
* integrate the Assistants API with streaming.
* */
AssistantStreamEvent: components["schemas"]["ThreadStreamEvent"] | components["schemas"]["RunStreamEvent"] | components["schemas"]["RunStepStreamEvent"] | components["schemas"]["MessageStreamEvent"] | components["schemas"]["ErrorEvent"] | components["schemas"]["DoneEvent"];
+/** @enum {string} */
+AssistantSupportedModels: "o3-mini" | "o3-mini-2025-01-31" | "o1" | "o1-2024-12-17" | "gpt-4o" | "gpt-4o-2024-11-20" | "gpt-4o-2024-08-06" | "gpt-4o-2024-05-13" | "gpt-4o-mini" | "gpt-4o-mini-2024-07-18" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-0125-preview" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-vision-preview" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-3.5-turbo" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k-0613";
/** Code interpreter tool */
AssistantToolsCode: {
/**
@@ -2653,10 +2655,11 @@ export interface components {
*
* @example gpt-4o
*/
-model: string | ("gpt-4o" | "gpt-4o-2024-11-20" | "gpt-4o-2024-08-06" | "gpt-4o-2024-05-13" | "gpt-4o-mini" | "gpt-4o-mini-2024-07-18" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-0125-preview" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-vision-preview" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-3.5-turbo" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k-0613");
+model: string | components["schemas"]["AssistantSupportedModels"];
/** @description The name of the assistant. The maximum length is 256 characters.
* */
name?: string | null;
+reasoning_effort?: components["schemas"]["ReasoningEffort"];
response_format?: components["schemas"]["AssistantsApiResponseFormatOption"] & unknown;
/**
* @description What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
@@ -2911,19 +2914,7 @@ export interface components {
* @default 0
*/
presence_penalty: number | null;
-/**
- * @description **o1 models only**
- *
- * Constrains effort on reasoning for
- * [reasoning models](https://platform.openai.com/docs/guides/reasoning).
- * Currently supported values are `low`, `medium`, and `high`. Reducing
- * reasoning effort can result in faster responses and fewer tokens used
- * on reasoning in a response.
- *
- * @default medium
- * @enum {string}
- */
-reasoning_effort: "low" | "medium" | "high";
+reasoning_effort?: components["schemas"]["ReasoningEffort"];
/** @description An object specifying the format that the model must output.
*
* Setting to `{ "type": "json_schema", "json_schema": {...} }` enables
@@ -3759,9 +3750,9 @@ export interface components {
/** @description Hateful content that also includes violence or serious harm towards the targeted group based on race, gender, ethnicity, religion, nationality, sexual orientation, disability status, or caste. */
"hate/threatening": boolean;
/** @description Content that includes instructions or advice that facilitate the planning or execution of wrongdoing, or that gives advice or instruction on how to commit illicit acts. For example, "how to shoplift" would fit this category. */
-illicit: boolean;
+illicit: boolean | null;
/** @description Content that includes instructions or advice that facilitate the planning or execution of wrongdoing that also includes violence, or that gives advice or instruction on the procurement of any weapon. */
-"illicit/violent": boolean;
+"illicit/violent": boolean | null;
/** @description Content that promotes, encourages, or depicts acts of self-harm, such as suicide, cutting, and eating disorders. */
"self-harm": boolean;
/** @description Content that encourages performing acts of self-harm, such as suicide, cutting, and eating disorders, or that gives instructions or advice on how to commit such acts. */
@@ -3869,8 +3860,9 @@ export interface components {
* @description The ID of the [Model](/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used.
* @example gpt-4o
*/
-model?: (string | ("gpt-4o" | "gpt-4o-2024-11-20" | "gpt-4o-2024-08-06" | "gpt-4o-2024-05-13" | "gpt-4o-mini" | "gpt-4o-mini-2024-07-18" | "gpt-4-turbo" | "gpt-4-turbo-2024-04-09" | "gpt-4-0125-preview" | "gpt-4-turbo-preview" | "gpt-4-1106-preview" | "gpt-4-vision-preview" | "gpt-4" | "gpt-4-0314" | "gpt-4-0613" | "gpt-4-32k" | "gpt-4-32k-0314" | "gpt-4-32k-0613" | "gpt-3.5-turbo" | "gpt-3.5-turbo-16k" | "gpt-3.5-turbo-0613" | "gpt-3.5-turbo-1106" | "gpt-3.5-turbo-0125" | "gpt-3.5-turbo-16k-0613")) | null;
+model?: (string | components["schemas"]["AssistantSupportedModels"]) | null;
parallel_tool_calls?: components["schemas"]["ParallelToolCalls"];
+reasoning_effort?: components["schemas"]["ReasoningEffort"];
response_format?: components["schemas"]["AssistantsApiResponseFormatOption"] & unknown;
/** @description If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message.
* */
@@ -4836,6 +4828,7 @@ export interface components {
};
ListFineTuningJobEventsResponse: {
data: components["schemas"]["FineTuningJobEvent"][];
+has_more: boolean;
/** @enum {string} */
object: "list";
} & {
@@ -5383,10 +5376,11 @@ export interface components {
metadata?: components["schemas"]["Metadata"];
/** @description ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models) for descriptions of them.
* */
-model?: string;
+model?: string | components["schemas"]["AssistantSupportedModels"];
/** @description The name of the assistant. The maximum length is 256 characters.
* */
name?: string | null;
+reasoning_effort?: components["schemas"]["ReasoningEffort"];
response_format?: components["schemas"]["AssistantsApiResponseFormatOption"] & unknown;
/**
* @description What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
@@ -7476,6 +7470,19 @@ export interface components {
} & {
[key: string]: unknown;
};
+/**
+ * @description **o1 and o3-mini models only**
+ *
+ * Constrains effort on reasoning for
+ * [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ * Currently supported values are `low`, `medium`, and `high`. Reducing
+ * reasoning effort can result in faster responses and fewer tokens used
+ * on reasoning in a response.
+ *
+ * @default medium
+ * @enum {string|null}
+ */
+ReasoningEffort: "low" | "medium" | "high";
ResponseFormatJsonObject: {
/**
* @description The type of response format being defined: `json_object`
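Factoring `reasoning_effort` out into the shared `ReasoningEffort` schema means every request type above references the same three values. A request using it might look like this (a sketch against the public chat completions endpoint; the model choice and prompt are illustrative):

```ts
const res = await fetch("https://api.openai.com/v1/chat/completions", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
  },
  body: JSON.stringify({
    model: "o3-mini",
    reasoning_effort: "high", // "low" | "medium" | "high"; defaults to "medium"
    messages: [{ role: "user", content: "Summarize the CAP theorem in two sentences." }],
  }),
})
```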
@@ -8432,12 +8439,12 @@ export interface components {
};
/** @description The aggregated code interpreter sessions usage details of the specific time bucket. */
UsageCodeInterpreterSessionsResult: {
-/** @description The number of code interpreter sessions. */
-num_sessions?: number;
/** @enum {string} */
object: "organization.usage.code_interpreter_sessions.result";
/** @description When `group_by=project_id`, this field provides the project ID of the grouped usage result. */
project_id?: string | null;
+/** @description The number of code interpreter sessions. */
+sessions: number;
} & {
[key: string]: unknown;
};
21 changes: 19 additions & 2 deletions src/utils.ts
@@ -1,4 +1,4 @@
-import type { Content, GenerateContentRequest, Part } from "./gemini-api-client/types.ts"
+import type { Content, GenerateContentRequest, JsonSchema, Part } from "./gemini-api-client/types.ts"
import type { Any } from "./log.ts"
import type { OpenAI } from "./types.ts"

@@ -78,7 +78,23 @@ export function genModel(req: OpenAI.Chat.ChatCompletionCreateParams): [GeminiMo

functions = functions.concat((req.functions ?? []).map((it) => ({ strict: null, ...it })))

-const responseMimeType = req.response_format?.type === "json_object" ? "application/json" : "text/plain"
+let responseMimeType: string | undefined
+let responseSchema: JsonSchema | undefined
+
+switch (req.response_format?.type) {
+  case "json_object":
+    responseMimeType = "application/json"
+    break
+  case "json_schema":
+    responseMimeType = "application/json"
+    responseSchema = req.response_format.json_schema.schema
+    break
+  case "text":
+    responseMimeType = "text/plain"
+    break
+  default:
+    break
+}

const generateContentRequest: GenerateContentRequest = {
contents: openAiMessageToGeminiMessage(req.messages),
@@ -87,6 +103,7 @@ export function genModel(req: OpenAI.Chat.ChatCompletionCreateParams): [GeminiMo
temperature: req.temperature ?? undefined,
topP: req.top_p ?? undefined,
responseMimeType: responseMimeType,
+responseSchema: responseSchema,
thinkingConfig: !model.isThinkingModel()
? undefined
: {
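Taken together, the proxy now honors OpenAI-style `json_schema` response formats by forwarding the schema to Gemini's `responseSchema`. A request exercising the new path might look like this (a sketch; the model name is illustrative):

```ts
// OpenAI-style chat completion request that genModel() maps to
// responseMimeType = "application/json" and responseSchema = json_schema.schema.
const req = {
  model: "gemini-2.0-flash",
  messages: [{ role: "user" as const, content: "Name two EU capitals and their populations." }],
  response_format: {
    type: "json_schema" as const,
    json_schema: {
      name: "capitals",
      schema: {
        type: "ARRAY",
        items: {
          type: "OBJECT",
          properties: {
            city: { type: "STRING" },
            population: { type: "INTEGER", format: "int32" },
          },
        },
      },
    },
  },
}
```

Note that the switch assigns `req.response_format.json_schema.schema` to Gemini's `responseSchema` unchanged, so as of this commit the schema must already be in Gemini's `Schema` shape (uppercase type names) rather than standard JSON Schema; passing it through untranslated keeps the proxy thin, and a conversion layer would be the client's responsibility.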
