diff --git a/dist/main_bun.mjs b/dist/main_bun.mjs
index c5a2a92..77d9a81 100644
--- a/dist/main_bun.mjs
+++ b/dist/main_bun.mjs
@@ -102,13 +102,7 @@ function openAiMessageToGeminiMessage(messages) {
   return result;
 }
 function genModel(req) {
-  const defaultModel = (m) => {
-    if (m.startsWith("gemini")) {
-      return m;
-    }
-    return "gemini-1.5-flash-latest";
-  };
-  const model = ModelMapping[req.model] ?? defaultModel(req.model);
+  const model = GeminiModel.modelMapping(req.model);
   let functions = req.tools?.filter((it) => it.type === "function")?.map((it) => it.function) ?? [];
   functions = functions.concat((req.functions ?? []).map((it) => ({ strict: null, ...it })));
   const responseMimeType = req.response_format?.type === "json_object" ? "application/json" : "text/plain";
@@ -118,7 +112,10 @@ function genModel(req) {
       maxOutputTokens: req.max_tokens ?? void 0,
       temperature: req.temperature ?? void 0,
       topP: req.top_p ?? void 0,
-      responseMimeType
+      responseMimeType,
+      thinkingConfig: !model.isThinkingModel() ? void 0 : {
+        includeThoughts: true
+      }
     },
     tools: functions.length === 0 ? void 0 : [
       {
@@ -137,6 +134,34 @@ function genModel(req) {
   };
   return [model, generateContentRequest];
 }
+var GeminiModel = class _GeminiModel {
+  static modelMapping(model) {
+    const modelName = ModelMapping[model] ?? _GeminiModel.defaultModel(model);
+    return new _GeminiModel(modelName);
+  }
+  model;
+  constructor(model) {
+    this.model = model;
+  }
+  isThinkingModel() {
+    return this.model.includes("thinking");
+  }
+  apiVersion() {
+    if (this.isThinkingModel()) {
+      return "v1alpha";
+    }
+    return "v1beta";
+  }
+  toString() {
+    return this.model;
+  }
+  static defaultModel(m) {
+    if (m.startsWith("gemini")) {
+      return m;
+    }
+    return "gemini-1.5-flash-latest";
+  }
+};
 var ModelMapping = {
   "gpt-3.5-turbo": "gemini-1.5-flash-8b-latest",
   "gpt-4": "gemini-1.5-pro-latest",
@@ -405,7 +430,7 @@ var RequestUrl = class {
     this.apiParam = apiParam;
   }
   toURL() {
-    const api_version = "v1beta";
+    const api_version = this.model.apiVersion();
     const url = new URL(`${BASE_URL}/${api_version}/models/${this.model}:${this.task}`);
     url.searchParams.append("key", this.apiParam.apikey);
     if (this.stream) {
@@ -676,7 +701,12 @@ async function embeddingProxyHandler(rawReq) {
   log?.warn("request", embedContentRequest);
   let geminiResp = [];
   try {
-    for await (const it of generateContent("embedContent", apiParam, "text-embedding-004", embedContentRequest)) {
+    for await (const it of generateContent(
+      "embedContent",
+      apiParam,
+      new GeminiModel("text-embedding-004"),
+      embedContentRequest
+    )) {
       const data = it.embedding?.values;
       geminiResp = data;
       break;
@@ -745,7 +775,7 @@ app.post("/v1/chat/completions", chatProxyHandler);
 app.post("/v1/embeddings", embeddingProxyHandler);
 app.get("/v1/models", () => Response.json(models()));
 app.get("/v1/models/:model", (c) => Response.json(modelDetail(c.params.model)));
-app.post(":model_version/models/:model_and_action", geminiProxy);
+app.post("/:model_version/models/:model_and_action", geminiProxy);
 app.all("*", () => new Response("Page Not Found", { status: 404 }));
 
 // main_bun.ts
diff --git a/dist/main_cloudflare-workers.mjs b/dist/main_cloudflare-workers.mjs
index 61b58f4..ec1e764 100644
--- a/dist/main_cloudflare-workers.mjs
+++ b/dist/main_cloudflare-workers.mjs
@@ -102,13 +102,7 @@ function openAiMessageToGeminiMessage(messages) {
   return result;
 }
 function genModel(req) {
-  const defaultModel = (m) => {
-    if (m.startsWith("gemini")) {
-      return m;
-    }
-    return "gemini-1.5-flash-latest";
-  };
-  const model = ModelMapping[req.model] ?? defaultModel(req.model);
+  const model = GeminiModel.modelMapping(req.model);
   let functions = req.tools?.filter((it) => it.type === "function")?.map((it) => it.function) ?? [];
   functions = functions.concat((req.functions ?? []).map((it) => ({ strict: null, ...it })));
   const responseMimeType = req.response_format?.type === "json_object" ? "application/json" : "text/plain";
@@ -118,7 +112,10 @@ function genModel(req) {
       maxOutputTokens: req.max_tokens ?? void 0,
       temperature: req.temperature ?? void 0,
       topP: req.top_p ?? void 0,
-      responseMimeType
+      responseMimeType,
+      thinkingConfig: !model.isThinkingModel() ? void 0 : {
+        includeThoughts: true
+      }
     },
     tools: functions.length === 0 ? void 0 : [
       {
@@ -137,6 +134,34 @@ function genModel(req) {
   };
   return [model, generateContentRequest];
 }
+var GeminiModel = class _GeminiModel {
+  static modelMapping(model) {
+    const modelName = ModelMapping[model] ?? _GeminiModel.defaultModel(model);
+    return new _GeminiModel(modelName);
+  }
+  model;
+  constructor(model) {
+    this.model = model;
+  }
+  isThinkingModel() {
+    return this.model.includes("thinking");
+  }
+  apiVersion() {
+    if (this.isThinkingModel()) {
+      return "v1alpha";
+    }
+    return "v1beta";
+  }
+  toString() {
+    return this.model;
+  }
+  static defaultModel(m) {
+    if (m.startsWith("gemini")) {
+      return m;
+    }
+    return "gemini-1.5-flash-latest";
+  }
+};
 var ModelMapping = {
   "gpt-3.5-turbo": "gemini-1.5-flash-8b-latest",
   "gpt-4": "gemini-1.5-pro-latest",
@@ -405,7 +430,7 @@ var RequestUrl = class {
     this.apiParam = apiParam;
   }
   toURL() {
-    const api_version = "v1beta";
+    const api_version = this.model.apiVersion();
     const url = new URL(`${BASE_URL}/${api_version}/models/${this.model}:${this.task}`);
     url.searchParams.append("key", this.apiParam.apikey);
     if (this.stream) {
@@ -676,7 +701,12 @@ async function embeddingProxyHandler(rawReq) {
   log?.warn("request", embedContentRequest);
   let geminiResp = [];
   try {
-    for await (const it of generateContent("embedContent", apiParam, "text-embedding-004", embedContentRequest)) {
+    for await (const it of generateContent(
+      "embedContent",
+      apiParam,
+      new GeminiModel("text-embedding-004"),
+      embedContentRequest
+    )) {
       const data = it.embedding?.values;
       geminiResp = data;
       break;
@@ -745,7 +775,7 @@ app.post("/v1/chat/completions", chatProxyHandler);
 app.post("/v1/embeddings", embeddingProxyHandler);
 app.get("/v1/models", () => Response.json(models()));
 app.get("/v1/models/:model", (c) => Response.json(modelDetail(c.params.model)));
-app.post(":model_version/models/:model_and_action", geminiProxy);
+app.post("/:model_version/models/:model_and_action", geminiProxy);
 app.all("*", () => new Response("Page Not Found", { status: 404 }));
 
 // main_cloudflare-workers.ts
diff --git a/dist/main_deno.mjs b/dist/main_deno.mjs
index a849ac1..198c839 100644
--- a/dist/main_deno.mjs
+++ b/dist/main_deno.mjs
@@ -102,13 +102,7 @@ function openAiMessageToGeminiMessage(messages) {
   return result;
 }
 function genModel(req) {
-  const defaultModel = (m) => {
-    if (m.startsWith("gemini")) {
-      return m;
-    }
-    return "gemini-1.5-flash-latest";
-  };
-  const model = ModelMapping[req.model] ?? defaultModel(req.model);
+  const model = GeminiModel.modelMapping(req.model);
   let functions = req.tools?.filter((it) => it.type === "function")?.map((it) => it.function) ?? [];
   functions = functions.concat((req.functions ?? []).map((it) => ({ strict: null, ...it })));
   const responseMimeType = req.response_format?.type === "json_object" ? "application/json" : "text/plain";
@@ -118,7 +112,10 @@ function genModel(req) {
       maxOutputTokens: req.max_tokens ?? void 0,
       temperature: req.temperature ?? void 0,
       topP: req.top_p ?? void 0,
-      responseMimeType
+      responseMimeType,
+      thinkingConfig: !model.isThinkingModel() ? void 0 : {
+        includeThoughts: true
+      }
     },
     tools: functions.length === 0 ? void 0 : [
       {
@@ -137,6 +134,34 @@ function genModel(req) {
   };
   return [model, generateContentRequest];
 }
+var GeminiModel = class _GeminiModel {
+  static modelMapping(model) {
+    const modelName = ModelMapping[model] ?? _GeminiModel.defaultModel(model);
+    return new _GeminiModel(modelName);
+  }
+  model;
+  constructor(model) {
+    this.model = model;
+  }
+  isThinkingModel() {
+    return this.model.includes("thinking");
+  }
+  apiVersion() {
+    if (this.isThinkingModel()) {
+      return "v1alpha";
+    }
+    return "v1beta";
+  }
+  toString() {
+    return this.model;
+  }
+  static defaultModel(m) {
+    if (m.startsWith("gemini")) {
+      return m;
+    }
+    return "gemini-1.5-flash-latest";
+  }
+};
 var ModelMapping = {
   "gpt-3.5-turbo": "gemini-1.5-flash-8b-latest",
   "gpt-4": "gemini-1.5-pro-latest",
@@ -405,7 +430,7 @@ var RequestUrl = class {
     this.apiParam = apiParam;
   }
   toURL() {
-    const api_version = "v1beta";
+    const api_version = this.model.apiVersion();
     const url = new URL(`${BASE_URL}/${api_version}/models/${this.model}:${this.task}`);
     url.searchParams.append("key", this.apiParam.apikey);
     if (this.stream) {
@@ -676,7 +701,12 @@ async function embeddingProxyHandler(rawReq) {
   log?.warn("request", embedContentRequest);
   let geminiResp = [];
   try {
-    for await (const it of generateContent("embedContent", apiParam, "text-embedding-004", embedContentRequest)) {
+    for await (const it of generateContent(
+      "embedContent",
+      apiParam,
+      new GeminiModel("text-embedding-004"),
+      embedContentRequest
+    )) {
       const data = it.embedding?.values;
       geminiResp = data;
       break;
@@ -745,7 +775,7 @@ app.post("/v1/chat/completions", chatProxyHandler);
 app.post("/v1/embeddings", embeddingProxyHandler);
 app.get("/v1/models", () => Response.json(models()));
 app.get("/v1/models/:model", (c) => Response.json(modelDetail(c.params.model)));
-app.post(":model_version/models/:model_and_action", geminiProxy);
+app.post("/:model_version/models/:model_and_action", geminiProxy);
 app.all("*", () => new Response("Page Not Found", { status: 404 }));
 
 // main_deno.ts
diff --git a/dist/main_node.mjs b/dist/main_node.mjs
index 3de4828..a4b577f 100644
--- a/dist/main_node.mjs
+++ b/dist/main_node.mjs
@@ -547,13 +547,7 @@ function openAiMessageToGeminiMessage(messages) {
   return result;
 }
 function genModel(req) {
-  const defaultModel = (m) => {
-    if (m.startsWith("gemini")) {
-      return m;
-    }
-    return "gemini-1.5-flash-latest";
-  };
-  const model = ModelMapping[req.model] ?? defaultModel(req.model);
+  const model = GeminiModel.modelMapping(req.model);
   let functions = req.tools?.filter((it) => it.type === "function")?.map((it) => it.function) ?? [];
   functions = functions.concat((req.functions ?? []).map((it) => ({ strict: null, ...it })));
   const responseMimeType = req.response_format?.type === "json_object" ? "application/json" : "text/plain";
@@ -563,7 +557,10 @@ function genModel(req) {
       maxOutputTokens: req.max_tokens ?? void 0,
       temperature: req.temperature ?? void 0,
       topP: req.top_p ?? void 0,
-      responseMimeType
+      responseMimeType,
+      thinkingConfig: !model.isThinkingModel() ? void 0 : {
+        includeThoughts: true
+      }
     },
     tools: functions.length === 0 ? void 0 : [
      {
@@ -582,6 +579,34 @@ function genModel(req) {
   };
   return [model, generateContentRequest];
 }
+var GeminiModel = class _GeminiModel {
+  static modelMapping(model) {
+    const modelName = ModelMapping[model] ?? _GeminiModel.defaultModel(model);
+    return new _GeminiModel(modelName);
+  }
+  model;
+  constructor(model) {
+    this.model = model;
+  }
+  isThinkingModel() {
+    return this.model.includes("thinking");
+  }
+  apiVersion() {
+    if (this.isThinkingModel()) {
+      return "v1alpha";
+    }
+    return "v1beta";
+  }
+  toString() {
+    return this.model;
+  }
+  static defaultModel(m) {
+    if (m.startsWith("gemini")) {
+      return m;
+    }
+    return "gemini-1.5-flash-latest";
+  }
+};
 var ModelMapping = {
   "gpt-3.5-turbo": "gemini-1.5-flash-8b-latest",
   "gpt-4": "gemini-1.5-pro-latest",
@@ -850,7 +875,7 @@ var RequestUrl = class {
     this.apiParam = apiParam;
   }
   toURL() {
-    const api_version = "v1beta";
+    const api_version = this.model.apiVersion();
     const url = new URL(`${BASE_URL}/${api_version}/models/${this.model}:${this.task}`);
     url.searchParams.append("key", this.apiParam.apikey);
     if (this.stream) {
@@ -1121,7 +1146,12 @@ async function embeddingProxyHandler(rawReq) {
   log?.warn("request", embedContentRequest);
   let geminiResp = [];
   try {
-    for await (const it of generateContent("embedContent", apiParam, "text-embedding-004", embedContentRequest)) {
+    for await (const it of generateContent(
+      "embedContent",
+      apiParam,
+      new GeminiModel("text-embedding-004"),
+      embedContentRequest
+    )) {
       const data = it.embedding?.values;
       geminiResp = data;
       break;
@@ -1190,7 +1220,7 @@ app.post("/v1/chat/completions", chatProxyHandler);
 app.post("/v1/embeddings", embeddingProxyHandler);
 app.get("/v1/models", () => Response.json(models()));
 app.get("/v1/models/:model", (c) => Response.json(modelDetail(c.params.model)));
-app.post(":model_version/models/:model_and_action", geminiProxy);
+app.post("/:model_version/models/:model_and_action", geminiProxy);
 app.all("*", () => new Response("Page Not Found", { status: 404 }));
 
 // main_node.ts
diff --git a/fly.toml b/fly.toml
index 45ae8b9..f0c9e19 100644
--- a/fly.toml
+++ b/fly.toml
@@ -7,7 +7,7 @@ dockerfile = 'docker/bun.Dockerfile'
 
 [http_service]
   internal_port = 8000
   force_https = true
-  auto_stop_machines = true
+  auto_stop_machines = "suspend"
   auto_start_machines = true
   min_machines_running = 0
   processes = ['app']
diff --git a/src/app.ts b/src/app.ts
index 9eb876d..2bf9c48 100644
--- a/src/app.ts
+++ b/src/app.ts
@@ -32,7 +32,7 @@ app.post("/v1/chat/completions", chatProxyHandler)
 app.post("/v1/embeddings", embeddingProxyHandler)
 app.get("/v1/models", () => Response.json(models()))
 app.get("/v1/models/:model", (c) => Response.json(modelDetail(c.params.model)))
-app.post(":model_version/models/:model_and_action", geminiProxy)
+app.post("/:model_version/models/:model_and_action", geminiProxy)
 app.all("*", () => new Response("Page Not Found", { status: 404 }))
 
 export { app }
diff --git a/src/gemini-api-client/gemini-api-client.ts b/src/gemini-api-client/gemini-api-client.ts
index fa532ee..631ff17 100644
--- a/src/gemini-api-client/gemini-api-client.ts
+++ b/src/gemini-api-client/gemini-api-client.ts
@@ -89,8 +89,9 @@ export class RequestUrl {
     this.stream = stream
     this.apiParam = apiParam
   }
+
   toURL(): URL {
-    const api_version: API_VERSION = "v1beta"
+    const api_version = this.model.apiVersion()
     const url = new URL(`${BASE_URL}/${api_version}/models/${this.model}:${this.task}`)
     url.searchParams.append("key", this.apiParam.apikey)
     if (this.stream) {
@@ -102,8 +103,6 @@ export class RequestUrl {
 
 const BASE_URL = "https://generativelanguage.googleapis.com"
 
-type API_VERSION = "v1beta" | "v1"
-
 /**
  * Generates the request options to be passed to the fetch API.
  * @param requestOptions - The user-defined request options.
diff --git a/src/openai/embeddingProxyHandler.ts b/src/openai/embeddingProxyHandler.ts
index b688bb0..c1b585e 100644
--- a/src/openai/embeddingProxyHandler.ts
+++ b/src/openai/embeddingProxyHandler.ts
@@ -1,7 +1,7 @@
 import { generateContent } from "../gemini-api-client/gemini-api-client.ts"
 import type { EmbedContentRequest } from "../gemini-api-client/types.ts"
 import type { OpenAI } from "../types.ts"
-import { getToken } from "../utils.ts"
+import { GeminiModel, getToken } from "../utils.ts"
 
 export async function embeddingProxyHandler(rawReq: Request): Promise<Response> {
   const req = (await rawReq.json()) as OpenAI.Embeddings.EmbeddingCreateParams
@@ -24,7 +24,12 @@ export async function embeddingProxyHandler(rawReq: Request): Promise<Response>
 
   let geminiResp: number[] | undefined = []
   try {
-    for await (const it of generateContent("embedContent", apiParam, "text-embedding-004", embedContentRequest)) {
+    for await (const it of generateContent(
+      "embedContent",
+      apiParam,
+      new GeminiModel("text-embedding-004"),
+      embedContentRequest,
+    )) {
       const data = it.embedding?.values
       geminiResp = data
       break
diff --git a/src/utils.ts b/src/utils.ts
index 9626150..fe2bc97 100644
--- a/src/utils.ts
+++ b/src/utils.ts
@@ -71,14 +71,7 @@ export function openAiMessageToGeminiMessage(messages: OpenAI.Chat.ChatCompletio
 }
 
 export function genModel(req: OpenAI.Chat.ChatCompletionCreateParams): [GeminiModel, GenerateContentRequest] {
-  const defaultModel = (m: string): GeminiModel => {
-    if (m.startsWith("gemini")) {
-      return m as GeminiModel
-    }
-    return "gemini-1.5-flash-latest"
-  }
-
-  const model: GeminiModel = ModelMapping[req.model] ?? defaultModel(req.model)
+  const model: GeminiModel = GeminiModel.modelMapping(req.model)
   let functions: OpenAI.Chat.FunctionObject[] =
     req.tools?.filter((it) => it.type === "function")?.map((it) => it.function) ?? []
 
@@ -94,6 +87,11 @@ export function genModel(req: OpenAI.Chat.ChatCompletionCreateParams): [GeminiMo
       temperature: req.temperature ?? undefined,
       topP: req.top_p ?? undefined,
       responseMimeType: responseMimeType,
+      thinkingConfig: !model.isThinkingModel()
+        ? undefined
+        : {
+            includeThoughts: true,
+          },
     },
     tools:
       functions.length === 0
@@ -124,7 +122,42 @@ export type KnownGeminiModel =
   | "gemini-2.0-flash-exp"
   | "text-embedding-004"
 
-export type GeminiModel = `gemini${string}` | "text-embedding-004"
+export type API_VERSION = "v1beta" | "v1" | "v1alpha"
+
+export class GeminiModel {
+  static modelMapping(model: string): GeminiModel {
+    const modelName: GeminiModelName | KnownGeminiModel = ModelMapping[model] ?? GeminiModel.defaultModel(model)
+    return new GeminiModel(modelName)
+  }
+  public readonly model: GeminiModelName
+  constructor(model: GeminiModelName) {
+    this.model = model
+  }
+
+  isThinkingModel(): boolean {
+    return this.model.includes("thinking")
+  }
+
+  apiVersion(): API_VERSION {
+    if (this.isThinkingModel()) {
+      return "v1alpha"
+    }
+    return "v1beta"
+  }
+
+  toString(): string {
+    return this.model
+  }
+
+  private static defaultModel(m: string): GeminiModelName {
+    if (m.startsWith("gemini")) {
+      return m as GeminiModelName
+    }
+    return "gemini-1.5-flash-latest"
+  }
+}
+
+export type GeminiModelName = `gemini${string}` | "text-embedding-004"
 
 export const ModelMapping: Readonly<Record<string, KnownGeminiModel>> = {
   "gpt-3.5-turbo": "gemini-1.5-flash-8b-latest",
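
Note: a quick usage sketch of the model routing this diff introduces, assuming the GeminiModel class from src/utils.ts above; the inputs "gemini-2.0-flash-thinking-exp" and "claude-3" are illustrative names, assumed resolvable by defaultModel() and absent from ModelMapping respectively.

import { GeminiModel } from "./src/utils.ts"

// An OpenAI alias resolves through ModelMapping and stays on v1beta.
const mapped = GeminiModel.modelMapping("gpt-4")
console.log(mapped.toString(), mapped.apiVersion()) // "gemini-1.5-pro-latest" "v1beta"

// A name containing "thinking" is detected as a thinking model and routed to
// v1alpha; genModel() then also sets thinkingConfig.includeThoughts.
const thinking = GeminiModel.modelMapping("gemini-2.0-flash-thinking-exp")
console.log(thinking.isThinkingModel(), thinking.apiVersion()) // true "v1alpha"

// Unmapped, non-gemini names fall back to the default model.
const fallback = GeminiModel.modelMapping("claude-3")
console.log(fallback.toString()) // "gemini-1.5-flash-latest"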