diff --git a/typescript/src/model.ts b/typescript/src/model.ts
index cd49e1da..bec4a8af 100644
--- a/typescript/src/model.ts
+++ b/typescript/src/model.ts
@@ -13,9 +13,52 @@ export interface PromptSection {
     /**
      * Specifies the content of this section.
      */
-    content: string;
+    content: string | MultimodalPromptContent[];
 }
 
+/**
+ * GPT-4-vision, GPT-4-omni and GPT-4-turbo allow multi-modal input, where images and text can
+ * both be part of the prompt. To support this, the content of a prompt section may be an array
+ * of parts instead of a single string; each part is a plain string, a text part or an image part.
+ */
+export type MultimodalPromptContent =
+    | string
+    | TextPromptContent
+    | ImagePromptContent;
+
+/**
+ * A text part of a multi-modal prompt, tagged with `type: "text"`.
+ */
+export type TextPromptContent = {
+    type: "text";
+    text: string;
+};
+
+/**
+ * An image part of a multi-modal prompt, tagged with `type: "image_url"`.
+ */
+export type ImagePromptContent = {
+    type: "image_url";
+    image_url: ImageUrl;
+};
+
+/**
+ * Locates the image carried by an `ImagePromptContent` part and optionally sets its detail level.
+ */
+export type ImageUrl = {
+    /**
+     * This could be a URL to a hosted image, or the base64-encoded image content.
+     */
+    url: string;
+
+    /**
+     * Controls how the model processes the image and generates its textual understanding.
+     * In "low" mode, the model treats the image as 512x512px, while "high" mode considers
+     * the image at full size.
+     */
+    detail?: "auto" | "low" | "high";
+};
+
 /**
  * Represents a AI language model that can complete prompts. TypeChat uses an implementation of this
  * interface to communicate with an AI service that can translate natural language requests to JSON
@@ -135,7 +178,12 @@ function createFetchLanguageModel(url: string, headers: object, defaultParams: o
             const response = await fetch(url, options);
             if (response.ok) {
                 const json = await response.json() as { choices: { message: PromptSection }[] };
-                return success(json.choices[0].message.content ?? "");
+                // Absent (null/undefined) content still maps to "", as before; only non-string content is rejected.
+                const content = json.choices[0].message.content ?? "";
+                if (typeof content !== "string") {
+                    return error(`REST API unexpected response format: ${JSON.stringify(content)}`);
+                }
+                return success(content);
             }
             if (!isTransientHttpError(response.status) || retryCount >= retryMaxAttempts) {
                 return error(`REST API error ${response.status}: ${response.statusText}`);