From 8ddc9aa03621aed8d33536a5bd549f5dee68ed9d Mon Sep 17 00:00:00 2001 From: Hassieb Pakzad <68423100+hassiebp@users.noreply.github.com> Date: Mon, 16 Dec 2024 18:31:46 +0100 Subject: [PATCH] feat(cost): allow all usage types (#486) --- ...angfuse-integration-error-handling.spec.ts | 6 +- .../langfuse-integration-langchain.spec.ts | 10 +- .../openapi-spec/openapi-server.yaml | 121 ++++++++++++++++-- langfuse-core/src/openapi/server.ts | 68 +++++++++- langfuse-core/src/types.ts | 1 + langfuse-langchain/src/callback.ts | 43 ++++--- langfuse-vercel/src/LangfuseExporter.ts | 37 +++--- langfuse/src/openai/parseOpenAI.ts | 31 ++++- langfuse/src/openai/traceMethod.ts | 18 ++- 9 files changed, 272 insertions(+), 63 deletions(-) diff --git a/integration-test/langfuse-integration-error-handling.spec.ts b/integration-test/langfuse-integration-error-handling.spec.ts index fded4ba8..c2a8b577 100644 --- a/integration-test/langfuse-integration-error-handling.spec.ts +++ b/integration-test/langfuse-integration-error-handling.spec.ts @@ -84,7 +84,7 @@ describe("No errors should be thrown by SDKs", () => { await handler.shutdownAsync(); // expect no errors to be thrown (would kill jest) - expect(global.console.error).toHaveBeenCalledTimes(1); + expect(global.console.error).toHaveBeenCalledTimes(0); }, 10000); it("incorrect keys", async () => { @@ -109,7 +109,7 @@ describe("No errors should be thrown by SDKs", () => { await handler.shutdownAsync(); // expect no errors to be thrown (would kill jest) - expect(global.console.error).toHaveBeenCalledTimes(1); + expect(global.console.error).toHaveBeenCalledTimes(0); }, 10000); }); }); @@ -185,7 +185,7 @@ describe("shutdown async behavior", () => { } await handler.shutdownAsync(); - expect(flushCallback).toHaveBeenCalledTimes(15); + expect(flushCallback).toHaveBeenCalledTimes(8); const anyCallbackCount = anyCallback.mock.calls.length; diff --git a/integration-test/langfuse-integration-langchain.spec.ts 
b/integration-test/langfuse-integration-langchain.spec.ts index ec5285cf..fbf4891d 100644 --- a/integration-test/langfuse-integration-langchain.spec.ts +++ b/integration-test/langfuse-integration-langchain.spec.ts @@ -146,7 +146,7 @@ describe("Langchain", () => { it("should execute simple non chat llm call", async () => { const handler = new CallbackHandler({}); - const llm = new OpenAI({ modelName: "gpt-4-1106-preview", maxTokens: 20 }); + const llm = new OpenAI({ modelName: "gpt-3.5-turbo-instruct", maxTokens: 20 }); const res = await llm.invoke("Tell me a joke on a non chat api", { callbacks: [handler] }); const traceId = handler.traceId; await handler.flushAsync(); @@ -169,7 +169,7 @@ describe("Langchain", () => { // eslint-disable-next-line @typescript-eslint/no-non-null-assertion const singleGeneration = generation![0]; - expect(singleGeneration.name).toBe("OpenAIChat"); + expect(singleGeneration.name).toBe("OpenAI"); expect(singleGeneration.input).toMatchObject(["Tell me a joke on a non chat api"]); expect(singleGeneration.usage?.input).toBeDefined(); expect(singleGeneration.usage?.output).toBeDefined(); @@ -793,9 +793,9 @@ describe("Langchain", () => { role: "assistant", }, usage: { - completionTokens: expect.any(Number), - promptTokens: expect.any(Number), - totalTokens: expect.any(Number), + input: expect.any(Number), + output: expect.any(Number), + total: expect.any(Number), }, version: "1.0.0", }); diff --git a/langfuse-core/openapi-spec/openapi-server.yaml b/langfuse-core/openapi-spec/openapi-server.yaml index 1b84a034..9e40794a 100644 --- a/langfuse-core/openapi-spec/openapi-server.yaml +++ b/langfuse-core/openapi-spec/openapi-server.yaml @@ -2389,7 +2389,7 @@ components: type: string description: >- The content of the comment. May include markdown. Currently limited - to 500 characters. + to 3000 characters. 
authorUserId: type: string nullable: true @@ -2627,7 +2627,9 @@ components: usage: $ref: '#/components/schemas/Usage' nullable: true - description: The usage data of the observation + description: >- + (Deprecated. Use usageDetails and costDetails instead.) The usage + data of the observation level: $ref: '#/components/schemas/ObservationLevel' description: The level of the observation @@ -2643,6 +2645,25 @@ components: type: string nullable: true description: The prompt ID associated with the observation + usageDetails: + type: object + additionalProperties: + type: integer + nullable: true + description: >- + The usage details of the observation. Key is the name of the usage + metric, value is the number of units consumed. The total key is the + sum of all (non-total) usage metrics or the total value ingested. + costDetails: + type: object + additionalProperties: + type: number + format: double + nullable: true + description: >- + The cost details of the observation. Key is the name of the cost + metric, value is the cost in USD. The total key is the sum of all + (non-total) cost metrics or the total value ingested. required: - id - type @@ -2683,17 +2704,23 @@ components: type: number format: double nullable: true - description: The calculated cost of the input in USD + description: >- + (Deprecated. Use usageDetails and costDetails instead.) The + calculated cost of the input in USD calculatedOutputCost: type: number format: double nullable: true - description: The calculated cost of the output in USD + description: >- + (Deprecated. Use usageDetails and costDetails instead.) The + calculated cost of the output in USD calculatedTotalCost: type: number format: double nullable: true - description: The calculated total cost in USD + description: >- + (Deprecated. Use usageDetails and costDetails instead.) 
The + calculated total cost in USD latency: type: number format: double @@ -2709,7 +2736,9 @@ components: Usage: title: Usage type: object - description: Standard interface for usage and cost + description: >- + (Deprecated. Use usageDetails and costDetails instead.) Standard + interface for usage and cost properties: input: type: integer @@ -3608,6 +3637,15 @@ components: usage: $ref: '#/components/schemas/IngestionUsage' nullable: true + usageDetails: + $ref: '#/components/schemas/UsageDetails' + nullable: true + costDetails: + type: object + additionalProperties: + type: number + format: double + nullable: true promptName: type: string nullable: true @@ -3638,6 +3676,15 @@ components: promptName: type: string nullable: true + usageDetails: + $ref: '#/components/schemas/UsageDetails' + nullable: true + costDetails: + type: object + additionalProperties: + type: number + format: double + nullable: true promptVersion: type: integer nullable: true @@ -3955,6 +4002,37 @@ components: required: - successes - errors + OpenAIUsageSchema: + title: OpenAIUsageSchema + type: object + properties: + prompt_tokens: + type: integer + completion_tokens: + type: integer + total_tokens: + type: integer + prompt_tokens_details: + type: object + additionalProperties: + type: integer + nullable: true + completion_tokens_details: + type: object + additionalProperties: + type: integer + nullable: true + required: + - prompt_tokens + - completion_tokens + - total_tokens + UsageDetails: + title: UsageDetails + oneOf: + - type: object + additionalProperties: + type: integer + - $ref: '#/components/schemas/OpenAIUsageSchema' GetMediaResponse: title: GetMediaResponse type: object @@ -4058,8 +4136,35 @@ components: title: MediaContentType type: string enum: - - >- - 
image/png","image/jpeg","image/jpg","image/webp","image/gif","image/svg+xml","image/tiff","image/bmp","audio/mpeg","audio/mp3","audio/wav","audio/ogg","audio/oga","audio/aac","audio/mp4","audio/flac","video/mp4","video/webm","text/plain","text/html","text/css","text/csv","application/pdf","application/msword","application/vnd.ms-excel","application/zip","application/json","application/xml","application/octet-stream + - image/png + - image/jpeg + - image/jpg + - image/webp + - image/gif + - image/svg+xml + - image/tiff + - image/bmp + - audio/mpeg + - audio/mp3 + - audio/wav + - audio/ogg + - audio/oga + - audio/aac + - audio/mp4 + - audio/flac + - video/mp4 + - video/webm + - text/plain + - text/html + - text/css + - text/csv + - application/pdf + - application/msword + - application/vnd.ms-excel + - application/zip + - application/json + - application/xml + - application/octet-stream description: The MIME type of the media record DailyMetrics: title: DailyMetrics diff --git a/langfuse-core/src/openapi/server.ts b/langfuse-core/src/openapi/server.ts index 12537a99..2fbc6996 100644 --- a/langfuse-core/src/openapi/server.ts +++ b/langfuse-core/src/openapi/server.ts @@ -524,7 +524,7 @@ export interface components { objectType: string; /** @description The id of the object to attach the comment to. If this does not reference a valid existing object, an error will be thrown. */ objectId: string; - /** @description The content of the comment. May include markdown. Currently limited to 500 characters. */ + /** @description The content of the comment. May include markdown. Currently limited to 3000 characters. */ content: string; /** @description The id of the user who created the comment. */ authorUserId?: string | null; @@ -657,7 +657,7 @@ export interface components { metadata?: unknown; /** @description The output data of the observation */ output?: unknown; - /** @description The usage data of the observation */ + /** @description (Deprecated. 
Use usageDetails and costDetails instead.) The usage data of the observation */ usage?: components["schemas"]["Usage"]; /** @description The level of the observation */ level: components["schemas"]["ObservationLevel"]; @@ -667,6 +667,14 @@ export interface components { parentObservationId?: string | null; /** @description The prompt ID associated with the observation */ promptId?: string | null; + /** @description The usage details of the observation. Key is the name of the usage metric, value is the number of units consumed. The total key is the sum of all (non-total) usage metrics or the total value ingested. */ + usageDetails?: { + [key: string]: number; + } | null; + /** @description The cost details of the observation. Key is the name of the cost metric, value is the cost in USD. The total key is the sum of all (non-total) cost metrics or the total value ingested. */ + costDetails?: { + [key: string]: number; + } | null; }; /** ObservationsView */ ObservationsView: { @@ -693,17 +701,17 @@ export interface components { totalPrice?: number | null; /** * Format: double - * @description The calculated cost of the input in USD + * @description (Deprecated. Use usageDetails and costDetails instead.) The calculated cost of the input in USD */ calculatedInputCost?: number | null; /** * Format: double - * @description The calculated cost of the output in USD + * @description (Deprecated. Use usageDetails and costDetails instead.) The calculated cost of the output in USD */ calculatedOutputCost?: number | null; /** * Format: double - * @description The calculated total cost in USD + * @description (Deprecated. Use usageDetails and costDetails instead.) The calculated total cost in USD */ calculatedTotalCost?: number | null; /** @@ -719,7 +727,7 @@ export interface components { } & components["schemas"]["Observation"]; /** * Usage - * @description Standard interface for usage and cost + * @description (Deprecated. Use usageDetails and costDetails instead.) 
Standard interface for usage and cost */ Usage: { /** @description Number of input units (e.g. tokens) */ @@ -1156,6 +1164,10 @@ export interface components { [key: string]: components["schemas"]["MapValue"]; } | null; usage?: components["schemas"]["IngestionUsage"]; + usageDetails?: components["schemas"]["UsageDetails"]; + costDetails?: { + [key: string]: number; + } | null; promptName?: string | null; promptVersion?: number | null; } & components["schemas"]["CreateSpanBody"]; @@ -1169,6 +1181,10 @@ export interface components { } | null; usage?: components["schemas"]["IngestionUsage"]; promptName?: string | null; + usageDetails?: components["schemas"]["UsageDetails"]; + costDetails?: { + [key: string]: number; + } | null; promptVersion?: number | null; } & components["schemas"]["UpdateSpanBody"]; /** ObservationBody */ @@ -1299,6 +1315,24 @@ export interface components { successes: components["schemas"]["IngestionSuccess"][]; errors: components["schemas"]["IngestionError"][]; }; + /** OpenAIUsageSchema */ + OpenAIUsageSchema: { + prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + prompt_tokens_details?: { + [key: string]: number; + } | null; + completion_tokens_details?: { + [key: string]: number; + } | null; + }; + /** UsageDetails */ + UsageDetails: + | { + [key: string]: number; + } + | components["schemas"]["OpenAIUsageSchema"]; /** GetMediaResponse */ GetMediaResponse: { /** @description The unique langfuse identifier of a media record */ @@ -1362,11 +1396,31 @@ export interface components { | "image/jpeg" | "image/jpg" | "image/webp" + | "image/gif" + | "image/svg+xml" + | "image/tiff" + | "image/bmp" | "audio/mpeg" | "audio/mp3" | "audio/wav" + | "audio/ogg" + | "audio/oga" + | "audio/aac" + | "audio/mp4" + | "audio/flac" + | "video/mp4" + | "video/webm" | "text/plain" - | "application/pdf"; + | "text/html" + | "text/css" + | "text/csv" + | "application/pdf" + | "application/msword" + | "application/vnd.ms-excel" + | 
"application/zip" + | "application/json" + | "application/xml" + | "application/octet-stream"; /** DailyMetrics */ DailyMetrics: { /** @description A list of daily metrics, only days with ingested data are included. */ diff --git a/langfuse-core/src/types.ts b/langfuse-core/src/types.ts index 034abc32..9545145e 100644 --- a/langfuse-core/src/types.ts +++ b/langfuse-core/src/types.ts @@ -93,6 +93,7 @@ export type EventBody = | UpdateLangfuseGenerationBody; export type Usage = FixTypes; +export type UsageDetails = FixTypes; export type CreateLangfuseGenerationBody = FixTypes; export type UpdateLangfuseGenerationBody = FixTypes; diff --git a/langfuse-langchain/src/callback.ts b/langfuse-langchain/src/callback.ts index 0f97ddc8..e7108cb9 100644 --- a/langfuse-langchain/src/callback.ts +++ b/langfuse-langchain/src/callback.ts @@ -10,6 +10,7 @@ import { HumanMessage, SystemMessage, ToolMessage, + type UsageMetadata, type BaseMessageFields, type MessageContent, } from "@langchain/core/messages"; @@ -20,13 +21,7 @@ import type { ChainValues } from "@langchain/core/utils/types"; import type { Generation, LLMResult } from "@langchain/core/outputs"; import type { Document } from "@langchain/core/documents"; -import type { - ChatPromptClient, - components, - LangfuseSpanClient, - LangfuseTraceClient, - TextPromptClient, -} from "langfuse-core"; +import type { ChatPromptClient, LangfuseSpanClient, LangfuseTraceClient, TextPromptClient } from "langfuse-core"; export type LlmMessage = { role: string; @@ -599,8 +594,25 @@ export class CallbackHandler extends BaseCallbackHandler { const lastResponse = output.generations[output.generations.length - 1][output.generations[output.generations.length - 1].length - 1]; + const llmUsage = this.extractUsageMetadata(lastResponse) ?? output.llmOutput?.["tokenUsage"]; - const llmUsage = output.llmOutput?.["tokenUsage"] ?? this.extractUsageMetadata(lastResponse); + const usageDetails: Record = { + input: llmUsage?.input_tokens ?? 
(llmUsage && "promptTokens" in llmUsage ? llmUsage?.promptTokens : undefined), + output: llmUsage?.output_tokens ?? (llmUsage && "completionTokens" in llmUsage ? llmUsage?.completionTokens : undefined), + total: llmUsage?.total_tokens ?? (llmUsage && "totalTokens" in llmUsage ? llmUsage?.totalTokens : undefined), + }; + + if (llmUsage && "input_token_details" in llmUsage) { + for (const [key, val] of Object.entries(llmUsage["input_token_details"] ?? {})) { + usageDetails[`input_${key}`] = val; + } + } + + if (llmUsage && "output_token_details" in llmUsage) { + for (const [key, val] of Object.entries(llmUsage["output_token_details"] ?? {})) { + usageDetails[`output_${key}`] = val; + } + } const extractedOutput = "message" in lastResponse && lastResponse["message"] instanceof BaseMessage @@ -613,7 +625,8 @@ export class CallbackHandler extends BaseCallbackHandler { output: extractedOutput, endTime: new Date(), completionStartTime: runId in this.completionStartTimes ? this.completionStartTimes[runId] : undefined, - usage: llmUsage, + usage: usageDetails, + usageDetails: usageDetails, version: this.version, }); @@ -628,7 +641,7 @@ export class CallbackHandler extends BaseCallbackHandler { } /** Not all models supports tokenUsage in llmOutput, can use AIMessage.usage_metadata instead */ - private extractUsageMetadata(generation: Generation): components["schemas"]["IngestionUsage"] | undefined { + private extractUsageMetadata(generation: Generation): UsageMetadata | undefined { try { const usageMetadata = "message" in generation && @@ -636,15 +649,7 @@ export class CallbackHandler extends BaseCallbackHandler { ?
generation["message"].usage_metadata : undefined; - if (!usageMetadata) { - return; - } - - return { - promptTokens: usageMetadata.input_tokens, - completionTokens: usageMetadata.output_tokens, - totalTokens: usageMetadata.total_tokens, - }; + return usageMetadata; } catch (err) { this._log(`Error extracting usage metadata: ${err}`); diff --git a/langfuse-vercel/src/LangfuseExporter.ts b/langfuse-vercel/src/LangfuseExporter.ts index ba092b48..9a5ec347 100644 --- a/langfuse-vercel/src/LangfuseExporter.ts +++ b/langfuse-vercel/src/LangfuseExporter.ts @@ -190,22 +190,8 @@ export class LangfuseExporter implements SpanExporter { "ai.settings.maxRetries" in attributes ? attributes["ai.settings.maxRetries"]?.toString() : undefined, mode: "ai.settings.mode" in attributes ? attributes["ai.settings.mode"]?.toString() : undefined, }, - usage: { - input: - "gen_ai.usage.prompt_tokens" in attributes // Backward compat, input_tokens used in latest ai SDK versions - ? parseInt(attributes["gen_ai.usage.prompt_tokens"]?.toString() ?? "0") - : "gen_ai.usage.input_tokens" in attributes - ? parseInt(attributes["gen_ai.usage.input_tokens"]?.toString() ?? "0") - : undefined, - - output: - "gen_ai.usage.completion_tokens" in attributes // Backward compat, output_tokens used in latest ai SDK versions - ? parseInt(attributes["gen_ai.usage.completion_tokens"]?.toString() ?? "0") - : "gen_ai.usage.output_tokens" in attributes - ? parseInt(attributes["gen_ai.usage.output_tokens"]?.toString() ?? "0") - : undefined, - total: "ai.usage.tokens" in attributes ? parseInt(attributes["ai.usage.tokens"]?.toString() ?? 
"0") : undefined, - }, + usage: this.parseUsageDetails(attributes), + usageDetails: this.parseUsageDetails(attributes), input: this.parseInput(span), output: this.parseOutput(span), @@ -214,6 +200,25 @@ export class LangfuseExporter implements SpanExporter { }); } + private parseUsageDetails(attributes: Record): Record { + return { + input: + "gen_ai.usage.prompt_tokens" in attributes // Backward compat, input_tokens used in latest ai SDK versions + ? parseInt(attributes["gen_ai.usage.prompt_tokens"]?.toString() ?? "0") + : "gen_ai.usage.input_tokens" in attributes + ? parseInt(attributes["gen_ai.usage.input_tokens"]?.toString() ?? "0") + : undefined, + + output: + "gen_ai.usage.completion_tokens" in attributes // Backward compat, output_tokens used in latest ai SDK versions + ? parseInt(attributes["gen_ai.usage.completion_tokens"]?.toString() ?? "0") + : "gen_ai.usage.output_tokens" in attributes + ? parseInt(attributes["gen_ai.usage.output_tokens"]?.toString() ?? "0") + : undefined, + total: "ai.usage.tokens" in attributes ? parseInt(attributes["ai.usage.tokens"]?.toString() ?? 
"0") : undefined, + }; + } + private parseSpanMetadata(span: ReadableSpan): Record { return Object.entries(span.attributes).reduce( (acc, [key, value]) => { diff --git a/langfuse/src/openai/parseOpenAI.ts b/langfuse/src/openai/parseOpenAI.ts index b2cc62d7..83ac5167 100644 --- a/langfuse/src/openai/parseOpenAI.ts +++ b/langfuse/src/openai/parseOpenAI.ts @@ -1,5 +1,5 @@ import type OpenAI from "openai"; -import type { Usage } from "langfuse-core"; +import type { Usage, UsageDetails } from "langfuse-core"; type ParsedOpenAIArguments = { model: string; @@ -67,13 +67,36 @@ export const parseUsage = (res: unknown): Usage | undefined => { const { prompt_tokens, completion_tokens, total_tokens } = res.usage; return { - promptTokens: prompt_tokens, - completionTokens: completion_tokens, - totalTokens: total_tokens, + input: prompt_tokens, + output: completion_tokens, + total: total_tokens, }; } }; +export const parseUsageDetails = (completionUsage: OpenAI.CompletionUsage): UsageDetails | undefined => { + const { prompt_tokens, completion_tokens, total_tokens, completion_tokens_details, prompt_tokens_details } = + completionUsage; + + return { + input: prompt_tokens, + output: completion_tokens, + total: total_tokens, + ...Object.fromEntries( + Object.entries(prompt_tokens_details ?? {}).map(([key, value]) => [`input_${key}`, value as number]) + ), + ...Object.fromEntries( + Object.entries(completion_tokens_details ?? 
{}).map(([key, value]) => [`output_${key}`, value as number]) + ), + }; +}; + +export const parseUsageDetailsFromResponse = (res: unknown): UsageDetails | undefined => { + if (hasCompletionUsage(res)) { + return parseUsageDetails(res.usage); + } +}; + export const parseChunk = ( rawChunk: unknown ): diff --git a/langfuse/src/openai/traceMethod.ts b/langfuse/src/openai/traceMethod.ts index b66f707a..e844b22c 100644 --- a/langfuse/src/openai/traceMethod.ts +++ b/langfuse/src/openai/traceMethod.ts @@ -2,7 +2,15 @@ import type OpenAI from "openai"; import type { LangfuseParent } from "./types"; import { LangfuseSingleton } from "./LangfuseSingleton"; -import { getToolCallOutput, parseChunk, parseCompletionOutput, parseInputArgs, parseUsage } from "./parseOpenAI"; +import { + getToolCallOutput, + parseChunk, + parseCompletionOutput, + parseInputArgs, + parseUsage, + parseUsageDetails, + parseUsageDetailsFromResponse, +} from "./parseOpenAI"; import { isAsyncIterable } from "./utils"; import type { LangfuseConfig } from "./types"; @@ -109,6 +117,7 @@ const wrapMethod = async ( total: "total_tokens" in usage ? usage.total_tokens : undefined, } : undefined, + usageDetails: usage ? parseUsageDetails(usage) : undefined, }); if (!hasUserProvidedParent) { @@ -121,12 +130,14 @@ const wrapMethod = async ( const output = parseCompletionOutput(res); const usage = parseUsage(res); + const usageDetails = parseUsageDetailsFromResponse(res); langfuseParent.generation({ ...observationData, output, endTime: new Date(), usage, + usageDetails, }); if (!hasUserProvidedParent) { @@ -145,6 +156,11 @@ const wrapMethod = async ( outputCost: 0, totalCost: 0, }, + costDetails: { + input: 0, + output: 0, + total: 0, + }, }); throw error;