Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for Claude Sonnet 3.7 thinking via Vertex AI #1254

Merged
5 commits merged on Feb 27, 2025 (the author and the source/target branch names were not captured in this copy)
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@
"dependencies": {
"@anthropic-ai/bedrock-sdk": "^0.10.2",
"@anthropic-ai/sdk": "^0.37.0",
"@anthropic-ai/vertex-sdk": "^0.4.1",
"@anthropic-ai/vertex-sdk": "^0.7.0",
"@aws-sdk/client-bedrock-runtime": "^3.706.0",
"@google/generative-ai": "^0.18.0",
"@mistralai/mistralai": "^1.3.6",
Expand Down
110 changes: 91 additions & 19 deletions src/api/providers/vertex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
import { AnthropicVertex } from "@anthropic-ai/vertex-sdk"
import { Stream as AnthropicStream } from "@anthropic-ai/sdk/streaming"
import { ApiHandler, SingleCompletionHandler } from "../"
import { BetaThinkingConfigParam } from "@anthropic-ai/sdk/resources/beta"
import { ApiHandlerOptions, ModelInfo, vertexDefaultModelId, VertexModelId, vertexModels } from "../../shared/api"
import { ApiStream } from "../transform/stream"

Expand Down Expand Up @@ -70,15 +71,25 @@ interface VertexMessageStreamEvent {
usage?: {
output_tokens: number
}
content_block?: {
type: "text"
text: string
}
content_block?:
| {
type: "text"
text: string
}
| {
type: "thinking"
thinking: string
}
index?: number
delta?: {
type: "text_delta"
text: string
}
delta?:
| {
type: "text_delta"
text: string
}
| {
type: "thinking_delta"
thinking: string
}
}

// https://docs.anthropic.com/en/api/claude-on-vertex-ai
Expand Down Expand Up @@ -145,6 +156,7 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {

async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
const model = this.getModel()
let { id, info, temperature, maxTokens, thinking } = model
const useCache = model.info.supportsPromptCache

// Find indices of user messages that we want to cache
Expand All @@ -158,9 +170,10 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {

// Create the stream with appropriate caching configuration
const params = {
model: model.id,
max_tokens: model.info.maxTokens || 8192,
temperature: this.options.modelTemperature ?? 0,
model: id,
max_tokens: maxTokens,
temperature,
thinking,
// Cache the system prompt if caching is enabled
system: useCache
? [
Expand Down Expand Up @@ -220,6 +233,19 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
}
break
}
case "thinking": {
if (chunk.index! > 0) {
yield {
type: "reasoning",
text: "\n",
}
}
yield {
type: "reasoning",
text: (chunk.content_block as any).thinking,
}
break
}
}
break
}
Expand All @@ -232,31 +258,77 @@ export class VertexHandler implements ApiHandler, SingleCompletionHandler {
}
break
}
case "thinking_delta": {
yield {
type: "reasoning",
text: (chunk.delta as any).thinking,
}
break
}
}
break
}
}
}
}

getModel(): { id: VertexModelId; info: ModelInfo } {
getModel(): {
id: VertexModelId
info: ModelInfo
temperature: number
maxTokens: number
thinking?: BetaThinkingConfigParam
} {
const modelId = this.options.apiModelId
let temperature = this.options.modelTemperature ?? 0
let thinking: BetaThinkingConfigParam | undefined = undefined

if (modelId && modelId in vertexModels) {
const id = modelId as VertexModelId
return { id, info: vertexModels[id] }
const info: ModelInfo = vertexModels[id]

// The `:thinking` suffix is a virtual identifier that selects the thinking-enabled
// variant of a model. It is stripped below so the returned id is the plain model id,
// similar to how the Anthropic provider handles it.
let actualId = id
if (id.endsWith(":thinking")) {
actualId = id.replace(":thinking", "") as VertexModelId
}

const maxTokens = this.options.modelMaxTokens || info.maxTokens || 8192

if (info.thinking) {
temperature = 1.0 // Thinking requires temperature 1.0
const maxBudgetTokens = Math.floor(maxTokens * 0.8)
const budgetTokens = Math.max(
Math.min(
this.options.vertexThinking ?? this.options.anthropicThinking ?? maxBudgetTokens,
maxBudgetTokens,
),
1024,
)
thinking = { type: "enabled", budget_tokens: budgetTokens }
}

return { id: actualId, info, temperature, maxTokens, thinking }
}
return { id: vertexDefaultModelId, info: vertexModels[vertexDefaultModelId] }

const id = vertexDefaultModelId
const info = vertexModels[id]
const maxTokens = this.options.modelMaxTokens || info.maxTokens || 8192

return { id, info, temperature, maxTokens, thinking }
}

async completePrompt(prompt: string): Promise<string> {
try {
const model = this.getModel()
const useCache = model.info.supportsPromptCache
let { id, info, temperature, maxTokens, thinking } = this.getModel()
const useCache = info.supportsPromptCache

const params = {
model: model.id,
max_tokens: model.info.maxTokens || 8192,
temperature: this.options.modelTemperature ?? 0,
model: id,
max_tokens: maxTokens,
temperature,
thinking,
system: "", // No system prompt needed for single completions
messages: [
{
Expand Down
5 changes: 5 additions & 0 deletions src/core/webview/ClineProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1652,6 +1652,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
lmStudioBaseUrl,
anthropicBaseUrl,
anthropicThinking,
vertexThinking,
geminiApiKey,
openAiNativeApiKey,
deepSeekApiKey,
Expand Down Expand Up @@ -1701,6 +1702,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
this.updateGlobalState("lmStudioBaseUrl", lmStudioBaseUrl),
this.updateGlobalState("anthropicBaseUrl", anthropicBaseUrl),
this.updateGlobalState("anthropicThinking", anthropicThinking),
this.updateGlobalState("vertexThinking", vertexThinking),
this.storeSecret("geminiApiKey", geminiApiKey),
this.storeSecret("openAiNativeApiKey", openAiNativeApiKey),
this.storeSecret("deepSeekApiKey", deepSeekApiKey),
Expand Down Expand Up @@ -2158,6 +2160,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
lmStudioBaseUrl,
anthropicBaseUrl,
anthropicThinking,
vertexThinking,
geminiApiKey,
openAiNativeApiKey,
deepSeekApiKey,
Expand Down Expand Up @@ -2242,6 +2245,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
this.getGlobalState("lmStudioBaseUrl") as Promise<string | undefined>,
this.getGlobalState("anthropicBaseUrl") as Promise<string | undefined>,
this.getGlobalState("anthropicThinking") as Promise<number | undefined>,
this.getGlobalState("vertexThinking") as Promise<number | undefined>,
this.getSecret("geminiApiKey") as Promise<string | undefined>,
this.getSecret("openAiNativeApiKey") as Promise<string | undefined>,
this.getSecret("deepSeekApiKey") as Promise<string | undefined>,
Expand Down Expand Up @@ -2343,6 +2347,7 @@ export class ClineProvider implements vscode.WebviewViewProvider {
lmStudioBaseUrl,
anthropicBaseUrl,
anthropicThinking,
vertexThinking,
geminiApiKey,
openAiNativeApiKey,
deepSeekApiKey,
Expand Down
14 changes: 14 additions & 0 deletions src/shared/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ export interface ApiHandlerOptions {
awsUseProfile?: boolean
vertexProjectId?: string
vertexRegion?: string
vertexThinking?: number
openAiBaseUrl?: string
openAiApiKey?: string
openAiModelId?: string
Expand Down Expand Up @@ -436,6 +437,18 @@ export const openRouterDefaultModelInfo: ModelInfo = {
export type VertexModelId = keyof typeof vertexModels
export const vertexDefaultModelId: VertexModelId = "claude-3-7-sonnet@20250219"
export const vertexModels = {
"claude-3-7-sonnet@20250219:thinking": {
maxTokens: 64000,
contextWindow: 200_000,
supportsImages: true,
supportsComputerUse: true,
supportsPromptCache: true,
inputPrice: 3.0,
outputPrice: 15.0,
cacheWritesPrice: 3.75,
cacheReadsPrice: 0.3,
thinking: true,
},
"claude-3-7-sonnet@20250219": {
maxTokens: 8192,
contextWindow: 200_000,
Expand All @@ -446,6 +459,7 @@ export const vertexModels = {
outputPrice: 15.0,
cacheWritesPrice: 3.75,
cacheReadsPrice: 0.3,
thinking: false,
},
"claude-3-5-sonnet-v2@20241022": {
maxTokens: 8192,
Expand Down
2 changes: 2 additions & 0 deletions src/shared/globalState.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export type GlobalStateKey =
| "awsUseProfile"
| "vertexProjectId"
| "vertexRegion"
| "vertexThinking"
| "lastShownAnnouncementId"
| "customInstructions"
| "alwaysAllowReadOnly"
Expand All @@ -43,6 +44,7 @@ export type GlobalStateKey =
| "lmStudioBaseUrl"
| "anthropicBaseUrl"
| "anthropicThinking"
| "vertexThinking"
| "azureApiVersion"
| "openAiStreamingEnabled"
| "openRouterModelId"
Expand Down
3 changes: 3 additions & 0 deletions webview-ui/src/components/settings/ApiOptions.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import * as vscodemodels from "vscode"
import {
ApiConfiguration,
ModelInfo,
ApiProvider,
anthropicDefaultModelId,
anthropicModels,
azureOpenAiDefaultApiVersion,
Expand Down Expand Up @@ -1380,9 +1381,11 @@ const ApiOptions = ({
/>
</div>
<ThinkingBudget
key={`${selectedProvider}-${selectedModelId}`}
apiConfiguration={apiConfiguration}
setApiConfigurationField={setApiConfigurationField}
modelInfo={selectedModelInfo}
provider={selectedProvider as ApiProvider}
/>
<ModelInfoView
selectedModelId={selectedModelId}
Expand Down
30 changes: 22 additions & 8 deletions webview-ui/src/components/settings/ThinkingBudget.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { useEffect } from "react"

import { useEffect, useMemo } from "react"
import { ApiProvider } from "../../../../src/shared/api"
import { Slider } from "@/components/ui"

import { ApiConfiguration, ModelInfo } from "../../../../src/shared/api"
Expand All @@ -8,24 +8,38 @@ interface ThinkingBudgetProps {
apiConfiguration: ApiConfiguration
setApiConfigurationField: <K extends keyof ApiConfiguration>(field: K, value: ApiConfiguration[K]) => void
modelInfo?: ModelInfo
provider?: ApiProvider
}

export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, modelInfo }: ThinkingBudgetProps) => {
export const ThinkingBudget = ({
apiConfiguration,
setApiConfigurationField,
modelInfo,
provider,
}: ThinkingBudgetProps) => {
const isVertexProvider = provider === "vertex"
const budgetField = isVertexProvider ? "vertexThinking" : "anthropicThinking"

const tokens = apiConfiguration?.modelMaxTokens || modelInfo?.maxTokens || 64_000
const tokensMin = 8192
const tokensMax = modelInfo?.maxTokens || 64_000

const thinkingTokens = apiConfiguration?.anthropicThinking || 8192
// Get the appropriate thinking tokens based on provider
const thinkingTokens = useMemo(() => {
const value = isVertexProvider ? apiConfiguration?.vertexThinking : apiConfiguration?.anthropicThinking
return value || Math.min(Math.floor(0.8 * tokens), 8192)
}, [apiConfiguration, isVertexProvider, tokens])

const thinkingTokensMin = 1024
const thinkingTokensMax = Math.floor(0.8 * tokens)

useEffect(() => {
if (thinkingTokens > thinkingTokensMax) {
setApiConfigurationField("anthropicThinking", thinkingTokensMax)
setApiConfigurationField(budgetField, thinkingTokensMax)
}
}, [thinkingTokens, thinkingTokensMax, setApiConfigurationField])
}, [thinkingTokens, thinkingTokensMax, setApiConfigurationField, budgetField])

if (!modelInfo || !modelInfo.thinking) {
if (!modelInfo?.thinking) {
return null
}

Expand All @@ -52,7 +66,7 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
max={thinkingTokensMax}
step={1024}
value={[thinkingTokens]}
onValueChange={([value]) => setApiConfigurationField("anthropicThinking", value)}
onValueChange={([value]) => setApiConfigurationField(budgetField, value)}
/>
<div className="w-12 text-sm text-center">{thinkingTokens}</div>
</div>
Expand Down
Loading