From aa300b16e0d09f954bec268823fadacbc9817649 Mon Sep 17 00:00:00 2001 From: hitesh-1997 Date: Tue, 1 Oct 2024 17:31:00 +0530 Subject: [PATCH 1/4] use the accurate docContext and persist repo visibility using local storage --- .../inline-completion-item-provider.ts | 3 + vscode/src/completions/logger.test.ts | 28 ++++++-- vscode/src/completions/logger.ts | 17 ++++- vscode/src/repository/githubRepoMetadata.ts | 67 +++++++++++++------ vscode/src/services/LocalStorageProvider.ts | 29 ++++++++ 5 files changed, 118 insertions(+), 26 deletions(-) diff --git a/vscode/src/completions/inline-completion-item-provider.ts b/vscode/src/completions/inline-completion-item-provider.ts index b7d8c4e8904b..a4c3bbb9d210 100644 --- a/vscode/src/completions/inline-completion-item-provider.ts +++ b/vscode/src/completions/inline-completion-item-provider.ts @@ -843,10 +843,13 @@ export class InlineCompletionItemProvider takeSuggestWidgetSelectionIntoAccount, undefined ) + completion.requestParams.docContext.position if (isStillVisible) { suggestionEvent.markAsRead({ document: invokedDocument, position: invokedPosition, + docPrefix: completion.requestParams.docContext.completePrefix, + docSuffix: completion.requestParams.docContext.completeSuffix, }) } }, this.COMPLETION_VISIBLE_DELAY_MS) diff --git a/vscode/src/completions/logger.test.ts b/vscode/src/completions/logger.test.ts index 354f8bf5ac6b..27c04d4b9c47 100644 --- a/vscode/src/completions/logger.test.ts +++ b/vscode/src/completions/logger.test.ts @@ -76,7 +76,12 @@ describe('logger', () => { isDotComUser: false, }) const suggestionEvent = CompletionLogger.prepareSuggestionEvent({ id }) - suggestionEvent?.markAsRead({ document, position }) + suggestionEvent?.markAsRead({ + document, + position, + docPrefix: defaultRequestParams.docContext.completePrefix, + docSuffix: defaultRequestParams.docContext.completeSuffix, + }) CompletionLogger.accepted(id, document, item, range(0, 0, 0, 0), false) expect(recordSpy).toHaveBeenCalledWith('cody.completion', 'suggested', { @@ -111,7 +116,12 @@ describe('logger', () => { isDotComUser: false, }) const firstSuggestionEvent = CompletionLogger.prepareSuggestionEvent({ id: id1 }) - firstSuggestionEvent?.markAsRead({ document, position }) + firstSuggestionEvent?.markAsRead({ + document, + position, + docPrefix: defaultRequestParams.docContext.completePrefix, + docSuffix: defaultRequestParams.docContext.completeSuffix, + }) const loggerItem = CompletionLogger.getCompletionEvent(id1) const completionId = loggerItem?.params.id @@ -129,7 +139,12 @@ describe('logger', () => { isDotComUser: false, }) const secondSuggestionEvent = CompletionLogger.prepareSuggestionEvent({ id: id2 }) - secondSuggestionEvent?.markAsRead({ document, position }) + secondSuggestionEvent?.markAsRead({ + document, + position, + docPrefix: defaultRequestParams.docContext.completePrefix, + docSuffix: defaultRequestParams.docContext.completeSuffix, + }) CompletionLogger.accepted(id2, document, item, range(0, 0, 0, 0), false) const loggerItem2 = CompletionLogger.getCompletionEvent(id2) @@ -154,7 +169,12 @@ describe('logger', () => { isDotComUser: false, }) const thirdSuggestionEvent = CompletionLogger.prepareSuggestionEvent({ id: id3 }) - thirdSuggestionEvent?.markAsRead({ document, position }) + thirdSuggestionEvent?.markAsRead({ + document, + position, + docPrefix: defaultRequestParams.docContext.completePrefix, + docSuffix: defaultRequestParams.docContext.completeSuffix, + }) const loggerItem3 = CompletionLogger.getCompletionEvent(id3) expect(loggerItem3?.params.id).not.toBe(completionId) diff --git a/vscode/src/completions/logger.ts b/vscode/src/completions/logger.ts index e8c752f234bd..c632f122dbee 100644 --- a/vscode/src/completions/logger.ts +++ b/vscode/src/completions/logger.ts @@ -775,7 +775,9 @@ function getInlineContextItemContext( function suggestionDocumentDiffTracker( interactionId: CompletionAnalyticsID, document: vscode.TextDocument, - position: vscode.Position + position: vscode.Position, + docPrefix: string, + docSuffix: string ): void { // If user is not in the same document, we don't track the diff. if (document.uri.scheme !== 'file') { @@ -786,12 +788,13 @@ function suggestionDocumentDiffTracker( } // Offset around the current cursor position to track the diff const offsetBytes = 1024 * 128 + const startPosition = document.positionAt(Math.max(0, document.offsetAt(position) - offsetBytes)) const endPosition = document.positionAt( Math.min(document.getText().length, document.offsetAt(position) + offsetBytes) ) const trackingRange = new vscode.Range(startPosition, endPosition) - const documentText = document.getText(trackingRange) + const documentText = docPrefix.slice(-offsetBytes) + docSuffix.slice(0, offsetBytes) const persistenceTimeoutList = [ 20 * 1000, // 20 seconds @@ -815,6 +818,8 @@ function suggestionDocumentDiffTracker( type SuggestionMarkReadParam = { document: vscode.TextDocument position: vscode.Position + docPrefix: string + docSuffix: string } // Suggested completions will not be logged immediately. Instead, we log them when we either hide @@ -872,7 +877,13 @@ export function prepareSuggestionEvent({ isDotCom(authStatus.endpoint || '') && event.params.inlineCompletionItemContext?.isRepoPublic ) { - suggestionDocumentDiffTracker(event.params.id, param.document, param.position) + suggestionDocumentDiffTracker( + event.params.id, + param.document, + param.position, + param.docPrefix, + param.docSuffix + ) } }, } diff --git a/vscode/src/repository/githubRepoMetadata.ts b/vscode/src/repository/githubRepoMetadata.ts index 00da4d5ab76e..4e354cf4d00e 100644 --- a/vscode/src/repository/githubRepoMetadata.ts +++ b/vscode/src/repository/githubRepoMetadata.ts @@ -10,19 +10,18 @@ import { } from '@sourcegraph/cody-shared' import { Observable, map } from 'observable-fns' import { logDebug } from '../log' +import { localStorage } from '../services/LocalStorageProvider' import { remoteReposForAllWorkspaceFolders } from './remoteRepos' interface GitHubDotComRepoMetaData { // The full uniquely identifying name on github.com, e.g., "github.com/sourcegraph/cody" repoName: string - isPublic: boolean } export class GitHubDotComRepoMetadata { // This class is used to get the metadata from the gitApi. private static instance: GitHubDotComRepoMetadata | null = null - private cache = new Map() private constructor() {} @@ -33,29 +32,41 @@ export class GitHubDotComRepoMetadata { return GitHubDotComRepoMetadata.instance } - public getRepoMetadataIfCached(repoName: string): GitHubDotComRepoMetaData | undefined { - return this.cache.get(repoName) + public getRepoMetadataIfCached(repoBaseName: string): GitHubDotComRepoMetaData | undefined { + const normalizedRepoName = this.getNormalizedRepoNameFromBaseRepoName(repoBaseName) + if (!normalizedRepoName) { + return undefined + } + const repoVisibility = localStorage.getGitHubRepoVisibility(normalizedRepoName) + if (!repoVisibility) { + return undefined + } + return { + repoName: normalizedRepoName, + isPublic: repoVisibility, + } } public async getRepoMetadataUsingRepoName( - repoName: string, + repoBaseName: string, signal?: AbortSignal ): Promise { - if (this.cache.has(repoName)) { - return this.cache.get(repoName) + const repoMetadata = this.getRepoMetadataIfCached(repoBaseName) + if (repoMetadata) { + return repoMetadata } - const repoMetaData = await this.ghMetadataFromGit(repoName, signal) + const repoMetaData = await this.ghMetadataFromGit(repoBaseName, signal) if (repoMetaData) { - this.cache.set(repoName, repoMetaData) + await localStorage.setGitHubRepoVisibility(repoBaseName, repoMetaData.isPublic) } return repoMetaData } private async ghMetadataFromGit( - repoName: string, + repoBaseName: string, signal?: AbortSignal ): Promise { - const ownerAndRepoName = this.parseOwnerAndRepoName(repoName) + const ownerAndRepoName = this.parseOwnerAndRepoName(repoBaseName) if (!ownerAndRepoName) { return undefined } @@ -69,11 +80,12 @@ export class GitHubDotComRepoMetadata { private async queryGitHubApi( owner: string, - repoBasename: string, + repoName: string, signal?: AbortSignal ): Promise { - const apiUrl = `https://api.github.com/repos/${owner}/${repoBasename}` - const metadata = { repoName: `github.com/${owner}/${repoBasename}`, isPublic: false } + const apiUrl = `https://api.github.com/repos/${owner}/${repoName}` + const normalizedRepoName = this.getNormalizedRepoNameFromOwnerAndRepoName(owner, repoName) + const metadata = { repoName: normalizedRepoName, isPublic: false } try { const response = await fetch(apiUrl, { method: 'HEAD', signal }) metadata.isPublic = response.ok @@ -82,7 +94,7 @@ export class GitHubDotComRepoMetadata { logDebug( 'queryGitHubApi', 'error querying GitHub API (assuming repository is non-public', - `${owner}/${repoBasename}`, + `${owner}/${repoName}`, error ) } @@ -90,13 +102,30 @@ export class GitHubDotComRepoMetadata { return metadata } - private parseOwnerAndRepoName(repoName: string): { owner: string; repoName: string } | undefined { - const match = repoName?.match(/github\.com\/([^/]+)\/([^/]+?)(?:\.git)?$/) + private getNormalizedRepoNameFromBaseRepoName(repoBaseName: string): string | undefined { + const ownerAndRepoName = this.parseOwnerAndRepoName(repoBaseName) + if (!ownerAndRepoName) { + return undefined + } + return this.getNormalizedRepoNameFromOwnerAndRepoName( + ownerAndRepoName.owner, + ownerAndRepoName.repoName + ) + } + + private getNormalizedRepoNameFromOwnerAndRepoName(owner: string, repoName: string): string { + return `github.com/${owner}/${repoName}` + } + + private parseOwnerAndRepoName( + repoBaseName: string + ): { owner: string; repoName: string } | undefined { + const match = repoBaseName?.match(/github\.com\/([^/]+)\/([^/]+?)(?:\.git)?$/) if (!match) { return undefined } - const [, owner, repoBasename] = match - return { owner, repoName: repoBasename } + const [, owner, repoName] = match + return { owner, repoName: repoName } } } diff --git a/vscode/src/services/LocalStorageProvider.ts b/vscode/src/services/LocalStorageProvider.ts index 1d66446dcaa8..598980e80d76 100644 --- a/vscode/src/services/LocalStorageProvider.ts +++ b/vscode/src/services/LocalStorageProvider.ts @@ -32,6 +32,7 @@ class LocalStorage implements LocalStorageForModelPreferences { protected readonly CODY_ENDPOINT_HISTORY = 'SOURCEGRAPH_CODY_ENDPOINT_HISTORY' protected readonly CODY_ENROLLMENT_HISTORY = 'SOURCEGRAPH_CODY_ENROLLMENTS' protected readonly LAST_USED_CHAT_MODALITY = 'cody-last-used-chat-modality' + protected readonly GIT_REPO_VISIBILITY_KEY = 'cody-git-repo-visibility' public readonly ANONYMOUS_USER_ID_KEY = 'sourcegraphAnonymousUid' public readonly LAST_USED_ENDPOINT = 'SOURCEGRAPH_CODY_ENDPOINT' public readonly LAST_USED_USERNAME = 'SOURCEGRAPH_CODY_USERNAME' @@ -223,6 +224,34 @@ class LocalStorage implements LocalStorageForModelPreferences { return this.get(this.KEY_LOCAL_MINION_HISTORY) } + public async setGitHubRepoVisibility(repoName: string, visibility: boolean): Promise { + const visibilityKey = `${this.GIT_REPO_VISIBILITY_KEY}_${repoName}` + const visibilityValue = { + visibility: visibility, + timestamp: Date.now(), + } + await this.set(visibilityKey, visibilityValue) + } + + public getGitHubRepoVisibility(repoName: string): boolean | null { + const visibilityKey = `${this.GIT_REPO_VISIBILITY_KEY}_${repoName}` + const visibilityValue = this.get<{ visibility: boolean; timestamp: number } | null>( + visibilityKey + ) + + if (visibilityValue) { + const currentTime = Date.now() + const timeDifference = currentTime - visibilityValue.timestamp + // If the visibility value is older than 24 hours, delete it. + if (timeDifference > 24 * 60 * 60 * 1000) { + this.delete(visibilityKey) + return null + } + return visibilityValue.visibility + } + return null + } + public async removeChatHistory(authStatus: AuthenticatedAuthStatus): Promise { try { await this.setChatHistory(authStatus, { chat: {} }) From ce11196dec0bd165cb183ddcae4e6c2290db0d8d Mon Sep 17 00:00:00 2001 From: hitesh-1997 Date: Tue, 1 Oct 2024 22:11:37 +0530 Subject: [PATCH 2/4] adding codeqwen and deepseek long context experiment --- .../experimentation/FeatureFlagProvider.ts | 14 +- .../model-helpers/__tests__/codeqwen.test.ts | 170 ++++++++++++++++++ .../src/completions/model-helpers/codeqwen.ts | 82 +++++++++ vscode/src/completions/model-helpers/index.ts | 5 + vscode/src/completions/providers/fireworks.ts | 33 +--- .../providers/shared/get-experiment-model.ts | 54 ++---- 6 files changed, 283 insertions(+), 75 deletions(-) create mode 100644 vscode/src/completions/model-helpers/__tests__/codeqwen.test.ts create mode 100644 vscode/src/completions/model-helpers/codeqwen.ts diff --git a/lib/shared/src/experimentation/FeatureFlagProvider.ts b/lib/shared/src/experimentation/FeatureFlagProvider.ts index 8ebc9245a5bf..dc98470dd0bb 100644 --- a/lib/shared/src/experimentation/FeatureFlagProvider.ts +++ b/lib/shared/src/experimentation/FeatureFlagProvider.ts @@ -38,13 +38,13 @@ export enum FeatureFlag { CodyAutocompleteDataCollectionFlag = 'cody-autocomplete-data-collection-flag', // Enable various feature flags to experiment with FIM trained fine-tuned models via Fireworks - CodyAutocompleteFIMModelExperimentBaseFeatureFlag = 'cody-autocomplete-fim-model-experiment-flag-v2', - CodyAutocompleteFIMModelExperimentControl = 'cody-autocomplete-fim-model-experiment-control-v2', - CodyAutocompleteFIMModelExperimentCurrentBest = 'cody-autocomplete-fim-model-experiment-current-best-v2', - CodyAutocompleteFIMModelExperimentVariant1 = 'cody-autocomplete-fim-model-experiment-variant-1-v2', - CodyAutocompleteFIMModelExperimentVariant2 = 'cody-autocomplete-fim-model-experiment-variant-2-v2', - CodyAutocompleteFIMModelExperimentVariant3 = 'cody-autocomplete-fim-model-experiment-variant-3-v2', - CodyAutocompleteFIMModelExperimentVariant4 = 'cody-autocomplete-fim-model-experiment-variant-4-v2', + CodyAutocompleteFIMModelExperimentBaseFeatureFlag = 'cody-autocomplete-fim-model-experiment-flag', + CodyAutocompleteFIMModelExperimentControl = 'cody-autocomplete-fim-model-experiment-control', + CodyAutocompleteFIMModelExperimentCurrentBest = 'cody-autocomplete-fim-model-experiment-current-best', + CodyAutocompleteFIMModelExperimentVariant1 = 'cody-autocomplete-fim-model-experiment-variant-1', + CodyAutocompleteFIMModelExperimentVariant2 = 'cody-autocomplete-fim-model-experiment-variant-2', + CodyAutocompleteFIMModelExperimentVariant3 = 'cody-autocomplete-fim-model-experiment-variant-3', + CodyAutocompleteFIMModelExperimentVariant4 = 'cody-autocomplete-fim-model-experiment-variant-4', CodyAutocompleteDisableLowPerfLangDelay = 'cody-autocomplete-disable-low-perf-lang-delay', // Enables Claude 3 if the user is in our holdout group CodyAutocompleteClaude3 = 'cody-autocomplete-claude-3', diff --git a/vscode/src/completions/model-helpers/__tests__/codeqwen.test.ts b/vscode/src/completions/model-helpers/__tests__/codeqwen.test.ts new file mode 100644 index 000000000000..a852c7be55a6 --- /dev/null +++ b/vscode/src/completions/model-helpers/__tests__/codeqwen.test.ts @@ -0,0 +1,170 @@ +import { describe, expect, it } from 'vitest' + +import { isWindows } from '@sourcegraph/cody-shared' + +import { completionParams, contextSnippets } from './test-data' + +import { CodeQwen } from '../codeqwen' + +describe('CodeQwen', () => { + describe.skipIf(isWindows())('getMessages', () => { + it('returns the prompt with the correct intro snippets', () => { + const model = new CodeQwen() + const { docContext, document, provider } = completionParams + + const result = model.getMessages({ + document, + docContext, + snippets: contextSnippets, + promptChars: provider.contextSizeHints.totalChars, + }) + + expect(result).toMatchInlineSnapshot(` + [ + { + "speaker": "human", + "text": "<|file_sep|>codebase/context1.ts + function contextSnippetOne() {} + <|file_sep|>codebase/context2.ts + const contextSnippet2 = {} + Additional documentation for \`ContextParams\`: + interface ContextParams {} + <|file_sep|>codebase/test.ts + <|fim_prefix|>console.log(prefix line: 1) + console.log(prefix line: 2) + console.log(prefix line: 3) + console.log(prefix line: 4) + console.log(prefix line: 5) + console.log(prefix line: 6) + console.log(prefix line: 7) + console.log(prefix line: 8) + console.log(prefix line: 9) + console.log(prefix line: 10) + console.log(prefix line: 11) + console.log(prefix line: 12) + console.log(prefix line: 13) + console.log(prefix line: 14) + console.log(prefix line: 15) + console.log(prefix line: 16) + console.log(prefix line: 17) + console.log(prefix line: 18) + console.log(prefix line: 19) + console.log(prefix line: 20) + console.log(prefix line: 21) + console.log(prefix line: 22) + console.log(prefix line: 23) + console.log(prefix line: 24) + console.log(prefix line: 25) + console.log(prefix line: 26) + console.log(prefix line: 27) + console.log(prefix line: 28) + console.log(prefix line: 29) + console.log(prefix line: 30) + console.log(prefix line: 31) + console.log(prefix line: 32) + console.log(prefix line: 33) + console.log(prefix line: 34) + console.log(prefix line: 35) + console.log(prefix line: 36) + console.log(prefix line: 37) + console.log(prefix line: 38) + console.log(prefix line: 39) + console.log(prefix line: 40) + console.log(prefix line: 41) + console.log(prefix line: 42) + console.log(prefix line: 43) + console.log(prefix line: 44) + console.log(prefix line: 45) + console.log(prefix line: 46) + console.log(prefix line: 47) + console.log(prefix line: 48) + console.log(prefix line: 49) + console.log(prefix line: 50) + console.log(prefix line: 51) + console.log(prefix line: 52) + console.log(prefix line: 53) + console.log(prefix line: 54) + console.log(prefix line: 55) + console.log(prefix line: 56) + console.log(prefix line: 57) + console.log(prefix line: 58) + console.log(prefix line: 59) + console.log(prefix line: 60) + console.log(prefix line: 61) + console.log(prefix line: 62) + console.log(prefix line: 63) + console.log(prefix line: 64) + console.log(prefix line: 65) + console.log(prefix line: 66) + console.log(prefix line: 67) + console.log(prefix line: 68) + console.log(prefix line: 69) + console.log(prefix line: 70) + console.log(prefix line: 71) + console.log(prefix line: 72) + console.log(prefix line: 73) + console.log(prefix line: 74) + console.log(prefix line: 75) + console.log(prefix line: 76) + console.log(prefix line: 77) + console.log(prefix line: 78) + console.log(prefix line: 79) + console.log(prefix line: 80) + console.log(prefix line: 81) + console.log(prefix line: 82) + console.log(prefix line: 83) + console.log(prefix line: 84) + console.log(prefix line: 85) + console.log(prefix line: 86) + console.log(prefix line: 87) + console.log(prefix line: 88) + console.log(prefix line: 89) + console.log(prefix line: 90) + console.log(prefix line: 91) + console.log(prefix line: 92) + console.log(prefix line: 93) + console.log(prefix line: 94) + console.log(prefix line: 95) + console.log(prefix line: 96) + console.log(prefix line: 97) + console.log(prefix line: 98) + console.log(prefix line: 99) + console.log(prefix line: 100) + function myFunction() { + console.log(1) + console.log(2) + console.log(3) + console.log(4) + <|fim_suffix|> + } + console.log(suffix line: 1) + console.log(suffix line: 2) + console.log(suffix line: 3) + console.log(suffix line: 4) + console.log(suffix line: 5) + console.log(suffix line: 6) + console.log(suffix line: 7) + console.log(suffix line: 8) + console.log(suffix line: 9) + console.log(suffix line: 10) + console.log(suffix line: 11) + console.log(suffix line: 12) + console.log(suffix line: 13) + console.log(suffix line: 14) + console.log(suffix line: 15) + console.log(suffix line: 16) + console.log(suffix line: 17) + console.log(suffix line: 18) + console.log(suffix line: 19) + console.log(suffix line: 20) + console.log(suffix line: 21) + console.log(suffix line: 22) + console.log(suffix line: 23) + console.log(suffix line: 24) + console.log(suffix line: 25)<|fim_middle|>", + }, + ] + `) + }) + }) +}) diff --git a/vscode/src/completions/model-helpers/codeqwen.ts b/vscode/src/completions/model-helpers/codeqwen.ts new file mode 100644 index 000000000000..500070ec5345 --- /dev/null +++ b/vscode/src/completions/model-helpers/codeqwen.ts @@ -0,0 +1,82 @@ +import { + type AutocompleteFileContextSnippet, + type OllamaGenerateParameters, + PromptString, + ps, +} from '@sourcegraph/cody-shared' +import { + DefaultModel, + type FormatIntroSnippetsParams, + type FormatPromptParams, + type GetOllamaPromptParams, +} from './default' + +const EOT_CODEQWEN = '<|endoftext|>' + +export class CodeQwen extends DefaultModel { + stopSequences = [ + '<|repo_name|>', + '<|file_sep|>', + '<|fim_prefix|>', + '<|fim_suffix|>', + '<|fim_middle|>', + EOT_CODEQWEN, + ] + + getOllamaPrompt(promptContext: GetOllamaPromptParams): PromptString { + const { context, currentFileNameComment, prefix, suffix } = promptContext + + const infillPrefix = context.concat(currentFileNameComment, prefix) + + return ps`<|fim_prefix|>${infillPrefix}<|fim_suffix|>${suffix}<|fim_middle|>` + } + + getOllamaRequestOptions(isMultiline: boolean): OllamaGenerateParameters { + const params = { + stop: ['\n', ...this.stopSequences], + temperature: 0.2, + top_k: 40, + top_p: 0.8, + num_predict: 256, + num_gpu: 99, + repeat_penalty: 1.1, + } + + if (isMultiline) { + params.stop = ['\n\n', ...this.stopSequences] + } + + return params + } + + postProcess(content: string): string { + return content.replace(EOT_CODEQWEN, '') + } + + formatIntroSnippets(params: FormatIntroSnippetsParams): PromptString { + let introPrompt = ps`${PromptString.join(params.intro, ps`\n`)}` + if (introPrompt.length > 0) { + introPrompt = ps`${introPrompt}\n` + } + return introPrompt + } + + fileSnippetToPromptString(snippet: AutocompleteFileContextSnippet): PromptString { + const { content } = PromptString.fromAutocompleteContextSnippet(snippet) + return ps`<|file_sep|>${PromptString.fromDisplayPath(snippet.uri)}\n${content}` + } + + formatPrompt(params: FormatPromptParams): PromptString { + // Prompt format for CodeQwen in technical report: https://arxiv.org/pdf/2409.12186 + const { intro, prefix, suffix, repoName, fileName } = params + let introPrefix = ps`` + if (intro.length > 0) { + introPrefix = ps`${intro}\n` + } + const prompt = ps`${intro}<|file_sep|>${fileName}\n<|fim_prefix|>${prefix}<|fim_suffix|>${suffix}<|fim_middle|>` + if (repoName) { + return ps`<|repo_name|>${repoName}\n${prompt}` + } + return prompt + } +} diff --git a/vscode/src/completions/model-helpers/index.ts b/vscode/src/completions/model-helpers/index.ts index 39c815d91c89..ff91a52fbd48 100644 --- a/vscode/src/completions/model-helpers/index.ts +++ b/vscode/src/completions/model-helpers/index.ts @@ -1,6 +1,7 @@ import { Claude } from './claude' import { CodeGemma } from './codegemma' import { CodeLlama } from './codellama' +import { CodeQwen } from './codeqwen' import { DeepseekCoder } from './deepseek' import { DefaultModel } from './default' import { Gemini } from './gemini' @@ -14,6 +15,10 @@ export function getModelHelpers(model: string): DefaultModel { return new CodeLlama() } + if (model.includes('code-qwen')) { + return new CodeQwen() + } + if (model.includes('deepseek')) { return new DeepseekCoder() } diff --git a/vscode/src/completions/providers/fireworks.ts b/vscode/src/completions/providers/fireworks.ts index 7047c6fa2094..eb4a00b6abb3 100644 --- a/vscode/src/completions/providers/fireworks.ts +++ b/vscode/src/completions/providers/fireworks.ts @@ -19,17 +19,10 @@ import { type ProviderFactoryParams, } from './shared/provider' -export const FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V0 = 'deepseek-finetuned-lang-specific-v0' -export const FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V1 = 'deepseek-finetuned-lang-specific-v1' -export const FIREWORKS_DEEPSEEK_7B_LANG_ALL = 'deepseek-finetuned-lang-all-v0' - export const DEEPSEEK_CODER_V2_LITE_BASE = 'deepseek-coder-v2-lite-base' - // Context window experiments with DeepSeek Model export const DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096 = 'deepseek-coder-v2-lite-base-context-4096' -const DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_8192 = 'deepseek-coder-v2-lite-base-context-8192' -const DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_16384 = 'deepseek-coder-v2-lite-base-context-16383' -const DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_32768 = 'deepseek-coder-v2-lite-base-context-32768' +export const CODE_QWEN_7B_V2P5 = 'code-qwen-7b-v2p5' // Model identifiers can be found in https://docs.fireworks.ai/explore/ and in our internal // conversations @@ -41,17 +34,9 @@ const MODEL_MAP = { // Fireworks model identifiers 'llama-code-13b': 'fireworks/accounts/fireworks/models/llama-v2-13b-code', - - [FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V0]: 'fireworks/finetuned-fim-lang-specific-model-ds2-v0', - [FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V1]: 'fireworks/finetuned-fim-lang-specific-model-ds2-v1', - [FIREWORKS_DEEPSEEK_7B_LANG_ALL]: 'accounts/sourcegraph/models/finetuned-fim-lang-all-model-ds2-v0', [DEEPSEEK_CODER_V2_LITE_BASE]: 'fireworks/deepseek-coder-v2-lite-base', - [DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096]: 'accounts/sourcegraph/models/deepseek-coder-v2-lite-base', - [DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_8192]: 'accounts/sourcegraph/models/deepseek-coder-v2-lite-base', - [DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_16384]: - 'accounts/sourcegraph/models/deepseek-coder-v2-lite-base', - [DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_32768]: - 'accounts/sourcegraph/models/deepseek-coder-v2-lite-base', + [DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096]: 'accounts/fireworks/models/deepseek-coder-v2-lite-base', + [CODE_QWEN_7B_V2P5]: 'accounts/fireworks/models/qwen-v2p5-7b', } as const type FireworksModel = @@ -73,20 +58,12 @@ function getMaxContextTokens(model: FireworksModel): number { // Llama 2 on Fireworks supports up to 4k tokens. We're constraining it here to better // compare the results return 2048 - case FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V0: - case FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V1: - case FIREWORKS_DEEPSEEK_7B_LANG_ALL: - case DEEPSEEK_CODER_V2_LITE_BASE: { + case DEEPSEEK_CODER_V2_LITE_BASE: + case CODE_QWEN_7B_V2P5: { return 2048 } case DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096: return 4096 - case DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_8192: - return 8192 - case DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_16384: - return 16384 - case DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_32768: - return 32768 default: return 1200 } diff --git a/vscode/src/completions/providers/shared/get-experiment-model.ts b/vscode/src/completions/providers/shared/get-experiment-model.ts index 0d5c30bd2235..db5bd462f442 100644 --- a/vscode/src/completions/providers/shared/get-experiment-model.ts +++ b/vscode/src/completions/providers/shared/get-experiment-model.ts @@ -11,11 +11,9 @@ import { import { Observable, map } from 'observable-fns' import * as vscode from 'vscode' import { + CODE_QWEN_7B_V2P5, DEEPSEEK_CODER_V2_LITE_BASE, DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096, - FIREWORKS_DEEPSEEK_7B_LANG_ALL, - FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V0, - FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V1, } from '../fireworks' interface ProviderConfigFromFeatureFlags { @@ -88,46 +86,22 @@ function resolveFIMModelExperimentFromFeatureFlags(): ReturnType { - if (fimModelVariant1) { - return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE } - } - if (fimModelVariant2) { - return { provider: 'fireworks', model: FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V0 } - } - if (fimModelVariant3) { - return { provider: 'fireworks', model: FIREWORKS_DEEPSEEK_7B_LANG_SPECIFIC_V1 } - } - if (fimModelVariant4) { - return { provider: 'fireworks', model: FIREWORKS_DEEPSEEK_7B_LANG_ALL } - } - if (fimModelCurrentBest) { - return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096 } - } - if (fimModelControl) { - // Current production model - return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE } - } - - // Extra free traffic - redirect to the current production model which could be different than control + map(([fimModelControl, fimModelVariant1, fimModelVariant2]) => { + if (fimModelVariant1) { + return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE_WINDOW_4096 } + } + if (fimModelVariant2) { + return { provider: 'fireworks', model: CODE_QWEN_7B_V2P5 } + } + if (fimModelControl) { + // Current production model return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE } } - ), + // Extra free traffic - redirect to the current production model which could be different than control + return { provider: 'fireworks', model: DEEPSEEK_CODER_V2_LITE_BASE } + }), distinctUntilChanged() ) } From 419354daa787bd01f026d7d7e03a40f6a4dc1258 Mon Sep 17 00:00:00 2001 From: hitesh-1997 Date: Tue, 1 Oct 2024 22:16:32 +0530 Subject: [PATCH 3/4] fix lint errors --- .../src/completions/model-helpers/__tests__/codeqwen.test.ts | 3 +++ vscode/src/completions/model-helpers/codeqwen.ts | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/vscode/src/completions/model-helpers/__tests__/codeqwen.test.ts b/vscode/src/completions/model-helpers/__tests__/codeqwen.test.ts index a852c7be55a6..e11d90382965 100644 --- a/vscode/src/completions/model-helpers/__tests__/codeqwen.test.ts +++ b/vscode/src/completions/model-helpers/__tests__/codeqwen.test.ts @@ -25,10 +25,13 @@ describe('CodeQwen', () => { "speaker": "human", "text": "<|file_sep|>codebase/context1.ts function contextSnippetOne() {} + <|file_sep|>codebase/context2.ts const contextSnippet2 = {} + Additional documentation for \`ContextParams\`: interface ContextParams {} + <|file_sep|>codebase/test.ts <|fim_prefix|>console.log(prefix line: 1) console.log(prefix line: 2) diff --git a/vscode/src/completions/model-helpers/codeqwen.ts b/vscode/src/completions/model-helpers/codeqwen.ts index 500070ec5345..63c5b662a418 100644 --- a/vscode/src/completions/model-helpers/codeqwen.ts +++ b/vscode/src/completions/model-helpers/codeqwen.ts @@ -54,7 +54,7 @@ export class CodeQwen extends DefaultModel { } formatIntroSnippets(params: FormatIntroSnippetsParams): PromptString { - let introPrompt = ps`${PromptString.join(params.intro, ps`\n`)}` + let introPrompt = ps`${PromptString.join(params.intro, ps`\n\n`)}` if (introPrompt.length > 0) { introPrompt = ps`${introPrompt}\n` } @@ -73,7 +73,7 @@ export class CodeQwen extends DefaultModel { if (intro.length > 0) { introPrefix = ps`${intro}\n` } - const prompt = ps`${intro}<|file_sep|>${fileName}\n<|fim_prefix|>${prefix}<|fim_suffix|>${suffix}<|fim_middle|>` + const prompt = ps`${introPrefix}<|file_sep|>${fileName}\n<|fim_prefix|>${prefix}<|fim_suffix|>${suffix}<|fim_middle|>` if (repoName) { return ps`<|repo_name|>${repoName}\n${prompt}` } From 5a5ae772ef4d92429a0cb57ebea798ba8ac1f8aa Mon Sep 17 00:00:00 2001 From: hitesh-1997 Date: Wed, 2 Oct 2024 08:03:08 +0530 Subject: [PATCH 4/4] fix pr comments --- .../context/context-data-logging.ts | 3 +- .../inline-completion-item-provider.ts | 4 +- vscode/src/completions/logger.test.ts | 16 +++---- vscode/src/completions/logger.ts | 20 ++++---- .../model-helpers/__tests__/codeqwen.test.ts | 1 - .../src/completions/model-helpers/codeqwen.ts | 46 ++----------------- .../process-inline-completions.ts | 1 + 7 files changed, 27 insertions(+), 64 deletions(-) diff --git a/vscode/src/completions/context/context-data-logging.ts b/vscode/src/completions/context/context-data-logging.ts index 0e70271d4d0c..1fbbf93355a1 100644 --- a/vscode/src/completions/context/context-data-logging.ts +++ b/vscode/src/completions/context/context-data-logging.ts @@ -27,6 +27,7 @@ export class ContextRetrieverDataCollection implements vscode.Disposable { private disposables: vscode.Disposable[] = [] private static readonly MAX_PAYLOAD_SIZE_BYTES = 1024 * 1024 // 1 MB private dataCollectionFlagState = false + private gitMetadataInstance = GitHubDotComRepoMetadata.getInstance() private readonly retrieverConfigs: RetrieverConfig[] = [ { identifier: RetrieverIdentifier.RecentCopyRetriever, maxSnippets: 1 }, @@ -96,7 +97,7 @@ export class ContextRetrieverDataCollection implements vscode.Disposable { if (!repoName || !isDotComAuthed() || this.dataCollectionRetrievers.length === 0) { return false } - const gitRepoMetadata = GitHubDotComRepoMetadata.getInstance().getRepoMetadataIfCached(repoName) + const gitRepoMetadata = this.gitMetadataInstance.getRepoMetadataIfCached(repoName) return gitRepoMetadata?.isPublic ?? false } diff --git a/vscode/src/completions/inline-completion-item-provider.ts b/vscode/src/completions/inline-completion-item-provider.ts index a4c3bbb9d210..b0bdd8297e69 100644 --- a/vscode/src/completions/inline-completion-item-provider.ts +++ b/vscode/src/completions/inline-completion-item-provider.ts @@ -848,8 +848,8 @@ export class InlineCompletionItemProvider suggestionEvent.markAsRead({ document: invokedDocument, position: invokedPosition, - docPrefix: completion.requestParams.docContext.completePrefix, - docSuffix: completion.requestParams.docContext.completeSuffix, + completePrefix: completion.requestParams.docContext.completePrefix, + completeSuffix: completion.requestParams.docContext.completeSuffix, }) } }, this.COMPLETION_VISIBLE_DELAY_MS) diff --git a/vscode/src/completions/logger.test.ts b/vscode/src/completions/logger.test.ts index 27c04d4b9c47..bacef18e6679 100644 --- a/vscode/src/completions/logger.test.ts +++ b/vscode/src/completions/logger.test.ts @@ -79,8 +79,8 @@ describe('logger', () => { suggestionEvent?.markAsRead({ document, position, - docPrefix: defaultRequestParams.docContext.completePrefix, - docSuffix: defaultRequestParams.docContext.completeSuffix, + completePrefix: defaultRequestParams.docContext.completePrefix, + completeSuffix: defaultRequestParams.docContext.completeSuffix, }) CompletionLogger.accepted(id, document, item, range(0, 0, 0, 0), false) @@ -119,8 +119,8 @@ describe('logger', () => { firstSuggestionEvent?.markAsRead({ document, position, - docPrefix: defaultRequestParams.docContext.completePrefix, - docSuffix: defaultRequestParams.docContext.completeSuffix, + completePrefix: defaultRequestParams.docContext.completePrefix, + completeSuffix: defaultRequestParams.docContext.completeSuffix, }) const loggerItem = CompletionLogger.getCompletionEvent(id1) @@ -142,8 +142,8 @@ describe('logger', () => { secondSuggestionEvent?.markAsRead({ document, position, - docPrefix: defaultRequestParams.docContext.completePrefix, - docSuffix: defaultRequestParams.docContext.completeSuffix, + completePrefix: defaultRequestParams.docContext.completePrefix, + completeSuffix: defaultRequestParams.docContext.completeSuffix, }) CompletionLogger.accepted(id2, document, item, range(0, 0, 0, 0), false) @@ -172,8 +172,8 @@ describe('logger', () => { thirdSuggestionEvent?.markAsRead({ document, position, - docPrefix: defaultRequestParams.docContext.completePrefix, - docSuffix: defaultRequestParams.docContext.completeSuffix, + completePrefix: defaultRequestParams.docContext.completePrefix, + completeSuffix: defaultRequestParams.docContext.completeSuffix, }) const loggerItem3 = CompletionLogger.getCompletionEvent(id3) diff --git a/vscode/src/completions/logger.ts b/vscode/src/completions/logger.ts index c632f122dbee..9adc4d912c13 100644 --- a/vscode/src/completions/logger.ts +++ b/vscode/src/completions/logger.ts @@ -7,6 +7,7 @@ import { type BillingCategory, type BillingProduct, currentAuthStatusAuthed, + displayPathWithoutWorkspaceFolderPrefix, isDotCom, isNetworkError, telemetryRecorder, @@ -767,7 +768,7 @@ function getInlineContextItemContext( content, startLine, endLine, - filePath: uri.fsPath, + filePath: displayPathWithoutWorkspaceFolderPrefix(uri), })), } } @@ -776,8 +777,8 @@ function suggestionDocumentDiffTracker( interactionId: CompletionAnalyticsID, document: vscode.TextDocument, position: vscode.Position, - docPrefix: string, - docSuffix: string + completePrefix: string, + completeSuffix: string ): void { // If user is not in the same document, we don't track the diff. if (document.uri.scheme !== 'file') { @@ -794,10 +795,11 @@ function suggestionDocumentDiffTracker( Math.min(document.getText().length, document.offsetAt(position) + offsetBytes) ) const trackingRange = new vscode.Range(startPosition, endPosition) - const documentText = docPrefix.slice(-offsetBytes) + docSuffix.slice(0, offsetBytes) + const documentText = completePrefix.slice(-offsetBytes) + completeSuffix.slice(0, offsetBytes) const persistenceTimeoutList = [ - 20 * 1000, // 20 seconds + 15 * 1000, // 15 seconds + 30 * 1000, // 30 seconds 60 * 1000, // 60 seconds ] persistenceTracker.track({ @@ -818,8 +820,8 @@ function suggestionDocumentDiffTracker( type SuggestionMarkReadParam = { document: vscode.TextDocument position: vscode.Position - docPrefix: string - docSuffix: string + completePrefix: string + completeSuffix: string } // Suggested completions will not be logged immediately. Instead, we log them when we either hide @@ -881,8 +883,8 @@ export function prepareSuggestionEvent({ event.params.id, param.document, param.position, - param.docPrefix, - param.docSuffix + param.completePrefix, + param.completeSuffix ) } }, diff --git a/vscode/src/completions/model-helpers/__tests__/codeqwen.test.ts b/vscode/src/completions/model-helpers/__tests__/codeqwen.test.ts index e11d90382965..d60be6bc2b80 100644 --- a/vscode/src/completions/model-helpers/__tests__/codeqwen.test.ts +++ b/vscode/src/completions/model-helpers/__tests__/codeqwen.test.ts @@ -31,7 +31,6 @@ describe('CodeQwen', () => { Additional documentation for \`ContextParams\`: interface ContextParams {} - <|file_sep|>codebase/test.ts <|fim_prefix|>console.log(prefix line: 1) console.log(prefix line: 2) diff --git a/vscode/src/completions/model-helpers/codeqwen.ts b/vscode/src/completions/model-helpers/codeqwen.ts index 63c5b662a418..501894ba18f4 100644 --- a/vscode/src/completions/model-helpers/codeqwen.ts +++ b/vscode/src/completions/model-helpers/codeqwen.ts @@ -1,15 +1,5 @@ -import { - type AutocompleteFileContextSnippet, - type OllamaGenerateParameters, - PromptString, - ps, -} from '@sourcegraph/cody-shared' -import { - DefaultModel, - type FormatIntroSnippetsParams, - type FormatPromptParams, - type GetOllamaPromptParams, -} from './default' +import { type AutocompleteFileContextSnippet, PromptString, ps } from '@sourcegraph/cody-shared' +import { DefaultModel, type FormatIntroSnippetsParams, type FormatPromptParams } from './default' const EOT_CODEQWEN = '<|endoftext|>' @@ -23,32 +13,6 @@ export class CodeQwen extends DefaultModel { EOT_CODEQWEN, ] - getOllamaPrompt(promptContext: GetOllamaPromptParams): PromptString { - const { context, currentFileNameComment, prefix, suffix } = promptContext - - const infillPrefix = context.concat(currentFileNameComment, prefix) - - return ps`<|fim_prefix|>${infillPrefix}<|fim_suffix|>${suffix}<|fim_middle|>` - } - - getOllamaRequestOptions(isMultiline: boolean): OllamaGenerateParameters { - const params = { - stop: ['\n', ...this.stopSequences], - temperature: 0.2, - top_k: 40, - top_p: 0.8, - num_predict: 256, - num_gpu: 99, - repeat_penalty: 1.1, - } - - if (isMultiline) { - params.stop = ['\n\n', ...this.stopSequences] - } - - return params - } - postProcess(content: string): string { return content.replace(EOT_CODEQWEN, '') } @@ -69,11 +33,7 @@ export class CodeQwen extends DefaultModel { formatPrompt(params: FormatPromptParams): PromptString { // Prompt format for CodeQwen in technical report: https://arxiv.org/pdf/2409.12186 const { intro, prefix, suffix, repoName, fileName } = params - let introPrefix = ps`` - if (intro.length > 0) { - introPrefix = ps`${intro}\n` - } - const prompt = ps`${introPrefix}<|file_sep|>${fileName}\n<|fim_prefix|>${prefix}<|fim_suffix|>${suffix}<|fim_middle|>` + const prompt = ps`${intro}<|file_sep|>${fileName}\n<|fim_prefix|>${prefix}<|fim_suffix|>${suffix}<|fim_middle|>` if (repoName) { return ps`<|repo_name|>${repoName}\n${prompt}` } diff --git a/vscode/src/completions/text-processing/process-inline-completions.ts b/vscode/src/completions/text-processing/process-inline-completions.ts index 6a76e50d7e67..11c23fcad17f 100644 --- a/vscode/src/completions/text-processing/process-inline-completions.ts +++ b/vscode/src/completions/text-processing/process-inline-completions.ts @@ -141,6 +141,7 @@ const RESPONSE_HEADERS_TO_SAVE = [ 'fireworks-prompt-tokens', 'fireworks-server-time-to-first-token', 'fireworks-speculation-matched-tokens', + 'x-upstream-time-to-first-token', ] as const type ResponseHeaderName = (typeof RESPONSE_HEADERS_TO_SAVE)[number]