diff --git a/.dockerignore b/.dockerignore index 1b72cff..f821cd2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -21,6 +21,7 @@ !/.npmrc !/tsconfig.json !/requirements.txt +!/.nuxt # Ignore unnecessary files inside allowed directories # This should go after the allowed directories diff --git a/Dockerfile b/Dockerfile index c4a841b..8c8b8aa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,18 +6,21 @@ RUN pnpm install --frozen-lockfile COPY . . RUN pnpm build -FROM python:3.11-slim +FROM alpine:3.14 as weaviate +ARG WEAVIATE_VERSION=1.24.8 +WORKDIR /app +RUN wget https://github.com/weaviate/weaviate/releases/download/v${WEAVIATE_VERSION}/weaviate-v${WEAVIATE_VERSION}-linux-amd64.tar.gz && \ + tar -xzf weaviate-v${WEAVIATE_VERSION}-linux-amd64.tar.gz + +FROM node:20-alpine ENV NUXT_DATA_PATH=/app/data ENV NUXT_MIGRATIONS_PATH=/app/migrations ENV NODE_ENV=production ENV NITRO_PORT=3000 EXPOSE 3000 WORKDIR /app -RUN apt update -y && apt install curl git musl-dev -y && \ - ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1 && \ - curl -sL https://deb.nodesource.com/setup_20.x | bash - && \ - apt-get install -y nodejs && \ - pip install chromadb +RUN apk update && apk add git musl-dev +COPY --from=weaviate /app/weaviate /bin/weaviate COPY docker/start.sh . 
COPY server/db/migrations /app/migrations
 COPY --from=builder /app/.output .output
diff --git a/docker-compose.yml b/docker-compose.yml
index b305b56..1cb1b8d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,13 +1,27 @@
 version: '3.4'
 
 services:
-  chroma:
-    image: chromadb/chroma
+  weaviate:
+    command:
+      - --host
+      - 0.0.0.0
+      - --port
+      - '8080'
+      - --scheme
+      - http
+    image: cr.weaviate.io/semitechnologies/weaviate:1.24.8
     ports:
-      - 8000:8000
-    volumes:
-      - chroma_data:/var/lib/weaviate
+      - 8080:8080
     restart: on-failure:0
+    volumes:
+      - weaviate_data:/var/lib/weaviate
+    environment:
+      QUERY_DEFAULTS_LIMIT: 25
+      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true'
+      PERSISTENCE_DATA_PATH: '/var/lib/weaviate'
+      DEFAULT_VECTORIZER_MODULE: 'none'
+      ENABLE_MODULES: ''
+      CLUSTER_HOSTNAME: 'node1'
 
 volumes:
-  chroma_data:
+  weaviate_data:
diff --git a/docker/start.sh b/docker/start.sh
index 0dac167..36d6230 100755
--- a/docker/start.sh
+++ b/docker/start.sh
@@ -1,6 +1,15 @@
-#!/bin/bash
+#!/bin/sh
 set -m # to make job control work
 
 node .output/server/index.mjs --port 3000 --host 0.0.0.0 &
-chroma run --path /app/data/chroma --port 8000 --host 0.0.0.0 &
+
+# Settings for the Weaviate binary that runs inside this container next to the Nuxt server.
+export PERSISTENCE_DATA_PATH=/app/vectorstore
+export QUERY_DEFAULTS_LIMIT=25
+export AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED='true'
+export ENABLE_MODULES=''
+export DEFAULT_VECTORIZER_MODULE='none'
+export CLUSTER_HOSTNAME='node1'
+# The server's Weaviate client (server/utils/vectorStore.ts) connects to localhost:8080, so listen on that port.
+weaviate --port 8080 --host 0.0.0.0 --scheme http &
 fg %1 # gross!
diff --git a/nuxt.config.ts b/nuxt.config.ts index 80b4c92..568bcbf 100644 --- a/nuxt.config.ts +++ b/nuxt.config.ts @@ -4,7 +4,6 @@ export default defineNuxtConfig({ telemetry: true, runtimeConfig: { ai: { - vectorDatabaseUrl: 'http://localhost:8000', token: '', }, auth: { diff --git a/package.json b/package.json index 997be6c..22832e1 100644 --- a/package.json +++ b/package.json @@ -40,8 +40,8 @@ "@gitbeaker/rest": "^40.0.1", "@langchain/community": "^0.0.41", "@langchain/openai": "^0.0.23", + "@langchain/weaviate": "^0.0.1", "better-sqlite3": "^9.4.3", - "chromadb": "^1.8.1", "consola": "^3.2.3", "drizzle-orm": "^0.30.4", "glob": "^10.3.10", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d6bedd8..8b813d0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -13,16 +13,16 @@ dependencies: version: 40.0.1 '@langchain/community': specifier: ^0.0.41 - version: 0.0.41(better-sqlite3@9.4.3)(chromadb@1.8.1)(jsonwebtoken@9.0.2) + version: 0.0.41(better-sqlite3@9.4.3)(jsonwebtoken@9.0.2) '@langchain/openai': specifier: ^0.0.23 version: 0.0.23 + '@langchain/weaviate': + specifier: ^0.0.1 + version: 0.0.1(graphql@16.8.1) better-sqlite3: specifier: ^9.4.3 version: 9.4.3 - chromadb: - specifier: ^1.8.1 - version: 1.8.1 consola: specifier: ^3.2.3 version: 3.2.3 @@ -40,7 +40,7 @@ dependencies: version: 4.0.0 langchain: specifier: ^0.1.28 - version: 0.1.28(better-sqlite3@9.4.3)(chromadb@1.8.1)(jsonwebtoken@9.0.2) + version: 0.1.28(better-sqlite3@9.4.3)(jsonwebtoken@9.0.2) nuxt-icon: specifier: ^0.6.10 version: 0.6.10(nuxt@3.11.1)(vite@4.4.9)(vue@3.4.21) @@ -1533,6 +1533,14 @@ packages: '@gitbeaker/requester-utils': 40.0.1 dev: false + /@graphql-typed-document-node/core@3.2.0(graphql@16.8.1): + resolution: {integrity: sha512-mB9oAsNCm9aM3/SOv4YtBMqZbYj10R7dkq8byBqxGY/ncFwhf2oQzMV+LCRlWoDSEBJ3COiR1yeDvMtsoOsuFQ==} + peerDependencies: + graphql: ^0.8.0 || ^0.9.0 || ^0.10.0 || ^0.11.0 || ^0.12.0 || ^0.13.0 || ^14.0.0 || ^15.0.0 || ^16.0.0 || ^17.0.0 + dependencies: + graphql: 
16.8.1 + dev: false + /@hapi/hoek@9.3.0: resolution: {integrity: sha512-/c6rf4UJlmHlC9b5BaNvzAcFv7HZ2QHaV0D4/HNlBdvFnvQq8RI4kYdhyPCl7Xj+oWvTWQ8ujhqS53LIgAe6KQ==} dev: true @@ -1690,7 +1698,7 @@ packages: /@kwsites/promise-deferred@1.1.1: resolution: {integrity: sha512-GaHYm+c0O9MjZRu0ongGBRbinu8gVAMd2UZjji6jVmqKtZluZnptXGWhz1E8j8D2HJ3f/yMxKAUC0b+57wncIw==} - /@langchain/community@0.0.41(better-sqlite3@9.4.3)(chromadb@1.8.1)(jsonwebtoken@9.0.2): + /@langchain/community@0.0.41(better-sqlite3@9.4.3)(jsonwebtoken@9.0.2): resolution: {integrity: sha512-8QvA/Gx95ijUxmH3amj1La6Rl9NmMCnif802q/Y2zze+SW2bgeF7brC6n2BIui2GYUQlCDBoga/QZ2lPqPM6vg==} engines: {node: '>=18'} peerDependencies: @@ -1966,7 +1974,6 @@ packages: '@langchain/core': 0.1.49 '@langchain/openai': 0.0.23 better-sqlite3: 9.4.3 - chromadb: 1.8.1 expr-eval: 2.0.2 flat: 5.0.2 jsonwebtoken: 9.0.2 @@ -2007,6 +2014,18 @@ packages: - encoding dev: false + /@langchain/weaviate@0.0.1(graphql@16.8.1): + resolution: {integrity: sha512-Lf6zgTf6i/fsPNlkDxPRLA3LEz2Wwgk6LNe54dByt0oZM4W+N4n5n/gDwojsXAKNEF5alXUv2N6yAOcUuXSbxg==} + engines: {node: '>=18'} + dependencies: + '@langchain/core': 0.1.49 + uuid: 9.0.1 + weaviate-ts-client: 2.1.1(graphql@16.8.1) + transitivePeerDependencies: + - encoding + - graphql + dev: false + /@mapbox/node-pre-gyp@1.0.11: resolution: {integrity: sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==} hasBin: true @@ -4668,27 +4687,6 @@ packages: resolution: {integrity: sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==} engines: {node: '>=10'} - /chromadb@1.8.1: - resolution: {integrity: sha512-NpbYydbg4Uqt/9BXKgkZXn0fqpsh2Z1yjhkhKH+rcHMoq0pwI18BFSU2QU7Fk/ZypwGefW2AvqyE/3ZJIgy4QA==} - engines: {node: '>=14.17.0'} - peerDependencies: - '@google/generative-ai': ^0.1.1 - cohere-ai: ^5.0.0 || ^6.0.0 || ^7.0.0 - openai: ^3.0.0 || ^4.0.0 - peerDependenciesMeta: - '@google/generative-ai': - optional: true - cohere-ai: 
- optional: true - openai: - optional: true - dependencies: - cliui: 8.0.1 - isomorphic-fetch: 3.0.0 - transitivePeerDependencies: - - encoding - dev: false - /ci-info@4.0.0: resolution: {integrity: sha512-TdHqgGf9odd8SXNuxtUBVx8Nv+qZOejE6qyqiy5NtbYYQOeFa6zmHkxlPzmaLxWWHsU6nJmB7AETdVPi+2NBUg==} engines: {node: '>=8'} @@ -4911,6 +4909,14 @@ packages: resolution: {integrity: sha512-w+VAWjiBJmKYeeK+i0ur3G47LcKNgFuWwb8LVJTaXSS2ExtQ5zdiIVnuysgB3N457gTaSllme0qTpdsJWK/wIg==} hasBin: true + /cross-fetch@3.1.8: + resolution: {integrity: sha512-cvA+JwZoU0Xq+h6WkMvAUqPEYy92Obet6UdKLfW60qn99ftItKjB5T+BkyWOFWe2pUyfQ+IJHmpOTznqk1M6Kg==} + dependencies: + node-fetch: 2.7.0 + transitivePeerDependencies: + - encoding + dev: false + /cross-spawn@7.0.3: resolution: {integrity: sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==} engines: {node: '>= 8'} @@ -5742,6 +5748,11 @@ packages: pathe: 1.1.2 ufo: 1.5.3 + /extract-files@9.0.0: + resolution: {integrity: sha512-CvdFfHkC95B4bBBk36hcEmvdR2awOdhhVUYH6S/zrVj3477zven/fJMYg7121h4T1xHZC+tetUpubpAhxwI7hQ==} + engines: {node: ^10.17.0 || ^12.0.0 || >= 13.7.0} + dev: false + /fast-fifo@1.3.2: resolution: {integrity: sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==} @@ -5836,6 +5847,15 @@ packages: resolution: {integrity: sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==} dev: false + /form-data@3.0.1: + resolution: {integrity: sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg==} + engines: {node: '>= 6'} + dependencies: + asynckit: 0.4.0 + combined-stream: 1.0.8 + mime-types: 2.1.35 + dev: false + /form-data@4.0.0: resolution: {integrity: sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==} engines: {node: '>= 6'} @@ -6154,6 +6174,25 @@ packages: /graceful-fs@4.2.11: resolution: {integrity: 
sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} + /graphql-request@5.2.0(graphql@16.8.1): + resolution: {integrity: sha512-pLhKIvnMyBERL0dtFI3medKqWOz/RhHdcgbZ+hMMIb32mEPa5MJSzS4AuXxfI4sRAu6JVVk5tvXuGfCWl9JYWQ==} + peerDependencies: + graphql: 14 - 16 + dependencies: + '@graphql-typed-document-node/core': 3.2.0(graphql@16.8.1) + cross-fetch: 3.1.8 + extract-files: 9.0.0 + form-data: 3.0.1 + graphql: 16.8.1 + transitivePeerDependencies: + - encoding + dev: false + + /graphql@16.8.1: + resolution: {integrity: sha512-59LZHPdGZVh695Ud9lRzPBVTtlX9ZCV150Er2W43ro37wVof0ctenSaskPPjN7lVTIN8mSZt8PHUNKZuNQUuxw==} + engines: {node: ^12.22.0 || ^14.16.0 || ^16.0.0 || >=17.0.0} + dev: false + /gzip-size@6.0.0: resolution: {integrity: sha512-ax7ZYomf6jqPTQ4+XCpUGyXKHk5WweS+e05MBO4/y3WJ5RkmPXNKvX+bx1behVILVwr6JSQvZAku021CHPXG3Q==} engines: {node: '>=10'} @@ -6592,15 +6631,6 @@ packages: resolution: {integrity: sha512-LpB/54B+/2J5hqQ7imZHfdU31OlgQqx7ZicVlkm9kzg9/w8GKLEcFfJl/t7DCEDueOyBAD6zCCwTO6Fzs0NoEQ==} engines: {node: '>=16'} - /isomorphic-fetch@3.0.0: - resolution: {integrity: sha512-qvUtwJ3j6qwsF3jLxkZ72qCgjMysPzDfeV240JHiGZsANBYd+EEuu35v7dfrJ9Up0Ak07D7GGSkGhCHTqg/5wA==} - dependencies: - node-fetch: 2.7.0 - whatwg-fetch: 3.6.20 - transitivePeerDependencies: - - encoding - dev: false - /jackspeak@2.3.6: resolution: {integrity: sha512-N3yCS/NegsOBokc8GAdM8UcmfsKiSS8cipheD/nivzr700H+nsMOxJjQnvwOcRYVuFkdH0wGUvW2WbXGmrZGbQ==} engines: {node: '>=14'} @@ -6808,7 +6838,7 @@ packages: /kolorist@1.8.0: resolution: {integrity: sha512-Y+60/zizpJ3HRH8DCss+q95yr6145JXZo46OTpFvDZWLfRCE4qChOyk1b26nMaNpfHHgxagk9dXT5OP0Tfe+dQ==} - /langchain@0.1.28(better-sqlite3@9.4.3)(chromadb@1.8.1)(jsonwebtoken@9.0.2): + /langchain@0.1.28(better-sqlite3@9.4.3)(jsonwebtoken@9.0.2): resolution: {integrity: sha512-LNuILtOSFptfOqDwUwQAl7B0dYrBtxzVZ6lDhc5DA8rk+pyTCy12HQO7t8bGqC8oQlEPYPKaom8VRlvfupUBTw==} engines: {node: '>=18'} peerDependencies: @@ -6968,11 
+6998,10 @@ packages: optional: true dependencies: '@anthropic-ai/sdk': 0.9.1 - '@langchain/community': 0.0.41(better-sqlite3@9.4.3)(chromadb@1.8.1)(jsonwebtoken@9.0.2) + '@langchain/community': 0.0.41(better-sqlite3@9.4.3)(jsonwebtoken@9.0.2) '@langchain/core': 0.1.49 '@langchain/openai': 0.0.23 binary-extensions: 2.2.0 - chromadb: 1.8.1 expr-eval: 2.0.2 js-tiktoken: 1.0.10 js-yaml: 4.1.0 @@ -10597,6 +10626,17 @@ packages: - debug dev: true + /weaviate-ts-client@2.1.1(graphql@16.8.1): + resolution: {integrity: sha512-d8yc2KnIEIV1beHAU8mhrElT3BoROoXGDsLlqFX8QGx3G+gOiPTRMc7SLy4F17+LvaUaTD0XkHvWX++4iehnsg==} + engines: {node: '>=16.0.0'} + dependencies: + graphql-request: 5.2.0(graphql@16.8.1) + uuid: 9.0.1 + transitivePeerDependencies: + - encoding + - graphql + dev: false + /web-streams-polyfill@3.3.3: resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==} engines: {node: '>= 8'} @@ -10621,10 +10661,6 @@ packages: /webpack-virtual-modules@0.6.1: resolution: {integrity: sha512-poXpCylU7ExuvZK8z+On3kX+S8o/2dQ/SVYueKA0D4WEMXROXgY8Ez50/bQEUmvoSMMrWcrJqCHuhAbsiwg7Dg==} - /whatwg-fetch@3.6.20: - resolution: {integrity: sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==} - dev: false - /whatwg-url@5.0.0: resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} dependencies: diff --git a/server/api/forges/[forge_id]/index.delete.ts b/server/api/forges/[forge_id]/index.delete.ts index 2b9711e..b7c6fb0 100644 --- a/server/api/forges/[forge_id]/index.delete.ts +++ b/server/api/forges/[forge_id]/index.delete.ts @@ -1,4 +1,4 @@ -import { forgeSchema, userForgesSchema } from '~/server/schemas'; +import { forgeSchema, repoSchema, userForgesSchema } from '~/server/schemas'; import { eq, and } from 'drizzle-orm'; export default defineEventHandler(async (event) => { @@ -15,11 +15,16 @@ export default 
defineEventHandler(async (event) => { await db.delete(userForgesSchema).where(eq(userForgesSchema.forgeId, forgeId)).run(); - const forge = await db + await db .delete(forgeSchema) .where(and(eq(forgeSchema.owner, user.id), eq(forgeSchema.id, forgeId))) .returning() .get(); - return forge; + const repos = await db.select().from(repoSchema).where(eq(repoSchema.forgeId, forgeId)).all(); + for await (const repo of repos) { + await deleteRepo(repo.id); + } + + return 'ok'; }); diff --git a/server/api/repos/[repo_id]/chat.post.ts b/server/api/repos/[repo_id]/chat.post.ts index 4dc93c5..c6df233 100644 --- a/server/api/repos/[repo_id]/chat.post.ts +++ b/server/api/repos/[repo_id]/chat.post.ts @@ -1,5 +1,3 @@ -import { OpenAIEmbeddings } from '@langchain/openai'; -import { Chroma } from '@langchain/community/vectorstores/chroma'; import { ChatOpenAI } from '@langchain/openai'; import { BufferMemory } from 'langchain/memory'; import { @@ -42,24 +40,11 @@ export default defineEventHandler(async (event) => { const model = new ChatOpenAI({ modelName: 'gpt-4', openAIApiKey: config.ai.token }).pipe(new StringOutputParser()); - const vectorStore = await Chroma.fromExistingCollection( - new OpenAIEmbeddings({ - openAIApiKey: config.ai.token, - }), - { - collectionName: `repo-${repo.id}`, - url: config.ai.vectorDatabaseUrl, - collectionMetadata: { - 'hnsw:space': 'cosine', - }, - }, - ); + const vectorStore = await getRepoVectorStore(repo.id); const retriever = vectorStore.asRetriever({ - // TODO: use max marginal relevance search - // searchType: 'mmr', // Use max marginal relevance search - // searchKwargs: { fetchK: 5 }, - searchType: 'similarity', + searchType: 'mmr', // Use max marginal relevance search + searchKwargs: { fetchK: 5 }, }); const memory = new BufferMemory({ @@ -119,14 +104,5 @@ export default defineEventHandler(async (event) => { question: message, }); - await memory.saveContext( - { - input: message, - }, - { - output: result, - }, - ); - return { answer: result 
}; }); diff --git a/server/api/repos/[repo_id]/clone.post.ts b/server/api/repos/[repo_id]/clone.post.ts index f03d1d2..eaa7606 100644 --- a/server/api/repos/[repo_id]/clone.post.ts +++ b/server/api/repos/[repo_id]/clone.post.ts @@ -4,8 +4,6 @@ import { repoSchema } from '~/server/schemas'; import { eq } from 'drizzle-orm'; import { TextLoader } from 'langchain/document_loaders/fs/text'; import { CharacterTextSplitter, RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; -import { OpenAIEmbeddings } from '@langchain/openai'; -import { Chroma } from '@langchain/community/vectorstores/chroma'; import { Document } from 'langchain/document'; import { Glob } from 'glob'; @@ -125,7 +123,7 @@ export default defineEventHandler(async (event) => { docs.push(...(await splitter.splitDocuments(issueDocs))); - log(`indexed ${page * perPage} issues`); + log(`indexed ${issueDocs.length} issues`); const javascriptSplitter = RecursiveCharacterTextSplitter.fromLanguage('js', { chunkSize: 2000, @@ -196,6 +194,9 @@ export default defineEventHandler(async (event) => { nocase: true, ignore, }); + + // TODO: index only the files that were updated since the last fetch + for await (const file of glob) { const loader = new TextLoader(path.join(repoPath, file)); const fileDocs = await splitter.splitDocuments(await loader.load()); @@ -207,7 +208,7 @@ export default defineEventHandler(async (event) => { }), ); - console.log('indexing', file); + log('indexing', file); // TODO: split documents based on language // switch (path.extname(file)) { @@ -224,21 +225,14 @@ export default defineEventHandler(async (event) => { // } } - console.log({ docs: docs.length }); - - await Chroma.fromDocuments( - docs, - new OpenAIEmbeddings({ - openAIApiKey: config.ai.token, - }), - { - collectionName: `repo-${repo.id}`, - url: config.ai.vectorDatabaseUrl, - collectionMetadata: { - 'hnsw:space': 'cosine', - }, - }, - ); + log({ docs: docs.length }); + + await deleteRepoVectorStore(repo.id); + const 
vectorStore = await getRepoVectorStore(repo.id); + + log('deleted old documents'); + + await vectorStore.addDocuments(docs); await db .update(repoSchema) diff --git a/server/api/repos/[repo_id]/index.delete.ts b/server/api/repos/[repo_id]/index.delete.ts index 130bf39..14a369d 100644 --- a/server/api/repos/[repo_id]/index.delete.ts +++ b/server/api/repos/[repo_id]/index.delete.ts @@ -1,8 +1,3 @@ -import * as path from 'path'; -import { promises as fs } from 'fs'; -import { repoSchema, userReposSchema } from '~/server/schemas'; -import { eq } from 'drizzle-orm'; - export default defineEventHandler(async (event) => { const user = await requireUser(event); @@ -17,19 +12,7 @@ export default defineEventHandler(async (event) => { const repo = await requireAccessToRepo(user, repoId); - const config = useRuntimeConfig(); - const folder = path.join(config.data_path, repo.id.toString()); - - await createDataFolder(); - - try { - await fs.rm(folder, { recursive: true }); - } catch (e) { - console.error('error while deleting repo folder', e); - } - - await db.delete(userReposSchema).where(eq(userReposSchema.repoId, repoId)).run(); - await db.delete(repoSchema).where(eq(repoSchema.id, repoId)).run(); + await deleteRepo(repo.id); return 'ok'; }); diff --git a/server/utils/repo.ts b/server/utils/repo.ts new file mode 100644 index 0000000..a0edc98 --- /dev/null +++ b/server/utils/repo.ts @@ -0,0 +1,24 @@ +import path from 'node:path'; +import fs from 'node:fs/promises'; +import { repoSchema, userReposSchema } from '../schemas'; +import { eq } from 'drizzle-orm'; + +export async function deleteRepo(repoId: number) { + const config = useRuntimeConfig(); + const folder = path.join(config.data_path, repoId.toString()); + + await createDataFolder(); + + try { + await fs.rm(folder, { recursive: true }); + } catch (e) { + console.error('error while deleting repo folder', e); + } + + await db.delete(userReposSchema).where(eq(userReposSchema.repoId, repoId)).run(); + await 
db.delete(repoSchema).where(eq(repoSchema.id, repoId)).run();
+
+  await deleteRepoVectorStore(repoId);
+
+  return 'ok';
+}
diff --git a/server/utils/vectorStore.ts b/server/utils/vectorStore.ts
new file mode 100644
index 0000000..274d30f
--- /dev/null
+++ b/server/utils/vectorStore.ts
@@ -0,0 +1,85 @@
+import { OpenAIEmbeddings } from '@langchain/openai';
+import weaviate from 'weaviate-ts-client';
+import { WeaviateStore } from '@langchain/weaviate';
+
+// Name of the single multi-tenant Weaviate class; every repo is stored as a tenant named `repo-<id>`.
+const indexName = 'Repos';
+
+// NOTE(review): empty placeholder that ignores `repoId` - presumably unfinished; implement or remove.
+export async function getRepoVectorStoreFromDocs(repoId: number) {}
+
+/**
+ * Creates a Weaviate client for the instance at http://localhost:8080 and
+ * passes the configured AI token as the `X-OpenAI-Api-Key` header.
+ */
+export async function getVectorStoreClient() {
+  const config = useRuntimeConfig();
+
+  return weaviate.client({
+    scheme: 'http',
+    host: 'localhost:8080',
+    headers: {
+      'X-OpenAI-Api-Key': config.ai.token,
+    },
+  });
+}
+
+/**
+ * Returns the vector store scoped to one repo's tenant, creating the shared
+ * class and the tenant first when they do not exist yet.
+ */
+export async function getRepoVectorStore(repoId: number) {
+  const config = useRuntimeConfig();
+
+  const client = await getVectorStoreClient();
+
+  const exists = await client.schema.exists(indexName);
+  if (!exists) {
+    await client.schema
+      .classCreator()
+      .withClass({
+        class: indexName,
+        multiTenancyConfig: { enabled: true },
+      })
+      .do();
+  }
+
+  const tenantName = `repo-${repoId}`;
+  const tenants = await client.schema.tenantsGetter(indexName).do();
+  if (!tenants.some((t) => t.name === tenantName)) {
+    await client.schema.tenantsCreator(indexName, [{ name: tenantName }]).do();
+  }
+
+  return await WeaviateStore.fromExistingIndex(
+    new OpenAIEmbeddings({
+      openAIApiKey: config.ai.token,
+    }),
+    {
+      client,
+      indexName,
+      tenant: tenantName,
+    },
+  );
+}
+
+/**
+ * Deletes a repo's tenant from the shared class. Does nothing when the class
+ * or the tenant does not exist.
+ */
+export async function deleteRepoVectorStore(repoId: number) {
+  const client = await getVectorStoreClient();
+
+  const exists = await client.schema.exists(indexName);
+  if (!exists) {
+    return;
+  }
+
+  const tenantName = `repo-${repoId}`;
+  const tenants = await client.schema.tenantsGetter(indexName).do();
+  // Delete only when the tenant is actually present in the class.
+  if (tenants.some((t) => t.name === tenantName)) {
+    await client.schema.tenantsDeleter(indexName, [tenantName]).do();
+  }
+
+  // await client.data.deleter().withClassName(indexName).withTenant(`repo-${repoId}`).do();
+}