Add backend service

Signed-off-by: Andreia Ocănoaia <[email protected]>
andreia-oca · Jun 9, 2024 · 13d9d6e
1 parent bd587e5
commit 13d9d6e
Show file tree

Hide file tree

Showing 10 changed files with 511 additions and 3,904 deletions.
diff --git a/server/backend.ts b/server/backend.ts
@@ -22,10 +22,14 @@ export type Talks = {
   bio: string;
 }
 
+const CONTEXT_DOCS_NUMBER = 15
+
 @GenezioDeploy()
 export class BackendService {
   constructor() {}
 
+  // I am a fullstack software engineer interested in: open source, generative ai, backend technologies, cloud, cloud native, deployment, dev tools.
+  // I am a product engineer interested in leadership, defining clear scopes, user experience, getting feedback
   async ask(user: UserDescription): Promise<string> {
       const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
       if (!OPENAI_API_KEY) {
@@ -34,27 +38,6 @@ export class BackendService {
         );
       }
 
-      // Define the OpenAI model
-      const model = new OpenAI({
-        modelName: "gpt-4o",
-        openAIApiKey: OPENAI_API_KEY,
-        temperature: 0,
-        verbose: true
-      });
-
-      // Define the prompt that will be fed to the model
-      const prompt = ChatPromptTemplate.fromMessages([
-        [
-          "ai",
-          `You are a helpful assistant for ${user.name}. Based on the user description select the top 3 talks from the context that are most relevant to the user.
-
-{context}`,
-        ],
-        [
-          "human",
-          `My name is ${user.name}. I am a ${user.description}.`,],
-      ]);
-
       // Set the database path
       const database = "./lancedb";
       // Connect to the database
@@ -64,24 +47,79 @@ export class BackendService {
 
       // Initialize the vector store object with the OpenAI embeddings and the table
       const vectorStore = new LanceDB(new OpenAIEmbeddings(), { table });
+
+      // Debugging: Retrieve the most similar context to the input question
+      const result = await vectorStore.similaritySearch(user.description, CONTEXT_DOCS_NUMBER);
+      for (const item of result) {
+        console.log("Context metadata: ", item.metadata);
+        console.log("Context content: ", item.pageContent.slice(0, 10));
+      }
+
       // Retrieve the most similar context to the input question
-      const retriever = vectorStore.asRetriever(1);
-      // Create an output parser that will convert the model's response to a string
-      const outputParser = new StringOutputParser();
+      const retriever = vectorStore.asRetriever(
+        {
+          vectorStore: vectorStore,
+          k: CONTEXT_DOCS_NUMBER,
+          searchType: "similarity",
+          filter: {},
+        },
+        {
+          verbose: true
+        },
+      );
 
       // Create a pipeline that will feed the input question and the database retrieved context to the model
       const setupAndRetrieval = RunnableMap.from({
         context: new RunnableLambda({
-          // eslint-disable-next-line @typescript-eslint/no-explicit-any
-          func: (input: string) => retriever.invoke(input).then((response) => response[0].pageContent),
-        }).withConfig({ runName: "contextRetriever" }),
+          func: (input: string) => {
+            return retriever.invoke(input).then((response) => response.map(item => item.pageContent).join(' ')
+          )
+          }
+        }).withConfig({ runName: "context" }),
         question: new RunnablePassthrough(),
       });
 
+      // Define the prompt that will be fed to the model
+      const prompt = ChatPromptTemplate.fromMessages([
+        [
+          "ai",
+          `Your task is to advise me on the top 3 speakers I should see at a conference.
+
+Based on the provided user description select the top 3 speakers you would recommend to the user.
+You must also mention why you selected these speakers.
+
+You must respond as a json object with the following structure: a list of speakers with the following fields: speaker, why.
+
+Do not add any additional information to the response.
+
+Respond only based on the context provided below - do not use any external information:
+
+Context: {context}`,
+        ],
+        [
+          "human",
+          `User description: {question}`,],
+      ]);
+
+      // Define the OpenAI model
+      const model = new OpenAI({
+        modelName: "gpt-4o",
+        openAIApiKey: OPENAI_API_KEY,
+        temperature: 0.9,
+        verbose: true
+      });
+
+      // Create an output parser that will convert the model's response to a string
+      const outputParser = new StringOutputParser();
+
       // Feed the input question and the database retrieved context to the model
       const chain = setupAndRetrieval.pipe(prompt).pipe(model).pipe(outputParser);
+
       // Invoke the model to answer the question
-      const response = await chain.invoke(user.description);
+      const response = await chain.invoke(
+        user.description,
+      );
+
       console.log("Answer:", response);
 
       return response;

diff --git a/server/createVectorDatabase.ts b/server/createVectorDatabase.ts
@@ -5,6 +5,12 @@ import { OpenAIEmbeddings } from "@langchain/openai";
 import * as lancedb from "vectordb";
 import { LanceDB } from "@langchain/community/vectorstores/lancedb";
 import { TextLoader} from "langchain/document_loaders/fs/text";
+import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
+
+import dotenv from "dotenv";
+dotenv.config();
+
+const DATABASE_PATH = "./vectorStore";
 
 export async function createVectorDatabase() {
     // Set the OpenAI API key
@@ -15,45 +21,56 @@ export async function createVectorDatabase() {
       );
     }
 
-    // Use the OpenAIEmbeddings model to create embeddings from text
-    const embeddings = new OpenAIEmbeddings({ openAIApiKey: OPENAI_API_KEY });
+    // Document loading
+    const loader = new TextLoader("./data/talks.txt");
+    const raw_documents = await loader.load();
+    console.log("Documents length: ", raw_documents.length)
 
-    // Set the database path
-    const database = "./lancedb";
+    // Document splitting
+    const splitter = new RecursiveCharacterTextSplitter({
+      separators: ["\n\n", "\n", ",", " ", ""],
+      chunkSize: 1024,
+      chunkOverlap: 256,
+    });
+    const documents = await splitter.splitDocuments(raw_documents);
+    console.log("Splitted documents length: ", documents.length)
 
-    // Create the database directory if it doesn't exist
-    if (!fs.existsSync(database)) {
+    // Use the OpenAIEmbeddings model to create embeddings from text
+    const embeddings = new OpenAIEmbeddings({openAIApiKey: OPENAI_API_KEY});
+
+    // Create the vector store directory if it doesn't exist
+    if (!fs.existsSync(DATABASE_PATH)) {
       try {
-        fs.mkdirSync(database);
+        fs.mkdirSync(DATABASE_PATH);
       } catch (e) {
-        console.error(`Error creating directory '${database}':`, e);
+        console.error(`Error creating directory '${DATABASE_PATH}':`, e);
       }
     }
 
-    // Connect to the database
-    const db = await lancedb.connect(database);
+    // Connect to the vector store
+    const db = await lancedb.connect(DATABASE_PATH);
 
-    // Create a table in the database called "vectors" with the schema corresponding to a TextLoader
+    // Create a table in the vector store with a specific schema
     const table = await db.createTable(
       "vectors",
-      [{ vector: Array(1536), text: "sample", source: 'string' }],
-      // Overwrite the database if it already exists
+      [
+        {
+          vector: await embeddings.embedQuery("string"),
+          text: "",
+          source: "",
+          loc: { lines: { from: 0, to: 0 } },
+        },
+      ],
+      // Overwrite the table if it already exists
       { writeMode: lancedb.WriteMode.Overwrite }
     );
 
-    // Load the data from a text file
-    const loader = new TextLoader("./data/talks.txt");
-    // Load the data into documents
-    const documents = await loader.load();
     // Save the data as OpenAI embeddings in a table
-    const vectorStore = await LanceDB.fromDocuments(documents, embeddings, { table });
-
-    return vectorStore;
+    await LanceDB.fromDocuments(documents, embeddings, { table });
 }
 
 (async () => {
-  console.log("Creating LanceDB vector table..");
-  // Create the LanceDB vector table
+  console.log("Creating the vector store...");
   await createVectorDatabase();
-  console.log("Successfully created LanceDB vector table.");
+  console.log("Successfully saved embeddings in the vector store.");
 })();