Skip to content

Commit

Permalink
Add backend service
Browse files Browse the repository at this point in the history
Signed-off-by: Andreia Ocănoaia <[email protected]>
  • Loading branch information
andreia-oca committed Jun 9, 2024
1 parent bd587e5 commit 13d9d6e
Show file tree
Hide file tree
Showing 10 changed files with 511 additions and 3,904 deletions.
94 changes: 66 additions & 28 deletions server/backend.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,14 @@ export type Talks = {
bio: string;
}

const CONTEXT_DOCS_NUMBER = 15

@GenezioDeploy()
export class BackendService {
constructor() {}

// Sample user descriptions (presumably example values for `user.description` when calling `ask` — confirm):
//   "I am a fullstack software engineer interested in: open source, generative AI, backend technologies, cloud, cloud native, deployment, dev tools."
//   "I am a product engineer interested in leadership, defining clear scopes, user experience, and getting feedback."
async ask(user: UserDescription): Promise<string> {
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
if (!OPENAI_API_KEY) {
Expand All @@ -34,27 +38,6 @@ export class BackendService {
);
}

// Define the OpenAI model
const model = new OpenAI({
modelName: "gpt-4o",
openAIApiKey: OPENAI_API_KEY,
temperature: 0,
verbose: true
});

// Define the prompt that will be fed to the model
const prompt = ChatPromptTemplate.fromMessages([
[
"ai",
`You are a helpful assistant for ${user.name}. Based on the user description select the top 3 talks from the context that are most relevant to the user.
{context}`,
],
[
"human",
`My name is ${user.name}. I am a ${user.description}.`,],
]);

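// Set the database path.
// NOTE(review): createVectorDatabase.ts writes its table under "./vectorStore" (DATABASE_PATH) — confirm this path points at the same store.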
const database = "./lancedb";
// Connect to the database
Expand All @@ -64,24 +47,79 @@ export class BackendService {

// Initialize the vector store object with the OpenAI embeddings and the table
const vectorStore = new LanceDB(new OpenAIEmbeddings(), { table });

// Debugging: run a similarity search for the user description and log the metadata and the first 10 characters of each of the CONTEXT_DOCS_NUMBER results
const result = await vectorStore.similaritySearch(user.description, CONTEXT_DOCS_NUMBER);
for (const item of result) {
console.log("Context metadata: ", item.metadata);
console.log("Context content: ", item.pageContent.slice(0, 10));
}

// Retrieve the most similar context to the input question
const retriever = vectorStore.asRetriever(1);
// Create an output parser that will convert the model's response to a string
const outputParser = new StringOutputParser();
const retriever = vectorStore.asRetriever(
{
vectorStore: vectorStore,
k: CONTEXT_DOCS_NUMBER,
searchType: "similarity",
filter: {},
},
{
verbose: true
},
);

// Create a pipeline that will feed the input question and the database retrieved context to the model
const setupAndRetrieval = RunnableMap.from({
context: new RunnableLambda({
// eslint-disable-next-line @typescript-eslint/no-explicit-any
func: (input: string) => retriever.invoke(input).then((response) => response[0].pageContent),
}).withConfig({ runName: "contextRetriever" }),
func: (input: string) => {
return retriever.invoke(input).then((response) => response.map(item => item.pageContent).join(' ')
)
}
}).withConfig({ runName: "context" }),
question: new RunnablePassthrough(),
});

// Define the prompt that will be fed to the model
const prompt = ChatPromptTemplate.fromMessages([
[
"ai",
`Your task is to advise me on the top 3 speakers I should see at a conference.
Based on the provided user description select the top 3 speakers you would recommend to the user.
You must also mention why you selected these speakers.
You must respond as a json object with the following structure: a list of speakers with the following fields: speaker, why.
Do not add any additional information to the response.
Respond only based on the context provided below - do not use any external information:
Context: {context}`,
],
[
"human",
`User description: {question}`,],
]);

// Define the OpenAI model
const model = new OpenAI({
modelName: "gpt-4o",
openAIApiKey: OPENAI_API_KEY,
temperature: 0.9,
verbose: true
});

// Create an output parser that will convert the model's response to a string
const outputParser = new StringOutputParser();

// Feed the input question and the database retrieved context to the model
const chain = setupAndRetrieval.pipe(prompt).pipe(model).pipe(outputParser);

// Invoke the model to answer the question
const response = await chain.invoke(user.description);
const response = await chain.invoke(
user.description,
);

console.log("Answer:", response);

return response;
Expand Down
63 changes: 40 additions & 23 deletions server/createVectorDatabase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ import { OpenAIEmbeddings } from "@langchain/openai";
import * as lancedb from "vectordb";
import { LanceDB } from "@langchain/community/vectorstores/lancedb";
import { TextLoader} from "langchain/document_loaders/fs/text";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";

import dotenv from "dotenv";
dotenv.config();

const DATABASE_PATH = "./vectorStore";

export async function createVectorDatabase() {
// Set the OpenAI API key
Expand All @@ -15,45 +21,56 @@ export async function createVectorDatabase() {
);
}

// Use the OpenAIEmbeddings model to create embeddings from text
const embeddings = new OpenAIEmbeddings({ openAIApiKey: OPENAI_API_KEY });
// Document loading
const loader = new TextLoader("./data/talks.txt");
const raw_documents = await loader.load();
console.log("Documents length: ", raw_documents.length)

// Set the database path
const database = "./lancedb";
// Document splitting
const splitter = new RecursiveCharacterTextSplitter({
separators: ["\n\n", "\n", ",", " ", ""],
chunkSize: 1024,
chunkOverlap: 256,
});
const documents = await splitter.splitDocuments(raw_documents);
console.log("Splitted documents length: ", documents.length)

// Create the database directory if it doesn't exist
if (!fs.existsSync(database)) {
// Use the OpenAIEmbeddings model to create embeddings from text
const embeddings = new OpenAIEmbeddings({openAIApiKey: OPENAI_API_KEY});

// Create the vector store directory if it doesn't exist
if (!fs.existsSync(DATABASE_PATH)) {
try {
fs.mkdirSync(database);
fs.mkdirSync(DATABASE_PATH);
} catch (e) {
console.error(`Error creating directory '${database}':`, e);
console.error(`Error creating directory '${DATABASE_PATH}':`, e);
}
}

// Connect to the database
const db = await lancedb.connect(database);
// Connect to the vector store
const db = await lancedb.connect(DATABASE_PATH);

// Create a table in the database called "vectors" with the schema corresponding to a TextLoader
// Create the "vectors" table in the vector store, seeded with one placeholder row (vector, text, source, loc) that establishes its schema
const table = await db.createTable(
"vectors",
[{ vector: Array(1536), text: "sample", source: 'string' }],
// Overwrite the database if it already exists
[
{
vector: await embeddings.embedQuery("string"),
text: "",
source: "",
loc: { lines: { from: 0, to: 0 } },
},
],
// Overwrite the table if it already exists
{ writeMode: lancedb.WriteMode.Overwrite }
);

// Load the data from a text file
const loader = new TextLoader("./data/talks.txt");
// Load the data into documents
const documents = await loader.load();
// Save the data as OpenAI embeddings in a table
const vectorStore = await LanceDB.fromDocuments(documents, embeddings, { table });

return vectorStore;
await LanceDB.fromDocuments(documents, embeddings, { table });
}

(async () => {
console.log("Creating LanceDB vector table..");
// Create the LanceDB vector table
console.log("Creating the vector store...");
await createVectorDatabase();
console.log("Successfully created LanceDB vector table.");
console.log("Successfully saved embeddings in the vector store.");
})();
Loading

0 comments on commit 13d9d6e

Please sign in to comment.