Skip to content

Commit

Permalink
C
Browse files Browse the repository at this point in the history
C
ready for initial publishing
  • Loading branch information
SAMAD101 committed Feb 12, 2024
1 parent 343a579 commit 09c8b45
Show file tree
Hide file tree
Showing 9 changed files with 190 additions and 185 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/publish_pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: "3.11.4"
python-version: "3.11.7"

- name: Build and publish package
env:
Expand Down
16 changes: 14 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ classifiers = [
"Programming Language :: Python",
"Programming Language :: Python :: 3",
]
keywords = ["chatbot", "terminal", "openai", "rag"]
keywords = ["chatbot", "terminal", "openai", "rag", "bumpver"]

[build-system]
requires = ["setuptools>=61", "wheel"]
Expand All @@ -34,4 +34,16 @@ dev = ["black", "pytest", "pre-commit", "twine"]
Homepage = "https://github.com/SAMAD101/Chino"

[project.scripts]
chino = "chino.__main__:main"
chino = "chino.__main__:app"

[tool.bumpver]
current_version = "1.0.0"
version_pattern = "MAJOR.MINOR.PATCH"
commit_message = "Bump version {old_version} -> {new_version}"
commit = true
tag = true
push = false

[tool.bumpver.file_patterns]
"pyproject.toml" = ['current_version = "{version}"', 'version = "{version}"']
"src/reader/__init__.py" = ["{version}"]
117 changes: 34 additions & 83 deletions src/chino/__main__.py
Original file line number Diff line number Diff line change
@@ -1,91 +1,42 @@
# !/usr/bin/python
import os

import typer
from typer import Typer

from typing import List, Union
from .query import Query
from .conversation import Conversation
from .migrations import Migration

from rich.console import Console

from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
app: Typer = Typer(
help="Chino is a chatbot based on OpenAI. It can also provide responses about queries on user-provided data."
)
from langchain.schema import HumanMessage, SystemMessage, BaseMessage
from langchain_openai import ChatOpenAI

from .store.query import query_data
from .store.migrations import generate_data_store


app = typer.Typer()
console = Console()

model: ChatOpenAI = ChatOpenAI()

messages: List[Union[SystemMessage, HumanMessage]] = [
SystemMessage(
content="You are Chino, a chatbot based on ChatGPT. 'Chino' means 'intelligence' in Japanese."
),
]


def get_response(prompt: str) -> None:
global model, messages

with console.status("[i]thinking...[/i]"):
messages.append(HumanMessage(content=prompt))
response: BaseMessage = model.invoke(messages)
messages.append(SystemMessage(content=response.content))
console.print(f"[b blue]Chino:[/b blue] {response.content}")
console.rule()


def run_query(prompt: str) -> None:
global model, messages

with console.status("[i]thinking...[/i]"):
query_text, query_sources = query_data(prompt)
messages.append(HumanMessage(content=query_text))
response: BaseMessage = model.invoke(messages)
messages.append(SystemMessage(content=response.content))
console.print(
f"[b blue]Chino:[/b blue] {response.content}\n\n[i violet]Sources:[/i violet]{query_sources}"
)
console.rule()


def run_conversation(prompt: str, query: bool) -> None:
global model, messages

if prompt:
if query or prompt.lower().startswith("\\q:"):
run_query(prompt)
return
get_response(prompt)
return

while True:
prompt: str = console.input("[b green]You: [/b green]")
if prompt == "quit":
break
elif query or prompt.lower().startswith("\\query:"):
run_query(prompt)
continue
get_response(prompt)


def main(
prompt: str = typer.Option(None, "-p", "--prompt", help="Prompt for ChatGPT"),
query: bool = typer.Option(False, "-q", "--query", help="Query for your data"),
process: bool = typer.Option(False, "--process", help="Process your data"),
@app.command()
def migrate(
chroma_path: str = os.path.expanduser("~/.local/share/chino/chroma/"),
data_path: str = os.path.expanduser("~/.local/share/chino/data/"),
) -> None:
if process:
console.status("Processing your data...")
generate_data_store()
return
run_conversation(prompt, query)


if __name__ == "__main__":
typer.run(main)
"""Migrate the data to the chroma using vector embeddings."""

migration = Migration(chroma_path, data_path)
migration.generate_data_store()


@app.command("start")
def main():
"""Start the main event loop function. A chat interface will be opened."""

conversation: Conversation = Conversation()
try:
while True:
prompt = conversation.console.input("[bold green]You: [/bold green]")
if prompt == "quit":
conversation.console.print("[bold red]Quiting...[/bold red]")
break
elif prompt.lower().startswith("\\q:"):
conversation.run_query(prompt)
continue
conversation.get_response(prompt)
except KeyboardInterrupt:
conversation.console.print("\n[bold red]Quiting...[/bold red]")
42 changes: 42 additions & 0 deletions src/chino/conversation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os

from typing import List, Union

from rich.console import Console

from langchain.schema import HumanMessage, SystemMessage, BaseMessage
from langchain_openai import ChatOpenAI

from .query import Query


class Conversation:
def __init__(self) -> None:
self.model = ChatOpenAI()
self.messages: List[Union[SystemMessage, HumanMessage]] = [
SystemMessage(
content="You are Chino, a chatbot based on ChatGPT. 'Chino' means 'intelligence' in Japanese."
),
]
self.console = Console()

def get_response(self, prompt: str) -> None:
with self.console.status("[i]thinking...[/i]"):
self.messages.append(HumanMessage(content=prompt))
response: BaseMessage = self.model.invoke(self.messages)
self.messages.append(SystemMessage(content=response.content))
self.console.print(f"[b blue]Chino:[/b blue] {response.content}")
self.console.rule()

def run_query(self, prompt: str) -> None:
with self.console.status("[i]thinking...[/i]"):
query_text, query_sources = Query(
prompt, os.path.expanduser("~/.local/share/chino/chroma/")
).query_data()
self.messages.append(HumanMessage(content=query_text))
response: BaseMessage = self.model.invoke(self.messages)
self.messages.append(SystemMessage(content=response.content))
self.console.print(
f"[b blue]Chino:[/b blue] {response.content}\n\n[i violet]Sources:[/i violet]{query_sources}"
)
self.console.rule()
53 changes: 53 additions & 0 deletions src/chino/migrations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import os
import shutil

from typing import Any, List

from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma


class Migration:
def __init__(self, chroma_path: str = None, data_path: str = None) -> None:
self.CHROMA_PATH: str = chroma_path
self.DATA_PATH: str = data_path

def generate_data_store(self) -> None:
documents: Any = Migration._load_documents(self.DATA_PATH)
chunks: List[Document] = Migration._split_text(documents)
Migration._save_to_chroma(chunks, self.CHROMA_PATH)

@classmethod
def _load_documents(cls, data_path):
loader: DirectoryLoader = DirectoryLoader(data_path)
documents: Any = loader.load()
return documents

@classmethod
def _split_text(cls, documents: List[Document]) -> List[Document]:
text_splitter: RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter(
chunk_size=300,
chunk_overlap=100,
length_function=len,
add_start_index=True,
)
chunks: List[Document] = text_splitter.split_documents(documents)
print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

return chunks

@classmethod
def _save_to_chroma(cls, chunks: List[Document], chroma_path: str):
# Clear out the database first.
if os.path.exists(chroma_path):
shutil.rmtree(chroma_path)

# Create a new DB from the documents.
db: Chroma = Chroma.from_documents(
chunks, OpenAIEmbeddings(), persist_directory=chroma_path
)
db.persist()
print(f"Saved {len(chunks)} chunks to {chroma_path}.")
46 changes: 46 additions & 0 deletions src/chino/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import sys

from typing import List, Tuple

from langchain.vectorstores.chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.prompts import ChatPromptTemplate


class Query:
def __init__(self, prompt: str = None, chroma_path: str = None) -> None:
self.query_text = prompt
self.CHROMA_PATH = chroma_path

def _prepare_db(self) -> Chroma:
return Chroma(
persist_directory=self.CHROMA_PATH, embedding_function=OpenAIEmbeddings()
)

def query_data(self) -> Tuple[str, List[str]]:
# query text will be the prompt - provided by the user - to be processed using the embeddings and the model
PROMPT_TEMPLATE: str = """
Details
{context}
---
Answer this question based on the above details and previous messages: {question}
"""
db: Chroma = self._prepare_db()

# Search the DB.
results: List = db.similarity_search_with_relevance_scores(self.query_text, k=3)
if len(results) == 0 or results[0][1] < 0.7:
print(f"Unable to find matching results.")
sys.exit()

context_text: str = "\n\n---\n\n".join(
[doc.page_content for doc, _score in results]
)
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
query_prompt = "QUERY:" + prompt_template.format(
context=context_text, question=self.query_text
)
# This prompt will serve as a context for the model to generate a response.

query_sources = [doc.metadata.get("source", None) for doc, _score in results]
return query_prompt, query_sources
Empty file removed src/chino/store/__init__.py
Empty file.
52 changes: 0 additions & 52 deletions src/chino/store/migrations.py

This file was deleted.

Loading

0 comments on commit 09c8b45

Please sign in to comment.