Skip to content

Commit

Permalink
add chatify to the ci scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
iamzoltan committed Jul 1, 2024
1 parent 0e69ce7 commit 0b1b7cb
Show file tree
Hide file tree
Showing 5 changed files with 213 additions and 0 deletions.
13 changes: 13 additions & 0 deletions scripts/chatify/background.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
**⚠ Experimental LLM-enhanced tutorial ⚠**

This notebook includes Neuromatch's experimental [Chatify](https://github.com/ContextLab/chatify) 🤖 functionality. The Chatify notebook extension adds support for a large language model-based "coding tutor" to the materials. The tutor provides automatically generated text to help explain any code cell in this notebook.

Note that using Chatify may cause breaking changes and/or provide incorrect or misleading information. If you wish to proceed by installing and enabling the Chatify extension, you should run the next two code blocks (hidden by default). If you do *not* want to use this experimental version of the Neuromatch materials, please use the [stable](https://deeplearning.neuromatch.io/tutorials/intro.html) materials instead.

To use the Chatify helper, insert the `%%explain` magic command at the start of any code cell and then run it (shift + enter) to access an interface for receiving LLM-based assistance. You can then select different options from the dropdown menus depending on what sort of assistance you want. To disable Chatify and run the code block as usual, simply delete the `%%explain` command and re-run the cell.

Note that, by default, all of Chatify's responses are generated locally. This often takes several minutes per response. Once you click the "Submit request" button, just be patient — stuff is happening even if you can't see it right away!

Thanks for giving Chatify a try! Love it? Hate it? Either way, we'd love to hear from you about your Chatify experience! Please consider filling out our [brief survey](https://forms.gle/jNq85KVvNwj1JHZV9) to provide feedback and help us make Chatify more awesome!

**Run the next two cells to install and configure Chatify...**
2 changes: 2 additions & 0 deletions scripts/chatify/install_and_load_chatify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Import chatify through davos's import keyword. The trailing "pip:" comment
# is part of the statement and must stay on the same line -- presumably it
# tells davos where to install chatify from when it is missing (confirm
# against the davos docs).
smuggle chatify # pip: git+https://github.com/ContextLab/chatify.git
# Load the chatify IPython extension into the running notebook session.
%load_ext chatify
3 changes: 3 additions & 0 deletions scripts/chatify/install_davos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Install davos with pip's quiet flag via the IPython %pip magic.
%pip install -q davos
import davos
# NOTE(review): presumably hides the output davos would otherwise print while
# installing packages -- confirm against the davos configuration docs.
davos.config.suppress_stdout = True
190 changes: 190 additions & 0 deletions scripts/chatify/process_notebooks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
import os
import yaml
from glob import glob as lsdir

import nbformat as nbf
from chatify import Chatify
from tqdm import tqdm

import numpy as np
import pickle

from langchain.prompts import PromptTemplate
from gptcache import Cache
from gptcache.processor.pre import get_prompt
from gptcache.manager import get_data_manager
from gptcache.similarity_evaluation.exact_match import ExactMatchEvaluation

# Repository name as it appears in the unmodified notebooks; override with the
# SOURCE_REPO environment variable.
source_repo = os.environ.get("SOURCE_REPO", "NeuroAI_Course")
# Repository name written into chatified notebooks (override with MOD_REPO).
# Its presence in a notebook's first cell marks the notebook as already
# processed.
mod_repo = os.environ.get("MOD_REPO", "chatify_NeuroAI_Course")
# When True, the script also queries Chatify for every unique code cell and
# rebuilds the response cache; disabled by default.
CACHE = False


def get_tutorial_notebooks(basedir):
    """Collect the student tutorial notebooks located under ``basedir``.

    Matches ``<basedir>/tutorials/*/student/*Tutorial*.ipynb`` and returns the
    resulting paths as a list, in whatever order ``glob`` produces them.
    """
    pattern = os.path.join(basedir, 'tutorials', '*', 'student', '*Tutorial*.ipynb')
    matches = lsdir(pattern)
    return matches


def chatified(fname):
    """Report whether the notebook at ``fname`` has already been chatified.

    The notebook is read without version conversion, and its first cell's
    source is checked for the modified repository name (``mod_repo``).
    """
    cells = nbf.read(fname, nbf.NO_CONVERT)['cells']
    first_source = cells[0]['source']
    return mod_repo in first_source


def get_text(fname):
    """Return the full contents of a Chatify template file.

    Args:
        fname: File name relative to ``<current working dir>/ci/chatify``.

    Returns:
        The file's text as a single string.

    Raises:
        OSError: If the file cannot be opened.
    """
    path = os.path.join(os.getcwd(), 'ci', 'chatify', fname)
    # Decode explicitly as UTF-8: the templates contain non-ASCII characters
    # (background.md uses emoji), which the platform's default encoding may
    # fail to decode or silently garble.
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


def _find_setup_insertion_index(cells):
    """Return the index just after the first '# Setup' markdown cell, or None if there is none."""
    for position, cell in enumerate(cells):
        if cell['cell_type'] == 'markdown' and '# Setup' in cell['source']:
            return position + 1
    return None


def inject_chatify(fname):
    """Insert (or refresh) the Chatify cells in the notebook at ``fname``.

    The notebook is rewritten in place:

    * the first cell has ``source_repo`` replaced by ``mod_repo``;
    * a background markdown cell, a davos install cell and a chatify
      install/load cell are placed directly after the first markdown cell
      containing ``# Setup``.

    If the notebook was already chatified, the three cells following the
    Setup header are overwritten instead of inserting new ones, so repeated
    runs do not accumulate cells.

    Notebooks with no cells, or with no ``# Setup`` markdown cell, are left
    untouched (a notice is printed for the latter).

    Args:
        fname: Path of the notebook to modify.

    Raises:
        ValueError: If the notebook is already chatified but has fewer than
            three cells after the Setup header to overwrite.
    """
    notebook = nbf.read(fname, nbf.NO_CONVERT)
    cells = notebook['cells']
    if not cells:
        # Nothing to rewrite; also avoids indexing an empty cell list below.
        return

    idx = _find_setup_insertion_index(cells)
    if idx is None:
        print(f'Skipping {fname}: no "# Setup" markdown cell found')
        return

    # Decide this before touching the header, which is what chatified() inspects.
    already_chatified = chatified(fname)

    # update header cell -- only once: with the default names mod_repo contains
    # source_repo, so repeating the replacement on an already-chatified header
    # would yield "chatify_chatify_...".
    if not already_chatified:
        header_cell = cells[0]
        header_cell['source'] = header_cell['source'].replace(source_repo, mod_repo)

    # background, davos and chatify cells, in the order they should appear
    injected = [
        nbf.v4.new_markdown_cell(
            source=get_text('background.md'),
            metadata={'execution': {}},
        ),
        nbf.v4.new_code_cell(
            source=get_text('install_davos.py'),
            metadata={'cellView': 'form', 'execution': {}},
        ),
        nbf.v4.new_code_cell(
            source=get_text('install_and_load_chatify.py'),
            metadata={'cellView': 'form', 'execution': {}},
        ),
    ]
    for cell in injected:
        # Drop the generated cell id if there is one; pop() tolerates nbformat
        # versions that do not add an 'id' field.
        cell.pop('id', None)

    if already_chatified:
        # The previously injected cells are expected right after the Setup
        # header; refuse to write rather than overwrite past the end.
        if idx + len(injected) > len(cells):
            raise ValueError(f"Notebook Missing Setup Header: {fname}, index: {idx}")
        cells[idx:idx + len(injected)] = injected
    else:
        cells[idx:idx] = injected

    # Write the file
    nbf.write(
        notebook,
        fname,
        version=nbf.NO_CONVERT,
    )


def compress_code(text):
    """Strip every line of ``text`` and drop the lines that end up empty.

    The surviving lines are re-joined with a single newline, so leading
    indentation, trailing whitespace and blank lines are all removed.
    """
    stripped_lines = (raw.strip() for raw in text.split('\n'))
    return '\n'.join(piece for piece in stripped_lines if piece)


def get_code_cells(fname):
    """Return the compressed source of every code cell in the notebook ``fname``.

    The notebook is read without version conversion; non-code cells are
    skipped and each code cell's source is passed through ``compress_code``.
    """
    cells = nbf.read(fname, nbf.NO_CONVERT)['cells']
    compressed = []
    for cell in cells:
        if cell['cell_type'] != 'code':
            continue
        compressed.append(compress_code(cell['source']))
    return compressed


def convert_pickle_file_to_cache(pickle_file, config):
    """Rebuild the GPTCache data file from a pickled response dictionary.

    Args:
        pickle_file: Path to a pickle holding a mapping of
            ``code cell text -> {prompt name -> response}``.
        config: Mapping providing ``config['cache_config']['cache_db_version']``,
            which is embedded in the output file name
            (``NMA_2023_v<version>.cache`` in the current working directory).

    Any existing cache file with that name is deleted first. For every cached
    cell and every "tutor" prompt that has a stored response, the formatted
    prompt is saved as the question and the stored response as the answer.
    Prompts without a stored response for a cell are skipped.
    """
    cache_db_version = config['cache_config']['cache_db_version']
    file_name = f'NMA_2023_v{cache_db_version}.cache'

    # Remove file before creating a new one
    if os.path.exists(file_name):
        os.remove(file_name)

    llm_cache = Cache()
    llm_cache.set_openai_key()
    data_manager = get_data_manager(data_path=file_name)

    llm_cache.init(
        pre_embedding_func=get_prompt,
        data_manager=data_manager,
        similarity_evaluation=ExactMatchEvaluation(),
    )

    chatify = Chatify()
    prompts = chatify._read_prompt_dir()['tutor']

    # Build each template once instead of once per cached cell.
    templates = {
        prompt_name: PromptTemplate(
            template=prompt['content'],
            input_variables=prompt['input_variables'],
        )
        for prompt_name, prompt in prompts.items()
    }

    # NOTE(security): pickle.load can execute arbitrary code; only use pickle
    # files produced by this script.
    with open(pickle_file, 'rb') as f:
        cache = pickle.load(f)

    for key, value in cache.items():
        compressed = compress_code(key)
        for prompt_name, template in templates.items():
            # Skip prompts with no stored response. Checking membership
            # (rather than catching KeyError around the save) keeps errors
            # raised by data_manager.save visible.
            if prompt_name not in value:
                continue
            question = template.format(text=compressed)
            data_manager.save(question, value[prompt_name], embedding_data=question)


# Script body: chatify every tutorial notebook under the current working
# directory and, when CACHE is enabled, pre-compute Chatify responses for each
# unique code cell and rebuild the response cache from them.
tutorials = get_tutorial_notebooks(os.getcwd())
tutor = Chatify()
prompts = tutor._read_prompt_dir()['tutor']
code_cells = []
failed_queries = []

for notebook in tqdm(tutorials):
    inject_chatify(notebook)
    code_cells.extend(get_code_cells(notebook))


if CACHE:
    cache_dir = os.path.join(os.getcwd(), 'chatify')
    # Make sure the output directory exists before any checkpoint is written.
    os.makedirs(cache_dir, exist_ok=True)
    savefile = os.path.join(cache_dir, 'cache.pkl')
    failed_queries_file = os.path.join(cache_dir, 'failed_queries.pkl')
    tmpfile = os.path.join(cache_dir, 'tmp.pkl')

    if os.path.exists(savefile):
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # cache files produced by this script.
        with open(savefile, 'rb') as f:
            cache = pickle.load(f)
    else:
        cache = {}

    # sorted(set(...)) yields the unique cells in sorted order as plain str
    # objects, without building a fixed-width NumPy string array padded to the
    # longest cell.
    for cell in tqdm(sorted(set(code_cells))):
        responses = cache.setdefault(cell, {})

        for name, content in prompts.items():
            # Query only when there is no usable response yet. Truthiness also
            # covers a None stored by an earlier failed run, on which len()
            # would raise TypeError.
            if responses.get(name):
                continue

            try:
                responses[name] = tutor._cache(cell, content)
            except Exception:
                # Record the failure and move on; KeyboardInterrupt and
                # SystemExit are deliberately not caught so the run can be
                # aborted.
                failed_queries.append((cell, name, 'exception raised'))
                print('Response failed for cell (exception raised):\n', cell)
                continue

            # Checkpoint after every response so an interrupted run loses
            # little work.
            with open(tmpfile, 'wb') as f:
                pickle.dump(cache, f)

            if not responses[name]:
                failed_queries.append((cell, name, 'null response'))
                print('Response failed for cell (null response):\n', cell)

    with open(savefile, 'wb') as f:
        pickle.dump(cache, f)

    with open(failed_queries_file, 'wb') as f:
        pickle.dump(failed_queries, f)

    if os.path.exists(tmpfile):
        os.remove(tmpfile)

    # build cache
    with open('config.yaml', 'r') as f:
        config = yaml.load(f, Loader=yaml.SafeLoader)
    convert_pickle_file_to_cache(savefile, config)
5 changes: 5 additions & 0 deletions scripts/chatify/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
chatify @ git+https://github.com/ContextLab/chatify
nbformat
davos
tqdm
gptcache

0 comments on commit 0b1b7cb

Please sign in to comment.