From 0b1b7cb739a44b477575ae34852dfb0c24815930 Mon Sep 17 00:00:00 2001 From: Konstantine Tsafatinos Date: Mon, 1 Jul 2024 11:49:26 -0400 Subject: [PATCH] add chatify to the ci scripts --- scripts/chatify/background.md | 13 ++ scripts/chatify/install_and_load_chatify.py | 2 + scripts/chatify/install_davos.py | 3 + scripts/chatify/process_notebooks.py | 190 ++++++++++++++++++++ scripts/chatify/requirements.txt | 5 + 5 files changed, 213 insertions(+) create mode 100644 scripts/chatify/background.md create mode 100644 scripts/chatify/install_and_load_chatify.py create mode 100644 scripts/chatify/install_davos.py create mode 100644 scripts/chatify/process_notebooks.py create mode 100644 scripts/chatify/requirements.txt diff --git a/scripts/chatify/background.md b/scripts/chatify/background.md new file mode 100644 index 0000000..ea4fef2 --- /dev/null +++ b/scripts/chatify/background.md @@ -0,0 +1,13 @@ +**⚠ Experimental LLM-enhanced tutorial ⚠** + +This notebook includes Neuromatch's experimental [Chatify](https://github.com/ContextLab/chatify) 🤖 functionality. The Chatify notebook extension adds support for a large language model-based "coding tutor" to the materials. The tutor provides automatically generated text to help explain any code cell in this notebook. + +Note that using Chatify may cause breaking changes and/or provide incorrect or misleading information. If you wish to proceed by installing and enabling the Chatify extension, you should run the next two code blocks (hidden by default). If you do *not* want to use this experimental version of the Neuromatch materials, please use the [stable](https://deeplearning.neuromatch.io/tutorials/intro.html) materials instead. + +To use the Chatify helper, insert the `%%explain` magic command at the start of any code cell and then run it (shift + enter) to access an interface for receiving LLM-based assistance. You can then select different options from the dropdown menus depending on what sort of assistance you want. 
To disable Chatify and run the code block as usual, simply delete the `%%explain` command and re-run the cell.
+
+Note that, by default, all of Chatify's responses are generated locally. This often takes several minutes per response. Once you click the "Submit request" button, just be patient-- stuff is happening even if you can't see it right away!
+
+Thanks for giving Chatify a try! Love it? Hate it? Either way, we'd love to hear from you about your Chatify experience! Please consider filling out our [brief survey](https://forms.gle/jNq85KVvNwj1JHZV9) to provide feedback and help us make Chatify more awesome!
+
+**Run the next two cells to install and configure Chatify...**
\ No newline at end of file
diff --git a/scripts/chatify/install_and_load_chatify.py b/scripts/chatify/install_and_load_chatify.py
new file mode 100644
index 0000000..12312d4
--- /dev/null
+++ b/scripts/chatify/install_and_load_chatify.py
@@ -0,0 +1,2 @@
+smuggle chatify # pip: git+https://github.com/ContextLab/chatify.git
+%load_ext chatify
\ No newline at end of file
diff --git a/scripts/chatify/install_davos.py b/scripts/chatify/install_davos.py
new file mode 100644
index 0000000..c3a6488
--- /dev/null
+++ b/scripts/chatify/install_davos.py
@@ -0,0 +1,3 @@
+%pip install -q davos
+import davos
+davos.config.suppress_stdout = True
\ No newline at end of file
diff --git a/scripts/chatify/process_notebooks.py b/scripts/chatify/process_notebooks.py
new file mode 100644
index 0000000..cf70168
--- /dev/null
+++ b/scripts/chatify/process_notebooks.py
@@ -0,0 +1,233 @@
+"""Inject Chatify setup cells into the student tutorial notebooks.
+
+Paths are resolved against the current working directory and notebooks are
+rewritten in place.  When ``CACHE`` is true, Chatify responses for every
+unique code cell are also generated and stored in a pickle file and a
+GPTCache database.
+"""
+import os
+import yaml
+from glob import glob as lsdir
+
+import nbformat as nbf
+from chatify import Chatify
+from tqdm import tqdm
+
+import numpy as np
+import pickle
+
+from langchain.prompts import PromptTemplate
+from gptcache import Cache
+from gptcache.processor.pre import get_prompt
+from gptcache.manager import get_data_manager
+from gptcache.similarity_evaluation.exact_match import ExactMatchEvaluation
+
+source_repo = os.environ.get("SOURCE_REPO", "NeuroAI_Course")
+mod_repo = os.environ.get("MOD_REPO", "chatify_NeuroAI_Course")
+CACHE = False
+
+
+def get_tutorial_notebooks(basedir):
+    """Return the student tutorial notebook paths found under *basedir*."""
+    return lsdir(os.path.join(basedir, 'tutorials', '*', 'student', '*Tutorial*.ipynb'))
+
+
+def chatified(fname):
+    """Return True if the first cell of notebook *fname* mentions ``mod_repo``."""
+    notebook = nbf.read(fname, nbf.NO_CONVERT)
+    header_cell = notebook['cells'][0]
+    return mod_repo in header_cell['source']
+
+
+def get_text(fname):
+    """Return the contents of ``ci/chatify/<fname>`` under the working directory."""
+    # NOTE(review): this commit adds the files under scripts/chatify/ but they
+    # are read from ci/chatify/ -- confirm the CI job copies them there.
+    with open(os.path.join(os.getcwd(), 'ci', 'chatify', fname), 'r') as f:
+        return f.read()
+
+
+def inject_chatify(fname):
+    """Add the Chatify background, davos and chatify cells to notebook *fname*.
+
+    ``source_repo`` is replaced by ``mod_repo`` in the first cell, and the
+    three cells go right after the first markdown cell containing ``# Setup``.
+    If the notebook is already chatified, the cells at those positions are
+    overwritten instead of inserted.  The notebook file is rewritten in place.
+
+    Raises:
+        ValueError: if overwriting the cells of an already chatified notebook
+            would run past the end of the notebook.
+    """
+    notebook = nbf.read(fname, nbf.NO_CONVERT)
+    new_notebook = notebook.copy()
+
+    # update header cell
+    header_cell = new_notebook['cells'][0]
+    header_cell['source'] = header_cell['source'].replace(source_repo, mod_repo)
+
+    # insert background cell
+    background_cell = nbf.v4.new_markdown_cell(source=get_text('background.md'), metadata={'execution': {}})
+    # pop() instead of del: tolerate cells created without an 'id' field
+    background_cell.pop('id', None)
+
+    # create davos cell
+    davos_cell = nbf.v4.new_code_cell(source=get_text('install_davos.py'), metadata={'cellView': 'form', 'execution': {}})
+    davos_cell.pop('id', None)
+
+    # create chatify cell
+    chatify_cell = nbf.v4.new_code_cell(source=get_text('install_and_load_chatify.py'), metadata={'cellView': 'form', 'execution': {}})
+    chatify_cell.pop('id', None)
+
+    # idx ends one past the first '# Setup' markdown cell, or at
+    # len(cells) when no such cell exists
+    idx = 0
+    for cell in new_notebook['cells']:
+        idx += 1
+        if cell['cell_type'] == 'markdown':
+            if '# Setup' in cell['source']:
+                break
+
+    # NOTE(review): this is true only when '# Setup' is the second-to-last
+    # cell, not when it is missing -- confirm which case was intended.
+    if idx == len(new_notebook['cells']) - 1:
+        return
+
+    try:
+        if chatified(fname):
+            new_notebook.cells[0] = header_cell
+            new_notebook.cells[idx] = background_cell
+            new_notebook.cells[idx + 1] = davos_cell
+            new_notebook.cells[idx + 2] = chatify_cell
+        else:
+            new_notebook.cells.insert(idx, background_cell)
+            new_notebook.cells.insert(idx + 1, davos_cell)
+            new_notebook.cells.insert(idx + 2, chatify_cell)
+    except IndexError as err:
+        raise ValueError(f"Notebook Missing Setup Header: {fname}, index: {idx}") from err
+
+    # Write the file
+    nbf.write(
+        new_notebook,
+        fname,
+        version=nbf.NO_CONVERT,
+    )
+
+
+def compress_code(text):
+    """Return *text* with every line stripped and blank lines removed."""
+    stripped = (line.strip() for line in text.split('\n'))
+    return '\n'.join(line for line in stripped if line)
+
+
+def get_code_cells(fname):
+    """Return the compressed source of every code cell in notebook *fname*."""
+    notebook = nbf.read(fname, nbf.NO_CONVERT)
+    return [compress_code(cell['source']) for cell in notebook['cells'] if cell['cell_type'] == 'code']
+
+
+def convert_pickle_file_to_cache(pickle_file, config):
+    """Fill a fresh GPTCache database with the responses stored in *pickle_file*.
+
+    Args:
+        pickle_file: path to a pickled ``{code: {prompt_name: answer}}`` dict.
+        config: mapping providing ``config['cache_config']['cache_db_version']``.
+    """
+    cache_db_version = config['cache_config']['cache_db_version']
+    file_name = f'NMA_2023_v{cache_db_version}.cache'
+
+    # Remove file before creating a new one
+    if os.path.exists(file_name):
+        os.remove(file_name)
+
+    llm_cache = Cache()
+    llm_cache.set_openai_key()
+    data_manager = get_data_manager(data_path=file_name)
+
+    llm_cache.init(
+        pre_embedding_func=get_prompt,
+        data_manager=data_manager,
+        similarity_evaluation=ExactMatchEvaluation(),
+    )
+
+    chatify = Chatify()
+    prompts = chatify._read_prompt_dir()['tutor']
+
+    # NOTE: unpickling can execute arbitrary code; only load trusted files
+    with open(pickle_file, 'rb') as f:
+        cache = pickle.load(f)
+
+    for key, value in cache.items():
+        for prompt_name, prompt in prompts.items():
+            prompt = PromptTemplate(
+                template=prompt['content'],
+                input_variables=prompt['input_variables'],
+            )
+            question = prompt.format(text=compress_code(key))
+            try:
+                answer = value[prompt_name]
+                data_manager.save(question, answer, embedding_data=question)
+            except KeyError:
+                # no stored response for this prompt
+                pass
+
+
+tutorials = get_tutorial_notebooks(os.getcwd())
+tutor = Chatify()
+prompts = tutor._read_prompt_dir()['tutor']
+code_cells = []
+failed_queries = []
+
+for notebook in tqdm(tutorials):
+    inject_chatify(notebook)
+    code_cells.extend(get_code_cells(notebook))
+
+
+if CACHE:
+    savefile = os.path.join(os.getcwd(), 'chatify', 'cache.pkl')
+    failed_queries_file = os.path.join(os.getcwd(), 'chatify', 'failed_queries.pkl')
+
+    if os.path.exists(savefile):
+        with open(savefile, 'rb') as f:
+            cache = pickle.load(f)
+    else:
+        cache = {}
+
+    failed_queries = []
+
+    tmpfile = os.path.join(os.getcwd(), 'chatify', 'tmp.pkl')
+    for cell in tqdm(np.unique(code_cells)):
+        if cell not in cache:
+            cache[cell] = {}
+
+        for name, content in prompts.items():
+            # .get() + truthiness: a previously stored None (failed query)
+            # is retried instead of crashing on len(None)
+            if not cache[cell].get(name):
+                try:
+                    cache[cell][name] = tutor._cache(cell, content)
+
+                    with open(tmpfile, 'wb') as f:
+                        pickle.dump(cache, f)
+
+                    if not cache[cell][name]:
+                        failed_queries.append((cell, name, 'null response'))
+                        print('Response failed for cell (null response):\n', cell)
+                except Exception:
+                    # not a bare except: Ctrl-C / SystemExit must still stop the run
+                    failed_queries.append((cell, name, 'exception raised'))
+                    print('Response failed for cell (exception raised):\n', cell)
+
+    with open(savefile, 'wb') as f:
+        pickle.dump(cache, f)
+
+    with open(failed_queries_file, 'wb') as f:
+        pickle.dump(failed_queries, f)
+
+    if os.path.exists(tmpfile):
+        os.remove(tmpfile)
+
+    # build cache
+    with open('config.yaml', 'r') as f:
+        config = yaml.safe_load(f)
+    convert_pickle_file_to_cache(savefile, config)
diff --git a/scripts/chatify/requirements.txt b/scripts/chatify/requirements.txt
new file mode 100644
index 0000000..d95fabf
--- /dev/null
+++ b/scripts/chatify/requirements.txt
@@ -0,0 +1,5 @@
+chatify @ git+https://github.com/ContextLab/chatify
+nbformat
+davos
+tqdm
+gptcache