From 60c88b8508182d1e86655e128b1d4fdf458339aa Mon Sep 17 00:00:00 2001
From: "Carlos E. Jimenez"
Date: Wed, 10 Jul 2024 17:47:44 -0400
Subject: [PATCH] Update inference

---
 README.md                                         |  6 +++---
 setup.py                                          |  8 ++++++++
 swebench/inference/README.md                      | 10 +++++++---
 swebench/inference/llamao/modeling_flash_llama.py |  2 +-
 swebench/inference/run_llama.py                   |  2 +-
 5 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 03ca1143..52bde40f 100644
--- a/README.md
+++ b/README.md
@@ -99,9 +99,9 @@ python -m swebench.harness.run_evaluation --help
 ```
 
 Additionally, the SWE-Bench repo can help you:
-* Train your own models on our pre-processed datasets
-* Run [inference](https://github.com/princeton-nlp/SWE-bench/blob/main/inference/) on existing models (either models you have on-disk like LLaMA, or models you have access to through an API like GPT-4). The inference step is where you get a repo and an issue and have the model try to generate a fix for it.
-* Run SWE-bench's [data collection procedure](https://github.com/princeton-nlp/SWE-bench/blob/main/swebench/collect/) on your own repositories, to make new SWE-Bench tasks.
+* Train your own models on our pre-processed datasets
+* Run [inference](https://github.com/princeton-nlp/SWE-bench/blob/main/swebench/inference/README.md) on existing models (either models you have on-disk like LLaMA, or models you have access to through an API like GPT-4). The inference step is where you get a repo and an issue and have the model try to generate a fix for it.
+* Run SWE-bench's [data collection procedure](https://github.com/princeton-nlp/SWE-bench/blob/main/swebench/collect/) on your own repositories, to make new SWE-Bench tasks.
 
 ## ⬇️ Downloads
 | Datasets | Models |
diff --git a/setup.py b/setup.py
index 1365ae2b..b0c29408 100644
--- a/setup.py
+++ b/setup.py
@@ -47,16 +47,24 @@
             'openai',
             'anthropic',
             'transformers',
+            'peft',
             'sentencepiece',
             'protobuf',
+            'torch',
+            'flash_attn',
+            'triton',
         ],
         'retrieval': [
             'tiktoken',
             'openai',
             'anthropic',
             'transformers',
+            'peft',
             'sentencepiece',
             'protobuf',
+            'torch',
+            'flash_attn',
+            'triton',
             'pyserini',
         ],
     },
diff --git a/swebench/inference/README.md b/swebench/inference/README.md
index 1ad7543a..ffeb5882 100644
--- a/swebench/inference/README.md
+++ b/swebench/inference/README.md
@@ -17,7 +17,7 @@ This python script is designed to run inference on a dataset using either the Op
 For instance, to run this script on SWE-bench with the ``Oracle`` context and Anthropic's Claude 2 model, you can run the following command:
 ```bash
 export ANTHROPIC_API_KEY=
-python run_api.py --dataset_name_or_path princeton-nlp/SWE-bench_oracle --model_name_or_path claude-2 --output_dir ./outputs
+python -m swebench.inference.run_api --dataset_name_or_path princeton-nlp/SWE-bench_oracle --model_name_or_path claude-2 --output_dir ./outputs
 ```
 
 You can also specify further options:
@@ -35,7 +35,11 @@ This script is similar to `run_api.py`, but it is designed to run inference usin
 
 For instance, to run this script on SWE-bench with the ``Oracle`` context and SWE-Llama, you can run the following command:
 ```bash
-python run_llama.py --dataset_path princeton-nlp/SWE-bench_oracle --model_name_or_path princeton-nlp/SWE-Llama-13b --output_dir ./outputs --temperature 0
+python -m swebench.inference.run_llama \
+    --dataset_path princeton-nlp/SWE-bench_oracle \
+    --model_name_or_path princeton-nlp/SWE-Llama-13b \
+    --output_dir ./outputs \
+    --temperature 0
 ```
 
 You can also specify further options:
@@ -54,6 +58,6 @@ Then run `run_live.py` to try solving a new issue. For example, you can try solv
 
 ```bash
 export OPENAI_API_KEY=
-python run_live.py --model_name gpt-3.5-turbo-1106 \
+python -m swebench.inference.run_live --model_name gpt-3.5-turbo-1106 \
     --issue_url https://github.com/huggingface/transformers/issues/26706
 ```
diff --git a/swebench/inference/llamao/modeling_flash_llama.py b/swebench/inference/llamao/modeling_flash_llama.py
index faf89fe8..0e6078d6 100644
--- a/swebench/inference/llamao/modeling_flash_llama.py
+++ b/swebench/inference/llamao/modeling_flash_llama.py
@@ -33,7 +33,7 @@
 from transformers.utils import logging
 from transformers.models.llama.configuration_llama import LlamaConfig
 
-from llamao.distributed_attention import DistributedAttention
+from swebench.inference.llamao.distributed_attention import DistributedAttention
 
 from flash_attn import flash_attn_kvpacked_func, flash_attn_varlen_kvpacked_func
 from flash_attn.bert_padding import unpad_input, pad_input
diff --git a/swebench/inference/run_llama.py b/swebench/inference/run_llama.py
index 4ee73992..b1850f24 100644
--- a/swebench/inference/run_llama.py
+++ b/swebench/inference/run_llama.py
@@ -21,7 +21,7 @@
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 logger = logging.getLogger(__name__)
 
-DEVICE_MAPS = json.load(open("codellama_device_maps.json"))
+DEVICE_MAPS = json.load(open(Path(__file__).parent / "codellama_device_maps.json"))
 
 def get_output_file(
     output_dir,