From 60c88b8508182d1e86655e128b1d4fdf458339aa Mon Sep 17 00:00:00 2001
From: "Carlos E. Jimenez"
Date: Wed, 10 Jul 2024 17:47:44 -0400
Subject: [PATCH] Update inference

---
 README.md                                         |  6 +++---
 setup.py                                          |  8 ++++++++
 swebench/inference/README.md                      | 10 +++++++---
 swebench/inference/llamao/modeling_flash_llama.py |  2 +-
 swebench/inference/run_llama.py                   |  2 +-
 5 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 03ca1143..52bde40f 100644
--- a/README.md
+++ b/README.md
@@ -99,9 +99,9 @@ python -m swebench.harness.run_evaluation --help
 ```
 
 Additionally, the SWE-Bench repo can help you:
-* Train your own models on our pre-processed datasets
-* Run [inference](https://github.com/princeton-nlp/SWE-bench/blob/main/inference/) on existing models (either models you have on-disk like LLaMA, or models you have access to through an API like GPT-4). The inference step is where you get a repo and an issue and have the model try to generate a fix for it.
-* Run SWE-bench's [data collection procedure](https://github.com/princeton-nlp/SWE-bench/blob/main/swebench/collect/) on your own repositories, to make new SWE-Bench tasks.
+* Train your own models on our pre-processed datasets
+* Run [inference](https://github.com/princeton-nlp/SWE-bench/blob/main/swebench/inference/README.md) on existing models (either models you have on-disk like LLaMA, or models you have access to through an API like GPT-4). The inference step is where you get a repo and an issue and have the model try to generate a fix for it.
+* Run SWE-bench's [data collection procedure](https://github.com/princeton-nlp/SWE-bench/blob/main/swebench/collect/) on your own repositories, to make new SWE-Bench tasks.
 
 ## ⬇️ Downloads
 | Datasets | Models |
diff --git a/setup.py b/setup.py
index 1365ae2b..b0c29408 100644
--- a/setup.py
+++ b/setup.py
@@ -47,16 +47,24 @@
             'openai',
             'anthropic',
             'transformers',
+            'peft',
             'sentencepiece',
             'protobuf',
+            'torch',
+            'flash_attn',
+            'triton',
         ],
         'retrieval': [
             'tiktoken',
             'openai',
             'anthropic',
             'transformers',
+            'peft',
             'sentencepiece',
             'protobuf',
+            'torch',
+            'flash_attn',
+            'triton',
             'pyserini',
         ],
     },
diff --git a/swebench/inference/README.md b/swebench/inference/README.md
index 1ad7543a..ffeb5882 100644
--- a/swebench/inference/README.md
+++ b/swebench/inference/README.md
@@ -17,7 +17,7 @@ This python script is designed to run inference on a dataset using either the Op
 For instance, to run this script on SWE-bench with the ``Oracle`` context and Anthropic's Claude 2 model, you can run the following command:
 ```bash
 export ANTHROPIC_API_KEY=
-python run_api.py --dataset_name_or_path princeton-nlp/SWE-bench_oracle --model_name_or_path claude-2 --output_dir ./outputs
+python -m swebench.inference.run_api --dataset_name_or_path princeton-nlp/SWE-bench_oracle --model_name_or_path claude-2 --output_dir ./outputs
 ```
 
 You can also specify further options:
@@ -35,7 +35,11 @@ This script is similar to `run_api.py`, but it is designed to run inference usin
 
 For instance, to run this script on SWE-bench with the ``Oracle`` context and SWE-Llama, you can run the following command:
 ```bash
-python run_llama.py --dataset_path princeton-nlp/SWE-bench_oracle --model_name_or_path princeton-nlp/SWE-Llama-13b --output_dir ./outputs --temperature 0
+python -m swebench.inference.run_llama \
+    --dataset_path princeton-nlp/SWE-bench_oracle \
+    --model_name_or_path princeton-nlp/SWE-Llama-13b \
+    --output_dir ./outputs \
+    --temperature 0
 ```
 
 You can also specify further options:
@@ -54,6 +58,6 @@ Then run `run_live.py` to try solving a new issue. For example, you can try solv
 
 ```bash
 export OPENAI_API_KEY=
-python run_live.py --model_name gpt-3.5-turbo-1106 \
+python -m swebench.inference.run_live --model_name gpt-3.5-turbo-1106 \
     --issue_url https://github.com/huggingface/transformers/issues/26706
 ```
diff --git a/swebench/inference/llamao/modeling_flash_llama.py b/swebench/inference/llamao/modeling_flash_llama.py
index faf89fe8..0e6078d6 100644
--- a/swebench/inference/llamao/modeling_flash_llama.py
+++ b/swebench/inference/llamao/modeling_flash_llama.py
@@ -33,7 +33,7 @@
 from transformers.utils import logging
 from transformers.models.llama.configuration_llama import LlamaConfig
 
-from llamao.distributed_attention import DistributedAttention
+from swebench.inference.llamao.distributed_attention import DistributedAttention
 
 from flash_attn import flash_attn_kvpacked_func, flash_attn_varlen_kvpacked_func
 from flash_attn.bert_padding import unpad_input, pad_input
diff --git a/swebench/inference/run_llama.py b/swebench/inference/run_llama.py
index 4ee73992..b1850f24 100644
--- a/swebench/inference/run_llama.py
+++ b/swebench/inference/run_llama.py
@@ -21,7 +21,7 @@
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 logger = logging.getLogger(__name__)
 
-DEVICE_MAPS = json.load(open("codellama_device_maps.json"))
+DEVICE_MAPS = json.load(open(Path(__file__).parent / "codellama_device_maps.json"))
 
 def get_output_file(
     output_dir,