Skip to content

Commit

Permalink
Add all sizes of LLaMA on Together (#1740)
Browse files Browse the repository at this point in the history
  • Loading branch information
yifanmai authored Jul 25, 2023
1 parent 87ee885 commit 33bda52
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 7 deletions.
23 changes: 22 additions & 1 deletion src/helm/benchmark/static/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -407,13 +407,34 @@ models:
num_parameters: 30000000000
release_date: 2022-11-15
todo: true
- name: huggingface/llama-7b
- name: meta/llama-7b
display_name: LLaMA (7B)
description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
creator_organization: Meta
access: open
num_parameters: 7000000000
release_date: 2023-02-24
- name: meta/llama-13b
display_name: LLaMA (13B)
description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
creator_organization: Meta
access: open
num_parameters: 13000000000
release_date: 2023-02-24
- name: meta/llama-30b
display_name: LLaMA (30B)
description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
creator_organization: Meta
access: open
num_parameters: 30000000000
release_date: 2023-02-24
- name: meta/llama-65b
display_name: LLaMA (65B)
description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
creator_organization: Meta
access: open
num_parameters: 65000000000
release_date: 2023-02-24
- name: meta/llama-2-7b
display_name: LLaMA-2 (7B)
description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length of Llama 1.
Expand Down
5 changes: 4 additions & 1 deletion src/helm/benchmark/window_services/window_service_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,10 @@ def get_window_service(model_name: str, service: TokenizerService) -> WindowServ
elif model_name == "nvidia/megatron-gpt2":
window_service = MegatronWindowService(service)
elif model_name in [
"together/llama-7b",
"meta/llama-7b",
"meta/llama-13b",
"meta/llama-30b",
"meta/llama-65b",
"together/alpaca-7b",
"together/vicuna-13b",
]:
Expand Down
4 changes: 2 additions & 2 deletions src/helm/proxy/clients/test_together_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def teardown_method(self, method):
),
(
Request(
model="together/llama-7b",
model="meta/llama-7b",
prompt="I am a computer scientist.",
temperature=0,
num_completions=4,
Expand All @@ -57,7 +57,7 @@ def teardown_method(self, method):
"echo": True,
"logprobs": 3,
"max_tokens": 24,
"model": "llama-7b-full-precision",
"model": "huggyllama/llama-7b",
"n": 4,
"prompt": "I am a computer scientist.",
"request_type": "language-model-inference",
Expand Down
10 changes: 8 additions & 2 deletions src/helm/proxy/clients/together_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
_ASYNC_MODELS: Set[str] = {
# Legacy models
"alpaca-7b",
"llama-7b",
"pythia-7b",
"vicuna-13b",
# Production models
Expand All @@ -26,6 +25,10 @@
"dolly-v2-3b",
"dolly-v2-7b",
"dolly-v2-12b",
"llama-7b",
"llama-13b",
"llama-30b",
"llama-65b",
"llama-2-7b",
"llama-2-13b",
"llama-2-70b",
Expand All @@ -52,7 +55,6 @@
# alpaca-7b is half-precision
# alpaca-7b-full-precision is full-precision
"alpaca-7b": "alpaca-7b-full-precision",
"llama-7b": "llama-7b-full-precision",
"pythia-7b": "pythia-7b-full-precision",
"vicuna-13b": "vicuna-13b-full-precision",
# Production models
Expand All @@ -61,6 +63,10 @@
"dolly-v2-3b": "databricks/dolly-v2-3b",
"dolly-v2-7b": "databricks/dolly-v2-7b",
"dolly-v2-12b": "databricks/dolly-v2-12b",
"llama-7b": "huggyllama/llama-7b",
"llama-13b": "huggyllama/llama-13b",
"llama-30b": "huggyllama/llama-30b",
"llama-65b": "huggyllama/llama-65b",
"llama-2-7b": "togethercomputer/llama-2-7b",
"llama-2-13b": "togethercomputer/llama-2-13b",
"llama-2-70b": "togethercomputer/llama-2-70b",
Expand Down
17 changes: 16 additions & 1 deletion src/helm/proxy/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,22 @@ def engine(self) -> str:
# Meta
Model(
group="together",
name="together/llama-7b",
name="meta/llama-7b",
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG],
),
Model(
group="together",
name="meta/llama-13b",
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG],
),
Model(
group="together",
name="meta/llama-30b",
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG],
),
Model(
group="together",
name="meta/llama-65b",
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG],
),
Model(
Expand Down

0 comments on commit 33bda52

Please sign in to comment.