From 1401d1068033a5186c38c843b876048e507d6f30 Mon Sep 17 00:00:00 2001 From: Yifan Mai Date: Tue, 15 Oct 2024 13:32:27 -0700 Subject: [PATCH] Add Qwen2.5 Instruct Turbo models on Together AI (#3063) --- src/helm/config/model_deployments.yaml | 14 ++++++++++++++ src/helm/config/model_metadata.yaml | 16 ++++++++++++++++ src/helm/config/tokenizer_configs.yaml | 8 ++++++++ 3 files changed, 38 insertions(+) diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml index 4d0188adc6..59662e0591 100644 --- a/src/helm/config/model_deployments.yaml +++ b/src/helm/config/model_deployments.yaml @@ -2543,6 +2543,20 @@ model_deployments: client_spec: class_name: "helm.clients.together_client.TogetherChatClient" + - name: together/qwen2.5-7b-instruct-turbo + model_name: qwen/qwen2.5-7b-instruct-turbo + tokenizer_name: qwen/qwen2.5-7b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + + - name: together/qwen2.5-72b-instruct-turbo + model_name: qwen/qwen2.5-72b-instruct-turbo + tokenizer_name: qwen/qwen2.5-7b-instruct + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.together_client.TogetherChatClient" + - name: huggingface/qwen-vl model_name: qwen/qwen-vl tokenizer_name: qwen/qwen-vl diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml index d2ef81b027..9df74e17c6 100644 --- a/src/helm/config/model_metadata.yaml +++ b/src/helm/config/model_metadata.yaml @@ -2620,6 +2620,22 @@ models: release_date: 2024-06-07 tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + - name: qwen/qwen2.5-7b-instruct-turbo + display_name: Qwen2.5 Instruct Turbo (7B) + description: Qwen2.5 Instruct Turbo (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured 
data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2)) + creator_organization_name: Qwen + access: open + release_date: 2024-09-19 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: qwen/qwen2.5-72b-instruct-turbo + display_name: Qwen2.5 Instruct Turbo (72B) + description: Qwen2.5 Instruct Turbo (72B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2)) + creator_organization_name: Qwen + access: open + release_date: 2024-09-19 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + - name: qwen/qwen-vl display_name: Qwen-VL description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)). 
diff --git a/src/helm/config/tokenizer_configs.yaml b/src/helm/config/tokenizer_configs.yaml index c3839c103a..c960114d76 100644 --- a/src/helm/config/tokenizer_configs.yaml +++ b/src/helm/config/tokenizer_configs.yaml @@ -513,6 +513,14 @@ tokenizer_configs: end_of_text_token: "<|im_end|>" prefix_token: "<|im_start|>" + - name: qwen/qwen2.5-7b-instruct + tokenizer_spec: + class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer" + args: + pretrained_model_name_or_path: Qwen/Qwen2.5-7B-Instruct + end_of_text_token: "<|im_end|>" + prefix_token: "<|im_start|>" + - name: qwen/qwen-vl tokenizer_spec: class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"