From a367c1a844c951262ad420dafa22db3a16e71b23 Mon Sep 17 00:00:00 2001
From: Abhijoy Sarkar
Date: Fri, 19 Jul 2024 06:32:09 +0000
Subject: [PATCH] Remove files that should be ignored

---
 config/accelerate_config.yaml                 |  2 +-
 config/merge_config.yaml                      | 21 ++----
 ..._TinyLlama-1.1B-Chat-v0.1_all_results.json | 19 -----
 ...ft_Phi-3-mini-4k-instruct_all_results.json | 19 -----
 scripts/evaluate_model.py                     |  6 +-
 scripts/evaluate_model_tensor.py              |  6 +-
 scripts/test.py                               | 75 ++++++++++++++++---
 7 files changed, 81 insertions(+), 67 deletions(-)
 delete mode 100644 evaluation_results/PY007_TinyLlama-1.1B-Chat-v0.1_all_results.json
 delete mode 100644 evaluation_results/microsoft_Phi-3-mini-4k-instruct_all_results.json

diff --git a/config/accelerate_config.yaml b/config/accelerate_config.yaml
index 21ab374..72a7c08 100644
--- a/config/accelerate_config.yaml
+++ b/config/accelerate_config.yaml
@@ -13,7 +13,7 @@ machine_rank: 0
 main_training_function: main
 mixed_precision: bf16
 num_machines: 1
-num_processes: 8
+num_processes: 7
 rdzv_backend: static
 same_network: true
 tpu_env: []
diff --git a/config/merge_config.yaml b/config/merge_config.yaml
index 67613c6..b826443 100644
--- a/config/merge_config.yaml
+++ b/config/merge_config.yaml
@@ -1,16 +1,11 @@
+merge_method: linear
 slices:
   - sources:
-      - model: microsoft/Phi-3-mini-4k-instruct
-        layer_range: [0, 21]
-        parameters:
-          weight: 0.7
-      - model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-        layer_range: [0, 21]
-        parameters:
-          weight: 0.3
-  - sources:
-      - model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-        layer_range: [22, 21] # Since Phi-3-mini-4k-instruct has no more layers, use TinyLlama layers
-merge_method: passthrough
+      - model: NousResearch/Llama-2-7b-chat-hf
+        layer_range: [0, 31] # Adjusted to match the common layers
+      - model: /home/ubuntu/profiler/Downloaded_checkpoint_pintxo
+        layer_range: [0, 31]
+parameters:
+  weight: 0.5 # Merge with equal weight
 dtype: float16
-tokenizer_source: union
+tokenizer_source: NousResearch/Llama-2-7b-chat-hf
diff --git a/evaluation_results/PY007_TinyLlama-1.1B-Chat-v0.1_all_results.json b/evaluation_results/PY007_TinyLlama-1.1B-Chat-v0.1_all_results.json
deleted file mode 100644
index 6c4d7f1..0000000
--- a/evaluation_results/PY007_TinyLlama-1.1B-Chat-v0.1_all_results.json
+++ /dev/null
@@ -1,19 +0,0 @@
-{
-    "mmlu": {
-        "acc,none": 0.2748896168636946,
-        "acc_stderr,none": 0.0037469011421359276,
-        "alias": "mmlu"
-    },
-    "hellaswag": {
-        "acc,none": 0.4206333399721171,
-        "acc_stderr,none": 0.004926518439372273,
-        "acc_norm,none": 0.5381398127862975,
-        "acc_norm_stderr,none": 0.004975243508751997,
-        "alias": "hellaswag"
-    },
-    "boolq": {
-        "acc,none": 0.5804281345565749,
-        "acc_stderr,none": 0.008631175489166724,
-        "alias": "boolq"
-    }
-}
\ No newline at end of file
diff --git a/evaluation_results/microsoft_Phi-3-mini-4k-instruct_all_results.json b/evaluation_results/microsoft_Phi-3-mini-4k-instruct_all_results.json
deleted file mode 100644
index f03737f..0000000
--- a/evaluation_results/microsoft_Phi-3-mini-4k-instruct_all_results.json
+++ /dev/null
@@ -1,19 +0,0 @@
-{
-    "mmlu": {
-        "acc,none": 0.7016806722689075,
-        "acc_stderr,none": 0.003683328920119816,
-        "alias": "mmlu"
-    },
-    "hellaswag": {
-        "acc,none": 0.5900219079864569,
-        "acc_stderr,none": 0.004908241354310222,
-        "acc_norm,none": 0.775542720573591,
-        "acc_norm_stderr,none": 0.004163717220873828,
-        "alias": "hellaswag"
-    },
-    "boolq": {
-        "acc,none": 0.8513761467889909,
-        "acc_stderr,none": 0.006221534835349704,
-        "alias": "boolq"
-    }
-}
\ No newline at end of file
diff --git a/scripts/evaluate_model.py b/scripts/evaluate_model.py
index 40c9e3b..6c3ef9d 100644
--- a/scripts/evaluate_model.py
+++ b/scripts/evaluate_model.py
@@ -50,12 +50,12 @@ def evaluate_model(model_name, model_args, datasets, num_fewshot, batch_size, de
     return all_results
 
 # Configuration parameters
-model_name = "NousResearch/Llama-2-7b-chat-hf"
-model_args = {"revision": "main", "dtype": "float", "parallelize": True}
+model_name = "/home/ubuntu/abhijoy/model_compressor/outputs/sft_mmlu/checkpoint-3566"
+model_args = {"revision": "main", "dtype": "float", "parallelize": False}
 datasets = ["mmlu", "hellaswag", "boolq"]  # Add your datasets here
 num_fewshot = 0
 batch_size = "auto:4"  # Set batch size to auto with recomputation
-device = "cuda:5"
+device = "cuda:7"
 output_dir = "./evaluation_results"
 limit = None
 
diff --git a/scripts/evaluate_model_tensor.py b/scripts/evaluate_model_tensor.py
index 8a559ab..3c6e507 100644
--- a/scripts/evaluate_model_tensor.py
+++ b/scripts/evaluate_model_tensor.py
@@ -63,12 +63,12 @@ def evaluate_model(model_name, model_args, datasets, num_fewshot, batch_size, de
     return all_results
 
 # Configuration parameters
-model_name = "NousResearch/Llama-2-7b-chat-hf"
-model_args = {"revision": "main", "dtype": "float", "parallelize": True}
+model_name = "/home/ubuntu/profiler/compact/compact_Pintxo_1071_1000_supra3_no_gate_mpo_zero_98"
+model_args = {"revision": "main", "dtype": "float", "parallelize": False}
 datasets = ["mmlu", "hellaswag", "boolq"]  # Add your datasets here
 num_fewshot = 0
 batch_size = "auto:4"  # Set batch size to auto with recomputation
-device = "cuda:0"
+device = "cuda:7"
 output_dir = "./evaluation_results"
 limit = None
 
diff --git a/scripts/test.py b/scripts/test.py
index 2b0473e..7b1104b 100644
--- a/scripts/test.py
+++ b/scripts/test.py
@@ -2,16 +2,73 @@ from model_loader import LlamaCausalLMTensor
 
 
 # Load the models
-model_1 = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
-model_2 = LlamaCausalLMTensor.from_pretrained("/home/ubuntu/profiler/Downloaded_checkpoint_pintxo")
+# model_2 = LlamaCausalLMTensor.from_pretrained("/home/ubuntu/profiler/Downloaded_checkpoint_pintxo")
+# model_1 = LlamaCausalLMTensor.from_pretrained("/home/ubuntu/profiler/compact/compact_Pintxo_999_1000_supra3_no_gate_train_zero")
+model_3 = LlamaCausalLMTensor.from_pretrained("/home/ubuntu/profiler/compact/compact_Pintxo_1071_1000_supra3_no_gate_mpo_zero_98")
+# model_4 = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
+# print(model_4)
+
+print(model_3)
+# print(model_1)
 
 # Function to print layer types
-def print_layer_types(model):
-    for name, module in model.named_modules():
-        print(f"{name}: {type(module)}")
+# def print_layer_types(model):
+#     for name, module in model.named_modules():
+#         print(f"{name}: {type(module)}")
+
+# # print("Model 1 Layers:")
+# # print_layer_types(model_1)
+
+# for params in model_1.parameters():
+#     # print(f"{params=}")
+#     print(params.shape)
+
+
+# # print("\nModel 2 Layers:")
+# # print_layer_types(model_2)
+
+
+# print number of parameters
print(f"Number of parameters in model 1: {sum(p.numel() for p in model_3.parameters())}")
+
+# def main(prompt):
+#     hf_model_path = "/home/ubuntu/profiler/compact/compact_Pintxo_999_1000_supra3_no_gate_train_zero"
+
+#     pretrain_model = LlamaCausalLMTensor.from_pretrained(hf_model_path, local_files_only=True)
+#     # print(f"Pretrained model: {pretrain_model}")
+#     device = "cuda"
+#     pretrain_model.to(device)  # Move model to GPU if available
+#     print("Model loaded successfully.")
+#     tokenizer = AutoTokenizer.from_pretrained(
+#         hf_model_path, add_bos_token=False, add_eos_token=False
+#     )
+
+#     chat = [
+#         {"role": "user", "content": prompt},
+#     ]
+#     prompt = tokenizer.apply_chat_template(chat, tokenize=False)
+
+#     inputs = tokenizer(prompt, return_tensors="pt", padding=True, max_length=512, truncation=True)
+#     inputs.to(device)  # Move input tensors to GPU if available
+
+#     generated_text = ""
+#     for _ in range(1):  # Limiting to 5 iterations for demonstration
+#         sample_output = pretrain_model.generate(
+#             **inputs, max_new_tokens=512, do_sample=True, top_p=0.92, top_k=0
+#         )
+#         decoded_output = tokenizer.decode(sample_output[0], skip_special_tokens=True)
+#         if "" in decoded_output:
+#             decoded_output = decoded_output.split("")[0] + ""
+#             generated_text += decoded_output
+#             break
+#         generated_text += decoded_output
+#         inputs["input_ids"] = sample_output  # Update input for next generation
+
+#     print("Output:\n" + 100 * '-')
+#     print(generated_text)
 
-print("Model 1 Layers:")
-print_layer_types(model_1)
-print("\nModel 2 Layers:")
-print_layer_types(model_2)
+
+# if __name__ == "__main__":
+#     prompt = "Where is spain? "
+#     main(prompt)
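
For reference, the hunks above only touch the configuration block of scripts/evaluate_model.py and scripts/evaluate_model_tensor.py; the evaluate_model() body that consumes model_name, model_args, datasets, num_fewshot, batch_size, device, output_dir and limit sits outside the diff context. A minimal sketch of what that helper might look like, assuming the scripts wrap EleutherAI's lm-evaluation-harness (the harness is not named anywhere in this patch, so the code below is a hypothetical illustration, not the repository's actual implementation):

import json
import os

import lm_eval


def evaluate_model(model_name, model_args, datasets, num_fewshot, batch_size,
                   device, output_dir, limit=None):
    # lm-evaluation-harness expects Hugging Face model arguments as a
    # "key=value,key=value" string, so flatten the dict-style model_args.
    args = {"pretrained": model_name, **model_args}
    model_args_str = ",".join(f"{k}={v}" for k, v in args.items())

    results = lm_eval.simple_evaluate(
        model="hf",
        model_args=model_args_str,
        tasks=datasets,
        num_fewshot=num_fewshot,
        batch_size=batch_size,
        device=device,
        limit=limit,
    )

    # Write one JSON per model, mirroring the *_all_results.json files deleted above.
    os.makedirs(output_dir, exist_ok=True)
    out_file = os.path.join(output_dir, f"{model_name.replace('/', '_')}_all_results.json")
    with open(out_file, "w") as f:
        json.dump(results["results"], f, indent=4)
    return results["results"]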