Commit
Remove files that should be ignored
acebot712 committed Jul 19, 2024
1 parent be90349 commit a367c1a
Showing 7 changed files with 81 additions and 67 deletions.
config/accelerate_config.yaml (2 changes: 1 addition & 1 deletion)
@@ -13,7 +13,7 @@ machine_rank: 0
 main_training_function: main
 mixed_precision: bf16
 num_machines: 1
-num_processes: 8
+num_processes: 7
 rdzv_backend: static
 same_network: true
 tpu_env: []
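The drop from num_processes: 8 to 7 presumably leaves one GPU free for the evaluation scripts below, which now pin to cuda:7. A minimal sanity-check sketch for this setting, assuming the config lives at config/accelerate_config.yaml and PyYAML plus PyTorch are installed:

import torch
import yaml  # PyYAML

# Read the accelerate config and make sure it does not request more worker
# processes than there are visible CUDA devices.
with open("config/accelerate_config.yaml") as f:
    cfg = yaml.safe_load(f)

visible = torch.cuda.device_count()
assert cfg["num_processes"] <= visible, (
    f"num_processes={cfg['num_processes']} but only {visible} GPUs are visible"
)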
config/merge_config.yaml (21 changes: 8 additions & 13 deletions)
@@ -1,16 +1,11 @@
 merge_method: linear
+slices:
 - sources:
-  - model: microsoft/Phi-3-mini-4k-instruct
-    layer_range: [0, 21]
-    parameters:
-      weight: 0.7
-  - model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-    layer_range: [0, 21]
-    parameters:
-      weight: 0.3
-- sources:
-  - model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-    layer_range: [22, 21] # Since Phi-3-mini-4k-instruct has no more layers, use TinyLlama layers
-merge_method: passthrough
+  - model: NousResearch/Llama-2-7b-chat-hf
+    layer_range: [0, 31] # Adjusted to match the common layers
+  - model: /home/ubuntu/profiler/Downloaded_checkpoint_pintxo
+    layer_range: [0, 31]
+parameters:
+  weight: 0.5 # Merge with equal weight
 dtype: float16
-tokenizer_source: union
+tokenizer_source: NousResearch/Llama-2-7b-chat-hf
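Both source models in the new config are Llama-2-7B-architecture checkpoints, so layer_range: [0, 31] is meant to span all 32 decoder layers. A small sketch for confirming that assumption before merging, taking the model names from the config and assuming transformers is installed:

from transformers import AutoConfig

# layer_range: [0, 31] only makes sense if both models expose 32 decoder layers.
for name in [
    "NousResearch/Llama-2-7b-chat-hf",
    "/home/ubuntu/profiler/Downloaded_checkpoint_pintxo",
]:
    config = AutoConfig.from_pretrained(name)
    print(f"{name}: {config.num_hidden_layers} hidden layers")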
evaluation_results/PY007_TinyLlama-1.1B-Chat-v0.1_all_results.json (19 changes: 0 additions & 19 deletions)

This file was deleted.

This file was deleted.

scripts/evaluate_model.py (6 changes: 3 additions & 3 deletions)
@@ -50,12 +50,12 @@ def evaluate_model(model_name, model_args, datasets, num_fewshot, batch_size, de
     return all_results

 # Configuration parameters
-model_name = "NousResearch/Llama-2-7b-chat-hf"
-model_args = {"revision": "main", "dtype": "float", "parallelize": True}
+model_name = "/home/ubuntu/abhijoy/model_compressor/outputs/sft_mmlu/checkpoint-3566"
+model_args = {"revision": "main", "dtype": "float", "parallelize": False}
 datasets = ["mmlu", "hellaswag", "boolq"] # Add your datasets here
 num_fewshot = 0
 batch_size = "auto:4" # Set batch size to auto with recomputation
-device = "cuda:5"
+device = "cuda:7"
 output_dir = "./evaluation_results"
 limit = None

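The configuration block (tasks mmlu/hellaswag/boolq, num_fewshot, batch_size "auto:4", device, limit) mirrors the lm-evaluation-harness interface, so evaluate_model presumably wraps it. A rough sketch of an equivalent direct call, under the assumption that lm_eval is installed and the checkpoint path loads through the Hugging Face backend:

import lm_eval

# Roughly what the configuration above maps to when evaluated through
# lm-evaluation-harness's Hugging Face ("hf") backend.
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=/home/ubuntu/abhijoy/model_compressor/outputs/sft_mmlu/checkpoint-3566,revision=main,dtype=float,parallelize=False",
    tasks=["mmlu", "hellaswag", "boolq"],
    num_fewshot=0,
    batch_size="auto:4",
    device="cuda:7",
    limit=None,
)
print(results["results"])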
scripts/evaluate_model_tensor.py (6 changes: 3 additions & 3 deletions)
@@ -63,12 +63,12 @@ def evaluate_model(model_name, model_args, datasets, num_fewshot, batch_size, de
     return all_results

 # Configuration parameters
-model_name = "NousResearch/Llama-2-7b-chat-hf"
-model_args = {"revision": "main", "dtype": "float", "parallelize": True}
+model_name = "/home/ubuntu/profiler/compact/compact_Pintxo_1071_1000_supra3_no_gate_mpo_zero_98"
+model_args = {"revision": "main", "dtype": "float", "parallelize": False}
 datasets = ["mmlu", "hellaswag", "boolq"] # Add your datasets here
 num_fewshot = 0
 batch_size = "auto:4" # Set batch size to auto with recomputation
-device = "cuda:0"
+device = "cuda:7"
 output_dir = "./evaluation_results"
 limit = None

scripts/test.py (75 changes: 66 additions & 9 deletions)
@@ -2,16 +2,73 @@
 from model_loader import LlamaCausalLMTensor

 # Load the models
-model_1 = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
-model_2 = LlamaCausalLMTensor.from_pretrained("/home/ubuntu/profiler/Downloaded_checkpoint_pintxo")
+# model_2 = LlamaCausalLMTensor.from_pretrained("/home/ubuntu/profiler/Downloaded_checkpoint_pintxo")
+# model_1 = LlamaCausalLMTensor.from_pretrained("/home/ubuntu/profiler/compact/compact_Pintxo_999_1000_supra3_no_gate_train_zero")
+model_3 = LlamaCausalLMTensor.from_pretrained("/home/ubuntu/profiler/compact/compact_Pintxo_1071_1000_supra3_no_gate_mpo_zero_98")
+# model_4 = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf")

+# print(model_4)
+
+print(model_3)
+# print(model_1)
 # Function to print layer types
-def print_layer_types(model):
-    for name, module in model.named_modules():
-        print(f"{name}: {type(module)}")
+# def print_layer_types(model):
+#     for name, module in model.named_modules():
+#         print(f"{name}: {type(module)}")
+
+# # print("Model 1 Layers:")
+# # print_layer_types(model_1)
+
+# for params in model_1.parameters():
+#     # print(f"{params=}")
+#     print(params.shape)
+
+
+
+# # print("\nModel 2 Layers:")
+# # print_layer_types(model_2)
+
+
+# print number of parameters
+print(f"Number of parameters in model 1: {sum(p.numel() for p in model_3.parameters())}")
+
+# def main(prompt):
+#     hf_model_path = "/home/ubuntu/profiler/compact/compact_Pintxo_999_1000_supra3_no_gate_train_zero"
+
+#     pretrain_model = LlamaCausalLMTensor.from_pretrained(hf_model_path, local_files_only=True)
+#     # print(f"Pretrained model: {pretrain_model}")
+#     device = "cuda"
+#     pretrain_model.to(device)  # Move model to GPU if available
+#     print("Model loaded successfully.")
+#     tokenizer = AutoTokenizer.from_pretrained(
+#         hf_model_path, add_bos_token=False, add_eos_token=False
+#     )

+#     chat = [
+#         {"role": "user", "content": prompt},
+#     ]
+#     prompt = tokenizer.apply_chat_template(chat, tokenize=False)
+
+#     inputs = tokenizer(prompt, return_tensors="pt", padding=True, max_length=512, truncation=True)
+#     inputs.to(device)  # Move input tensors to GPU if available
+
+#     generated_text = ""
+#     for _ in range(1):  # Limiting to 5 iterations for demonstration
+#         sample_output = pretrain_model.generate(
+#             **inputs, max_new_tokens=512, do_sample=True, top_p=0.92, top_k=0
+#         )
+#         decoded_output = tokenizer.decode(sample_output[0], skip_special_tokens=True)
+#         if "</s>" in decoded_output:
+#             decoded_output = decoded_output.split("</s>")[0] + "</s>"
+#             generated_text += decoded_output
+#             break
+#         generated_text += decoded_output
+#         inputs["input_ids"] = sample_output  # Update input for next generation
+
+#     print("Output:\n" + 100 * '-')
+#     print(generated_text)

-print("Model 1 Layers:")
-print_layer_types(model_1)

-print("\nModel 2 Layers:")
-print_layer_types(model_2)
+# if __name__ == "__main__":
+#     prompt = "Where is spain? "
+#     main(prompt)
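For reference, a compact sketch of the comparison test.py is circling around: counting parameters of the base chat model against the compressed checkpoint, with the paths and the project-local LlamaCausalLMTensor class taken from the script above:

from transformers import AutoModelForCausalLM
from model_loader import LlamaCausalLMTensor  # project-local loader used by test.py

def count_params(model):
    # Total number of parameters across all tensors in the model.
    return sum(p.numel() for p in model.parameters())

base = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
compact = LlamaCausalLMTensor.from_pretrained(
    "/home/ubuntu/profiler/compact/compact_Pintxo_1071_1000_supra3_no_gate_mpo_zero_98"
)
print(f"base model parameters:    {count_params(base):,}")
print(f"compact model parameters: {count_params(compact):,}")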
