From a367c1a844c951262ad420dafa22db3a16e71b23 Mon Sep 17 00:00:00 2001
From: Abhijoy Sarkar
Date: Fri, 19 Jul 2024 06:32:09 +0000
Subject: [PATCH] Remove files that should be ignored

---
 config/accelerate_config.yaml                 |  2 +-
 config/merge_config.yaml                      | 21 ++----
 ..._TinyLlama-1.1B-Chat-v0.1_all_results.json | 19 -----
 ...ft_Phi-3-mini-4k-instruct_all_results.json | 19 -----
 scripts/evaluate_model.py                     |  6 +-
 scripts/evaluate_model_tensor.py              |  6 +-
 scripts/test.py                               | 75 ++++++++++++++++---
 7 files changed, 81 insertions(+), 67 deletions(-)
 delete mode 100644 evaluation_results/PY007_TinyLlama-1.1B-Chat-v0.1_all_results.json
 delete mode 100644 evaluation_results/microsoft_Phi-3-mini-4k-instruct_all_results.json

diff --git a/config/accelerate_config.yaml b/config/accelerate_config.yaml
index 21ab374..72a7c08 100644
--- a/config/accelerate_config.yaml
+++ b/config/accelerate_config.yaml
@@ -13,7 +13,7 @@ machine_rank: 0
 main_training_function: main
 mixed_precision: bf16
 num_machines: 1
-num_processes: 8
+num_processes: 7
 rdzv_backend: static
 same_network: true
 tpu_env: []
diff --git a/config/merge_config.yaml b/config/merge_config.yaml
index 67613c6..b826443 100644
--- a/config/merge_config.yaml
+++ b/config/merge_config.yaml
@@ -1,16 +1,11 @@
+merge_method: linear
 slices:
   - sources:
-      - model: microsoft/Phi-3-mini-4k-instruct
-        layer_range: [0, 21]
-        parameters:
-          weight: 0.7
-      - model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-        layer_range: [0, 21]
-        parameters:
-          weight: 0.3
-  - sources:
-      - model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-        layer_range: [22, 21] # Since Phi-3-mini-4k-instruct has no more layers, use TinyLlama layers
-merge_method: passthrough
+      - model: NousResearch/Llama-2-7b-chat-hf
+        layer_range: [0, 31] # Adjusted to match the common layers
+      - model: /home/ubuntu/profiler/Downloaded_checkpoint_pintxo
+        layer_range: [0, 31]
+parameters:
+  weight: 0.5 # Merge with equal weight
 dtype: float16
-tokenizer_source: union
+tokenizer_source: NousResearch/Llama-2-7b-chat-hf
diff --git a/evaluation_results/PY007_TinyLlama-1.1B-Chat-v0.1_all_results.json b/evaluation_results/PY007_TinyLlama-1.1B-Chat-v0.1_all_results.json
deleted file mode 100644
index 6c4d7f1..0000000
--- a/evaluation_results/PY007_TinyLlama-1.1B-Chat-v0.1_all_results.json
+++ /dev/null
@@ -1,19 +0,0 @@
-{
-    "mmlu": {
-        "acc,none": 0.2748896168636946,
-        "acc_stderr,none": 0.0037469011421359276,
-        "alias": "mmlu"
-    },
-    "hellaswag": {
-        "acc,none": 0.4206333399721171,
-        "acc_stderr,none": 0.004926518439372273,
-        "acc_norm,none": 0.5381398127862975,
-        "acc_norm_stderr,none": 0.004975243508751997,
-        "alias": "hellaswag"
-    },
-    "boolq": {
-        "acc,none": 0.5804281345565749,
-        "acc_stderr,none": 0.008631175489166724,
-        "alias": "boolq"
-    }
-}
\ No newline at end of file
diff --git a/evaluation_results/microsoft_Phi-3-mini-4k-instruct_all_results.json b/evaluation_results/microsoft_Phi-3-mini-4k-instruct_all_results.json
deleted file mode 100644
index f03737f..0000000
--- a/evaluation_results/microsoft_Phi-3-mini-4k-instruct_all_results.json
+++ /dev/null
@@ -1,19 +0,0 @@
-{
-    "mmlu": {
-        "acc,none": 0.7016806722689075,
-        "acc_stderr,none": 0.003683328920119816,
-        "alias": "mmlu"
-    },
-    "hellaswag": {
-        "acc,none": 0.5900219079864569,
-        "acc_stderr,none": 0.004908241354310222,
-        "acc_norm,none": 0.775542720573591,
-        "acc_norm_stderr,none": 0.004163717220873828,
-        "alias": "hellaswag"
-    },
-    "boolq": {
-        "acc,none": 0.8513761467889909,
-        "acc_stderr,none": 0.006221534835349704,
-        "alias": "boolq"
-    }
-}
\ No newline at end of file
diff --git a/scripts/evaluate_model.py b/scripts/evaluate_model.py
index 40c9e3b..6c3ef9d 100644
--- a/scripts/evaluate_model.py
+++ b/scripts/evaluate_model.py
@@ -50,12 +50,12 @@ def evaluate_model(model_name, model_args, datasets, num_fewshot, batch_size, de
     return all_results
 
 # Configuration parameters
-model_name = "NousResearch/Llama-2-7b-chat-hf"
-model_args = {"revision": "main", "dtype": "float", "parallelize": True}
+model_name = "/home/ubuntu/abhijoy/model_compressor/outputs/sft_mmlu/checkpoint-3566"
+model_args = {"revision": "main", "dtype": "float", "parallelize": False}
 datasets = ["mmlu", "hellaswag", "boolq"]  # Add your datasets here
 num_fewshot = 0
 batch_size = "auto:4"  # Set batch size to auto with recomputation
-device = "cuda:5"
+device = "cuda:7"
 output_dir = "./evaluation_results"
 limit = None
 
diff --git a/scripts/evaluate_model_tensor.py b/scripts/evaluate_model_tensor.py
index 8a559ab..3c6e507 100644
--- a/scripts/evaluate_model_tensor.py
+++ b/scripts/evaluate_model_tensor.py
@@ -63,12 +63,12 @@ def evaluate_model(model_name, model_args, datasets, num_fewshot, batch_size, de
     return all_results
 
 # Configuration parameters
-model_name = "NousResearch/Llama-2-7b-chat-hf"
-model_args = {"revision": "main", "dtype": "float", "parallelize": True}
+model_name = "/home/ubuntu/profiler/compact/compact_Pintxo_1071_1000_supra3_no_gate_mpo_zero_98"
+model_args = {"revision": "main", "dtype": "float", "parallelize": False}
 datasets = ["mmlu", "hellaswag", "boolq"]  # Add your datasets here
 num_fewshot = 0
 batch_size = "auto:4"  # Set batch size to auto with recomputation
-device = "cuda:0"
+device = "cuda:7"
 output_dir = "./evaluation_results"
 limit = None
 
diff --git a/scripts/test.py b/scripts/test.py
index 2b0473e..7b1104b 100644
--- a/scripts/test.py
+++ b/scripts/test.py
@@ -2,16 +2,73 @@ from model_loader import LlamaCausalLMTensor
 
 
 # Load the models
-model_1 = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
-model_2 = LlamaCausalLMTensor.from_pretrained("/home/ubuntu/profiler/Downloaded_checkpoint_pintxo")
+# model_2 = LlamaCausalLMTensor.from_pretrained("/home/ubuntu/profiler/Downloaded_checkpoint_pintxo")
+# model_1 = LlamaCausalLMTensor.from_pretrained("/home/ubuntu/profiler/compact/compact_Pintxo_999_1000_supra3_no_gate_train_zero")
+model_3 = LlamaCausalLMTensor.from_pretrained("/home/ubuntu/profiler/compact/compact_Pintxo_1071_1000_supra3_no_gate_mpo_zero_98")
+# model_4 = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
+# print(model_4)
+
+print(model_3)
+# print(model_1)
 
 # Function to print layer types
-def print_layer_types(model):
-    for name, module in model.named_modules():
-        print(f"{name}: {type(module)}")
+# def print_layer_types(model):
+#     for name, module in model.named_modules():
+#         print(f"{name}: {type(module)}")
+
+# # print("Model 1 Layers:")
+# # print_layer_types(model_1)
+
+# for params in model_1.parameters():
+#     # print(f"{params=}")
+#     print(params.shape)
+
+
+# # print("\nModel 2 Layers:")
+# # print_layer_types(model_2)
+
+
+# print number of parameters
print(f"Number of parameters in model 1: {sum(p.numel() for p in model_3.parameters())}")
+
+# def main(prompt):
+#     hf_model_path = "/home/ubuntu/profiler/compact/compact_Pintxo_999_1000_supra3_no_gate_train_zero"
+
+#     pretrain_model = LlamaCausalLMTensor.from_pretrained(hf_model_path, local_files_only=True)
+#     # print(f"Pretrained model: {pretrain_model}")
+#     device = "cuda"
+#     pretrain_model.to(device)  # Move model to GPU if available
+#     print("Model loaded successfully.")
+#     tokenizer = AutoTokenizer.from_pretrained(
+#         hf_model_path, add_bos_token=False, add_eos_token=False
+#     )
+
+#     chat = [
+#         {"role": "user", "content": prompt},
+#     ]
+#     prompt = tokenizer.apply_chat_template(chat, tokenize=False)
+
+#     inputs = tokenizer(prompt, return_tensors="pt", padding=True, max_length=512, truncation=True)
+#     inputs.to(device)  # Move input tensors to GPU if available
+
+#     generated_text = ""
+#     for _ in range(1):  # Limiting to 5 iterations for demonstration
+#         sample_output = pretrain_model.generate(
+#             **inputs, max_new_tokens=512, do_sample=True, top_p=0.92, top_k=0
+#         )
+#         decoded_output = tokenizer.decode(sample_output[0], skip_special_tokens=True)
+#         if "" in decoded_output:
+#             decoded_output = decoded_output.split("")[0] + ""
+#             generated_text += decoded_output
+#             break
+#         generated_text += decoded_output
+#         inputs["input_ids"] = sample_output  # Update input for next generation
+
+#     print("Output:\n" + 100 * '-')
+#     print(generated_text)
 
-print("Model 1 Layers:")
-print_layer_types(model_1)
-print("\nModel 2 Layers:")
-print_layer_types(model_2)
+
+# if __name__ == "__main__":
+#     prompt = "Where is spain? "
+#     main(prompt)
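
For reference, the hunks above only touch the configuration block of scripts/evaluate_model.py and scripts/evaluate_model_tensor.py; the evaluate_model() body that consumes model_name, model_args, datasets, num_fewshot, batch_size, device, output_dir and limit sits outside the diff context. A minimal sketch of what that helper might look like, assuming the scripts wrap EleutherAI's lm-evaluation-harness (the harness is not named anywhere in this patch, so the code below is a hypothetical illustration, not the repository's actual implementation):

import json
import os

import lm_eval


def evaluate_model(model_name, model_args, datasets, num_fewshot, batch_size,
                   device, output_dir, limit=None):
    # lm-evaluation-harness expects Hugging Face model arguments as a
    # "key=value,key=value" string, so flatten the dict-style model_args.
    args = {"pretrained": model_name, **model_args}
    model_args_str = ",".join(f"{k}={v}" for k, v in args.items())

    results = lm_eval.simple_evaluate(
        model="hf",
        model_args=model_args_str,
        tasks=datasets,
        num_fewshot=num_fewshot,
        batch_size=batch_size,
        device=device,
        limit=limit,
    )

    # Write one JSON per model, mirroring the *_all_results.json files deleted above.
    os.makedirs(output_dir, exist_ok=True)
    out_file = os.path.join(output_dir, f"{model_name.replace('/', '_')}_all_results.json")
    with open(out_file, "w") as f:
        json.dump(results["results"], f, indent=4)
    return results["results"]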