#!/bin/bash
#SBATCH -A pxxxxx ## Required: your allocation/account name, e.g. eXXXX, pXXXX or bXXXX
#SBATCH -p gengpu ## Required: partition (buyin, short, normal, long, gengpu, genhimem, etc.)
#SBATCH --gres gpu:a100:2 ## two A100 GPUs
#SBATCH -t 48:00:00 ## Required: how long the job needs to run (partitions restrict this)
#SBATCH --nodes 1 ## how many nodes you need (no default)
#SBATCH --cpus-per-task 32 ## CPU cores per task
#SBATCH --ntasks-per-node 1 ## how many tasks (processes) per node (default 1)
#SBATCH --mem 200G ## RAM per node (this affects your FairShare score, so don't ask for more than you need)
#SBATCH --job-name=opt_gate ## job name shown by squeue -u <netid>
#SBATCH --mail-type END ## BEGIN, END, FAIL or ALL
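
# Usage (standard SLURM commands): submit with `sbatch submit_outlier_opt.sh`,
# monitor with `squeue -u <netid>`, cancel with `scancel <jobid>`.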
module purge
module load python-miniconda3/4.12.0
module load moose/1.0.0
module load cuda/11.4.0-gcc
module load gcc/9.2.0
conda init bash
source ~/.bashrc
#conda create -n outlier python=3.9 ## one-time setup: the "outlier" env must exist before this job runs
conda activate outlier
cd ../OutEffHop
export LC_ALL=C.UTF-8
export LANG=C.UTF-8
export PYTHONPATH=${PYTHONPATH}:$(realpath "$PWD") ## make the repo root importable
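
# Note: the job reserves two A100s, but accelerate_configs/1gpu_fp16.yaml suggests
# each run below uses a single GPU; adjust --gres or the config if that is unintended.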
# Vanilla
accelerate launch --config_file accelerate_configs/1gpu_fp16.yaml run_clm.py \
--pad_to_max_length \
--wd_LN_gamma \
--with_tracking \
--report_to wandb \
--run_name test_vanilla_opt_1.3b \
--extra_tb_stats \
--seed 1000 \
--dataset_setup bookcorpus_and_wiki \
--preprocessing_num_workers 4 \
--data_cache_dir .hf_data \
--model_cache_dir .hf_cache \
--model_type opt \
--tokenizer_name facebook/opt-350m \
--max_seq_length 2048 \
--block_size 512 \
--learning_rate 0.0004 \
--lr_scheduler_type linear \
--max_train_steps 125000 \
--num_warmup_steps 2000 \
--per_device_train_batch_size 48 \
--per_device_eval_batch_size 48 \
--gradient_accumulation_steps 4 \
--max_grad_norm 1.0 \
--weight_decay 0.1 \
--checkpointing_steps 5000 \
--tb_scalar_log_interval 2000 \
--tb_hist_log_interval 10000 \
--config_path model_configs/opt-12L12H.yaml \
--attn_softmax vanilla \
--output_dir output/vanilla_opt \
--resume_from_checkpoint checkpoints_path
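
# Effective batch size per update for the 48-batch runs: 48 per device x 4
# accumulation steps = 192 sequences (192 x 512 tokens = 98,304 tokens) per device.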
# Softmax1
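# softmax1 adds 1 to the softmax denominator, exp(x_i) / (1 + sum_j exp(x_j)),
# so a head can attend to (almost) nothing instead of producing outlier activations.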
accelerate launch --config_file accelerate_configs/1gpu_fp16.yaml run_clm.py \
--pad_to_max_length \
--wd_LN_gamma \
--with_tracking \
--report_to wandb \
--run_name test_softmax1_opt_1.3b \
--extra_tb_stats \
--seed 1000 \
--dataset_setup bookcorpus_and_wiki \
--preprocessing_num_workers 4 \
--data_cache_dir .hf_data \
--model_cache_dir .hf_cache \
--model_type opt \
--tokenizer_name facebook/opt-350m \
--max_seq_length 2048 \
--block_size 512 \
--learning_rate 0.0004 \
--lr_scheduler_type linear \
--max_train_steps 125000 \
--num_warmup_steps 2000 \
--per_device_train_batch_size 48 \
--per_device_eval_batch_size 48 \
--gradient_accumulation_steps 4 \
--max_grad_norm 1.0 \
--weight_decay 0.1 \
--checkpointing_steps 5000 \
--tb_scalar_log_interval 2000 \
--tb_hist_log_interval 10000 \
--config_path model_configs/opt-12L12H.yaml \
--attn_softmax softmax1 \
--output_dir output/softmax1_opt \
--resume_from_checkpoint checkpoints_path
# Clipped softmax
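# Clipped softmax stretches the softmax output and clips it back to [0, 1] so
# attention weights can reach exactly 0; --alpha presumably parameterizes the
# stretch inside run_clm.py.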
accelerate launch --config_file accelerate_configs/1gpu_fp16.yaml run_clm.py \
--pad_to_max_length \
--wd_LN_gamma \
--with_tracking \
--report_to wandb \
--extra_tb_stats \
--seed 1000 \
--run_name test_clip_opt_1.3b \
--dataset_setup bookcorpus_and_wiki \
--preprocessing_num_workers 4 \
--data_cache_dir .hf_data \
--model_cache_dir .hf_cache \
--model_type opt \
--tokenizer_name facebook/opt-350m \
--max_seq_length 2048 \
--block_size 512 \
--learning_rate 0.0004 \
--lr_scheduler_type linear \
--max_train_steps 125000 \
--num_warmup_steps 2000 \
--per_device_train_batch_size 24 \
--per_device_eval_batch_size 24 \
--gradient_accumulation_steps 8 \
--max_grad_norm 1.0 \
--weight_decay 0.1 \
--checkpointing_steps 10000 \
--tb_scalar_log_interval 2000 \
--tb_hist_log_interval 10000 \
--config_path model_configs/opt-12L12H.yaml \
--alpha 12 \
--output_dir output/clip_opt \
--resume_from_checkpoint checkpoints_path
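
# Note: the clipped runs halve the per-device batch (48 -> 24) and double the
# gradient accumulation (4 -> 8), keeping the effective batch size at 192.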
# Clipped softmax1
accelerate launch --config_file accelerate_configs/1gpu_fp16.yaml run_clm.py \
--pad_to_max_length \
--wd_LN_gamma \
--with_tracking \
--report_to wandb \
--extra_tb_stats \
--seed 1000 \
--run_name revised_clip_softmax1_opt_1.3b \
--dataset_setup bookcorpus_and_wiki \
--preprocessing_num_workers 4 \
--data_cache_dir .hf_data \
--model_cache_dir .hf_cache \
--model_type opt \
--tokenizer_name facebook/opt-350m \
--max_seq_length 2048 \
--block_size 512 \
--learning_rate 0.0004 \
--lr_scheduler_type linear \
--max_train_steps 125000 \
--num_warmup_steps 2000 \
--per_device_train_batch_size 24 \
--per_device_eval_batch_size 24 \
--gradient_accumulation_steps 8 \
--max_grad_norm 1.0 \
--weight_decay 0.1 \
--checkpointing_steps 10000 \
--tb_scalar_log_interval 2000 \
--tb_hist_log_interval 10000 \
--config_path model_configs/opt-12L12H.yaml \
--alpha 12 \
--attn_softmax softmax1 \
--output_dir output/clip_softmax1_opt \
--resume_from_checkpoint checkpoints_path
# Gated attention
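# conditional_per_token gating multiplies each token's attention output by a
# learned gate; --attn_gate_init 0.25 presumably sets the gate's initial value.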
accelerate launch --config_file accelerate_configs/1gpu_fp16.yaml run_clm.py \
--pad_to_max_length \
--wd_LN_gamma \
--with_tracking \
--report_to wandb \
--extra_tb_stats \
--seed 1000 \
--dataset_setup bookcorpus_and_wiki \
--run_name test_gate_opt_1.3b \
--preprocessing_num_workers 4 \
--data_cache_dir .hf_data \
--model_cache_dir .hf_cache \
--model_type opt \
--tokenizer_name facebook/opt-350m \
--max_seq_length 2048 \
--block_size 512 \
--learning_rate 0.0004 \
--lr_scheduler_type linear \
--max_train_steps 125000 \
--num_warmup_steps 2000 \
--per_device_train_batch_size 48 \
--per_device_eval_batch_size 48 \
--gradient_accumulation_steps 4 \
--max_grad_norm 1.0 \
--weight_decay 0.1 \
--checkpointing_steps 10000 \
--tb_scalar_log_interval 2000 \
--tb_hist_log_interval 10000 \
--config_path model_configs/opt-12L12H.yaml \
--attn_gate_type conditional_per_token \
--attn_gate_init 0.25 \
  --output_dir output/gate_opt
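# Note: unlike the other runs, this one does not pass --resume_from_checkpoint.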
# Gated attention + softmax1
accelerate launch --config_file accelerate_configs/1gpu_fp16.yaml run_clm.py \
--pad_to_max_length \
--wd_LN_gamma \
--with_tracking \
--report_to wandb \
--extra_tb_stats \
--seed 1000 \
--dataset_setup bookcorpus_and_wiki \
--run_name test_gate_softmax1_opt_1.3b \
--preprocessing_num_workers 4 \
--data_cache_dir .hf_data \
--model_cache_dir .hf_cache \
--model_type opt \
--tokenizer_name facebook/opt-350m \
--max_seq_length 2048 \
--block_size 512 \
--learning_rate 0.0004 \
--lr_scheduler_type linear \
--max_train_steps 125000 \
--num_warmup_steps 2000 \
--per_device_train_batch_size 48 \
--per_device_eval_batch_size 48 \
--gradient_accumulation_steps 4 \
--max_grad_norm 1.0 \
--weight_decay 0.1 \
--checkpointing_steps 5000 \
--tb_scalar_log_interval 2000 \
--tb_hist_log_interval 10000 \
--config_path model_configs/opt-12L12H.yaml \
--attn_gate_type conditional_per_token \
--attn_gate_init 0.25 \
--attn_softmax softmax1 \
--output_dir output/gate_softmax1_opt \
  --resume_from_checkpoint checkpoints_path
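
# All six variants run sequentially within one job; make sure the 48 h walltime
# suffices, or split each variant into its own submission.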