#!/bin/bash
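# train_zero_shot.sh -- SLURM batch script that trains LOCA in the
# zero-shot setting (no exemplar boxes, see --zero_shot below) on FSC147.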
#SBATCH --job-name=loca_zero_shot
#SBATCH --output=loca_zero_shot_out.txt
#SBATCH --error=loca_zero_shot_err.txt
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=2
#SBATCH --cpus-per-task=12
#SBATCH --partition=gpu
#SBATCH --gres=gpu:2
#SBATCH --time=1-00:00:00
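# Use the first node in the allocation as the rendezvous host for
# torch.distributed; every task connects to MASTER_ADDR:MASTER_PORT.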
master_addr=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
export MASTER_ADDR=$master_addr
export MASTER_PORT=50188
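# NCCL workarounds: disable peer-to-peer and InfiniBand transports
# (NCCL falls back to sockets/shared memory) and make collectives
# blocking so a hung rank fails with a timeout instead of stalling silently.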
export NCCL_P2P_DISABLE=1
export NCCL_IB_DISABLE=1
export NCCL_BLOCKING_WAIT=1
# export NCCL_DEBUG=INFO
module load Anaconda3
# Make "conda activate" available in this non-interactive batch shell,
# then activate the PyTorch environment.
eval "$(conda shell.bash hook)"
conda activate pytorch
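# srun launches one training process per task (2 per node, one per GPU);
# train.py is expected to pick up the SLURM/MASTER_* variables to
# initialize distributed data-parallel training.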
srun python train.py \
--model_name loca_zero_shot \
--data_path /d/hpc/projects/FRI/nd1776/data/fsc147 \
--model_path /d/hpc/projects/FRI/nd1776/pretrained \
--backbone resnet50 \
--swav_backbone \
--reduction 8 \
--image_size 512 \
--num_enc_layers 3 \
--num_ope_iterative_steps 3 \
--emb_dim 256 \
--num_heads 8 \
--kernel_dim 3 \
--num_objects 3 \
--epochs 200 \
--lr 1e-4 \
--backbone_lr 0 \
--lr_drop 300 \
--weight_decay 1e-4 \
--batch_size 4 \
--dropout 0.1 \
--num_workers 8 \
--max_grad_norm 0.1 \
--aux_weight 0.3 \
--tiling_p 0.5 \
--pre_norm \
--zero_shot