# dailydialog.yaml
# ------------------------ PyTorch Lightning Configurations --------------------------------------
seed: 111 # Training seed set everywhere
verbose: False # Enables verbose logging.
experiment_name: 111 # Name used to identify this experiment run.
# ----------------------------- Early Stopping ----------------------------------------------------
monitor: macro-f1 # Metric to monitor during training
min_delta: 0.0 # Minimum change in the monitored metric to qualify as an improvement.
patience: 5 # Number of epochs without improvement before stopping training
metric_mode: max # 'min' or 'max' depending if we wish to maximize or minimize the metric
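# For reference, these keys typically map onto Lightning's EarlyStopping callback
# roughly as follows (an illustrative sketch, not necessarily this repo's exact wiring):
#   from pytorch_lightning.callbacks import EarlyStopping
#   early_stopping = EarlyStopping(
#       monitor="macro-f1",  # 'monitor' above
#       min_delta=0.0,       # 'min_delta' above
#       patience=5,          # 'patience' above
#       mode="max",          # 'metric_mode' above
#   )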
# ----------------------------- Model Checkpoint --------------------------------------------------
save_top_k: 1 # How many checkpoints we want to save.
save_weights_only: True # Saves the model weights only
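# Likewise, a sketch of the corresponding ModelCheckpoint callback (illustrative
# only; 'monitor' and 'mode' are reused from the early-stopping keys above):
#   from pytorch_lightning.callbacks import ModelCheckpoint
#   checkpoint = ModelCheckpoint(
#       save_top_k=1,            # 'save_top_k' above
#       save_weights_only=True,  # 'save_weights_only' above
#       monitor="macro-f1",
#       mode="max",
#   )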
# ----------------------------- Lightning Trainer --------------------------------------------------
gradient_clip_val: 1.0 # Clips gradients when the norm value exceeds 1.0
gpus: 1 # Number of GPUs to use. (1 is recommended)
deterministic: True # If True, enables cudnn.deterministic. Might make your system slower, but ensures reproducibility.
overfit_batches: 0.0 # DEBUG: uses this fraction of the training data. If nonzero, the same training subset is used for validation and testing.
accumulate_grad_batches: 1 # Gradient accumulation steps (must be a positive integer; 1 disables accumulation).
min_epochs: 1 # Min number of epochs
max_epochs: 10 # Max number of epochs
# limit_train_batches: 0.4 # To train with a lower percentage of the training data you can use this flag
# limit_val_batches: 500 # Same as the previous flag but for validation.
# val_check_interval: 0.25 # How often within one training epoch to check the validation set. Can specify as float or int.
# precision: 16 # Train with 16-bit precision.
# profiler: True # To profile individual steps during training and assist in identifying bottlenecks.
# resume_from_checkpoint: checkpoint.ckpt # To resume training from a specific checkpoint pass in the path here.
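# The flags in this section feed pytorch_lightning.Trainer. A minimal sketch,
# assuming a pre-2.0 Lightning version (which still accepts the 'gpus' argument):
#   import pytorch_lightning as pl
#   trainer = pl.Trainer(
#       gradient_clip_val=1.0,
#       gpus=1,
#       deterministic=True,
#       accumulate_grad_batches=1,
#       min_epochs=1,
#       max_epochs=10,
#       callbacks=[early_stopping, checkpoint],  # sketches from the sections above
#   )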
# --------------------------------- Dataset -------------------------------------------------------
pretrained_model: roberta-base # To run MiniBERT, set this flag to: google/bert_uncased_L-2_H-128_A-2
dataset_path: data/dailydialog/ # Options: data/ekman/, data/goemotions/, or data/dailydialog/
dataset: dailydialog # Options: ekman, goemotions, dailydialog
labels: dailydialog # Options: ekman, goemotions, polarity, dailydialog
batch_size: 4 # Batch size used during training.
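# 'pretrained_model' is a Hugging Face model identifier. Loading the encoder and
# its tokenizer would look roughly like this (sketch, assuming the transformers library):
#   from transformers import AutoModel, AutoTokenizer
#   tokenizer = AutoTokenizer.from_pretrained("roberta-base")
#   encoder = AutoModel.from_pretrained("roberta-base")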
# -------------------------------- Transformer Fine-tuning -----------------------------------------------
nr_frozen_epochs: 1 # Number of epochs where the encoder model is frozen (can also be a float between 0 and 1).
encoder_learning_rate: 1.0e-5 # Learning rate to be used for the encoder parameters.
learning_rate: 5.0e-5 # Learning rate to be used on the classification head.
layerwise_decay: 0.95 # Learning rate decay for the encoder layers.
context: True # Whether to prepend preceding dialogue turns to the input.
context_turns: 3 # Number of preceding turns used as context.
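# 'layerwise_decay' scales the encoder learning rate per layer: the top layer trains
# at encoder_learning_rate, and each layer below is scaled by a further factor of
# layerwise_decay. A minimal sketch of the resulting optimizer parameter groups
# ('encoder' from the dataset sketch above; 'classification_head' is a hypothetical
# name for the model's output head, not this repo's exact code):
#   import torch
#   layers = encoder.encoder.layer  # transformer layers of roberta-base
#   num_layers = len(layers)
#   groups = [
#       {"params": layer.parameters(), "lr": 1.0e-5 * 0.95 ** (num_layers - 1 - i)}
#       for i, layer in enumerate(layers)
#   ]
#   groups.append({"params": classification_head.parameters(), "lr": 5.0e-5})
#   optimizer = torch.optim.Adam(groups)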