From 5a825ad23b5e5ef319e6e4b31ccb74441dda44c4 Mon Sep 17 00:00:00 2001
From: Pawel
Date: Tue, 13 Jul 2021 04:52:18 -0700
Subject: [PATCH] chore: fix scientific notation in example yamls (#2688)

Example YAMLs use scientific notation like 1e-5, which PyYAML parses as a
str instead of a float: PyYAML, while widespread, is not a YAML 1.2-compliant
parser yet, and its YAML 1.1 resolver only recognizes a float when the
mantissa contains a decimal point. Make users' lives a bit easier by writing
YAML 1.1-friendly values such as 1.0e-5 in the example configs. (A short
snippet demonstrating the parsing difference is appended after the diff.)
---
 .../tests/fixtures/mnist_estimator/single-multi-slot.yaml | 2 +- e2e_tests/tests/fixtures/mnist_estimator/single.yaml | 2 +- examples/computer_vision/cifar10_pytorch/adaptive.yaml | 2 +- examples/computer_vision/cifar10_pytorch/const.yaml | 4 ++-- examples/computer_vision/cifar10_pytorch/distributed.yaml | 4 ++-- examples/computer_vision/cifar10_tf_keras/adaptive.yaml | 2 +- examples/computer_vision/cifar10_tf_keras/const.yaml | 4 ++-- examples/computer_vision/cifar10_tf_keras/distributed.yaml | 4 ++-- .../deformabledetr_coco_pytorch/const_fake.yaml | 6 +++--- .../deformabledetr_coco_pytorch/distributed.yaml | 6 +++--- .../deformabledetr_coco_pytorch/finetune.yaml | 4 ++-- .../deformabledetr_coco_pytorch/finetune_adaptive.yaml | 6 +++--- examples/computer_vision/detr_coco_pytorch/const_fake.yaml | 6 +++--- examples/computer_vision/detr_coco_pytorch/distributed.yaml | 6 +++--- examples/computer_vision/detr_coco_pytorch/finetune.yaml | 4 ++-- .../detr_coco_pytorch/finetune_adaptive.yaml | 6 +++--- examples/computer_vision/efficientdet_pytorch/adaptive.yaml | 2 +- examples/computer_vision/efficientdet_pytorch/const.yaml | 4 ++-- .../computer_vision/efficientdet_pytorch/const_fake.yaml | 4 ++-- .../computer_vision/efficientdet_pytorch/distributed.yaml | 4 ++-- examples/computer_vision/iris_tf_keras/adaptive.yaml | 2 +- examples/computer_vision/iris_tf_keras/const.yaml | 4 ++-- examples/computer_vision/iris_tf_keras/distributed.yaml | 4 ++-- examples/computer_vision/mnist_estimator/const.yaml | 2 +- examples/computer_vision/mnist_estimator/distributed.yaml | 2 +- examples/computer_vision/mnist_tf_layers/const.yaml | 2 +- examples/computer_vision/unets_tf_keras/const.yaml | 4 ++-- examples/computer_vision/unets_tf_keras/distributed.yaml | 4 ++-- examples/features/data_layer_mnist_estimator/const.yaml | 2 +- .../features/data_layer_mnist_estimator/distributed.yaml | 2 +- examples/features/data_layer_mnist_tf_keras/const.yaml | 4 ++-- .../darts_cifar10_pytorch/adaptive.yaml | 2 +- .../darts_cifar10_pytorch/constrained_adaptive.yaml | 2 +- .../darts_cifar10_pytorch/constrained_random.yaml | 2 +- .../darts_penntreebank_pytorch/adaptive.yaml | 4 ++-- .../darts_penntreebank_pytorch/const.yaml | 4 ++-- .../meta_learning/protonet_omniglot_pytorch/20way1shot.yaml | 2 +- .../meta_learning/protonet_omniglot_pytorch/20way5shot.yaml | 2 +- examples/nas/gaea_pytorch/eval/const.yaml | 2 +- examples/nas/gaea_pytorch/eval/distributed.yaml | 2 +- .../nas/gaea_pytorch/eval/distributed_no_data_download.yaml | 2 +- examples/nas/gaea_pytorch/search/const.yaml | 2 +- examples/nlp/albert_squad_pytorch/const.yaml | 4 ++-- examples/nlp/albert_squad_pytorch/distributed_64gpu.yaml | 2 +- examples/nlp/albert_squad_pytorch/distributed_8gpu.yaml | 4 ++-- examples/nlp/bert_glue_pytorch/const.yaml | 4 ++-- examples/nlp/bert_glue_pytorch/distributed.yaml | 4 ++-- examples/nlp/bert_squad_pytorch/const.yaml | 4 ++-- examples/nlp/bert_squad_pytorch/distributed.yaml | 4 ++-- .../examples/huggingface/language-modeling/clm_config.yaml | 4 ++-- .../examples/huggingface/language-modeling/mlm_config.yaml | 4
++-- .../examples/huggingface/language-modeling/plm_config.yaml | 4 ++-- .../examples/huggingface/multiple-choice/swag_config.yaml | 4 ++-- .../examples/huggingface/question-answering/squad.yaml | 4 ++-- .../huggingface/question-answering/squad_beam_search.yaml | 4 ++-- .../examples/huggingface/question-answering/squad_v2.yaml | 4 ++-- .../question-answering/squad_v2_beam_search.yaml | 4 ++-- .../huggingface/text-classification/glue_config.yaml | 4 ++-- .../huggingface/text-classification/xnli_config.yaml | 4 ++-- .../huggingface/token-classification/ner_config.yaml | 4 ++-- 60 files changed, 105 insertions(+), 105 deletions(-) diff --git a/e2e_tests/tests/fixtures/mnist_estimator/single-multi-slot.yaml b/e2e_tests/tests/fixtures/mnist_estimator/single-multi-slot.yaml index f5ea1e1f933..1605a1ccdaa 100644 --- a/e2e_tests/tests/fixtures/mnist_estimator/single-multi-slot.yaml +++ b/e2e_tests/tests/fixtures/mnist_estimator/single-multi-slot.yaml @@ -1,6 +1,6 @@ description: mnist-estimator-single hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 64 hidden_layer_1: 2500 hidden_layer_2: 1000 diff --git a/e2e_tests/tests/fixtures/mnist_estimator/single.yaml b/e2e_tests/tests/fixtures/mnist_estimator/single.yaml index 2696e9fc1f8..710d0c4f64e 100644 --- a/e2e_tests/tests/fixtures/mnist_estimator/single.yaml +++ b/e2e_tests/tests/fixtures/mnist_estimator/single.yaml @@ -1,6 +1,6 @@ description: mnist-estimator-single hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 64 hidden_layer_1: 2500 hidden_layer_2: 1000 diff --git a/examples/computer_vision/cifar10_pytorch/adaptive.yaml b/examples/computer_vision/cifar10_pytorch/adaptive.yaml index 59b85883a15..f8deaf5f277 100644 --- a/examples/computer_vision/cifar10_pytorch/adaptive.yaml +++ b/examples/computer_vision/cifar10_pytorch/adaptive.yaml @@ -5,7 +5,7 @@ hyperparameters: minval: -5.0 maxval: 1.0 base: 10.0 - learning_rate_decay: 1e-6 + learning_rate_decay: 1.0e-6 layer1_dropout: type: double minval: 0.2 diff --git a/examples/computer_vision/cifar10_pytorch/const.yaml b/examples/computer_vision/cifar10_pytorch/const.yaml index e466b1392f9..243ff25bcd1 100644 --- a/examples/computer_vision/cifar10_pytorch/const.yaml +++ b/examples/computer_vision/cifar10_pytorch/const.yaml @@ -1,7 +1,7 @@ name: cifar10_pytorch_const hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dropout: 0.25 layer2_dropout: 0.25 layer3_dropout: 0.5 diff --git a/examples/computer_vision/cifar10_pytorch/distributed.yaml b/examples/computer_vision/cifar10_pytorch/distributed.yaml index 76b87515f47..3a931d300b5 100644 --- a/examples/computer_vision/cifar10_pytorch/distributed.yaml +++ b/examples/computer_vision/cifar10_pytorch/distributed.yaml @@ -1,7 +1,7 @@ name: cifar10_pytorch_distributed hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dropout: 0.25 layer2_dropout: 0.25 layer3_dropout: 0.5 diff --git a/examples/computer_vision/cifar10_tf_keras/adaptive.yaml b/examples/computer_vision/cifar10_tf_keras/adaptive.yaml index c95c13ef000..3c15ae7d082 100644 --- a/examples/computer_vision/cifar10_tf_keras/adaptive.yaml +++ b/examples/computer_vision/cifar10_tf_keras/adaptive.yaml @@ -7,7 +7,7 @@ hyperparameters: minval: -5.0 maxval: 1.0 base: 10.0 - learning_rate_decay: 1e-6 + learning_rate_decay: 1.0e-6 layer1_dropout: type: double minval: 0.2 diff --git 
a/examples/computer_vision/cifar10_tf_keras/const.yaml b/examples/computer_vision/cifar10_tf_keras/const.yaml index 3dcdcb3a8be..00c0559e746 100644 --- a/examples/computer_vision/cifar10_tf_keras/const.yaml +++ b/examples/computer_vision/cifar10_tf_keras/const.yaml @@ -2,8 +2,8 @@ name: cifar10_tf_keras_const data: url: https://s3-us-west-2.amazonaws.com/determined-ai-datasets/cifar10/cifar-10-python.tar.gz hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dropout: 0.25 layer2_dropout: 0.25 layer3_dropout: 0.5 diff --git a/examples/computer_vision/cifar10_tf_keras/distributed.yaml b/examples/computer_vision/cifar10_tf_keras/distributed.yaml index 0d721c586d4..a55c2d8e256 100644 --- a/examples/computer_vision/cifar10_tf_keras/distributed.yaml +++ b/examples/computer_vision/cifar10_tf_keras/distributed.yaml @@ -2,8 +2,8 @@ name: cifar10_tf_keras_distributed data: url: https://s3-us-west-2.amazonaws.com/determined-ai-datasets/cifar10/cifar-10-python.tar.gz hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dropout: 0.25 layer2_dropout: 0.25 layer3_dropout: 0.5 diff --git a/examples/computer_vision/deformabledetr_coco_pytorch/const_fake.yaml b/examples/computer_vision/deformabledetr_coco_pytorch/const_fake.yaml index e411a5ab2ce..55d3135c77a 100644 --- a/examples/computer_vision/deformabledetr_coco_pytorch/const_fake.yaml +++ b/examples/computer_vision/deformabledetr_coco_pytorch/const_fake.yaml @@ -1,15 +1,15 @@ name: deformabledetr_coco_fake_data hyperparameters: - lr: 2e-4 + lr: 2.0e-4 lr_backbone_names: - backbone.0 - lr_backbone: 2e-5 + lr_backbone: 2.0e-5 lr_linear_proj_names: - reference_points - sampling_offsets lr_linear_proj_mult: 0.1 global_batch_size: 1 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 40 clip_max_norm: 0.1 diff --git a/examples/computer_vision/deformabledetr_coco_pytorch/distributed.yaml b/examples/computer_vision/deformabledetr_coco_pytorch/distributed.yaml index 70a91e1a680..e176962223a 100644 --- a/examples/computer_vision/deformabledetr_coco_pytorch/distributed.yaml +++ b/examples/computer_vision/deformabledetr_coco_pytorch/distributed.yaml @@ -2,16 +2,16 @@ name: deformabledetr_coco_distributed hyperparameters: # These settings match those used in this experiment from the original repo: # https://github.com/fundamentalvision/Deformable-DETR/blob/main/configs/r50_deformable_detr.sh - lr: 2e-4 + lr: 2.0e-4 lr_backbone_names: - backbone.0 - lr_backbone: 2e-5 + lr_backbone: 2.0e-5 lr_linear_proj_names: - reference_points - sampling_offsets lr_linear_proj_mult: 0.1 global_batch_size: 32 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 40 clip_max_norm: 0.1 diff --git a/examples/computer_vision/deformabledetr_coco_pytorch/finetune.yaml b/examples/computer_vision/deformabledetr_coco_pytorch/finetune.yaml index 29aab51e1c9..59800508dfe 100644 --- a/examples/computer_vision/deformabledetr_coco_pytorch/finetune.yaml +++ b/examples/computer_vision/deformabledetr_coco_pytorch/finetune.yaml @@ -1,6 +1,6 @@ name: deformabledetr_coco_finetune hyperparameters: - lr: 1e-4 + lr: 1.0e-4 lr_backbone_names: - backbone.0 lr_backbone: 0 @@ -9,7 +9,7 @@ hyperparameters: - sampling_offsets lr_linear_proj_mult: 0.1 global_batch_size: 2 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 4 clip_max_norm: 0.1 diff --git a/examples/computer_vision/deformabledetr_coco_pytorch/finetune_adaptive.yaml 
b/examples/computer_vision/deformabledetr_coco_pytorch/finetune_adaptive.yaml index 3ad7b8176e7..c05bca481c0 100644 --- a/examples/computer_vision/deformabledetr_coco_pytorch/finetune_adaptive.yaml +++ b/examples/computer_vision/deformabledetr_coco_pytorch/finetune_adaptive.yaml @@ -2,8 +2,8 @@ name: deformabledetr_coco_adaptive hyperparameters: lr: type: double - minval: 1e-5 - maxval: 1e-4 + minval: 1.0e-5 + maxval: 1.0e-4 lr_backbone_names: - backbone.0 lr_backbone: 0 @@ -12,7 +12,7 @@ hyperparameters: - sampling_offsets lr_linear_proj_mult: 0.1 global_batch_size: 4 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 4 clip_max_norm: type: double diff --git a/examples/computer_vision/detr_coco_pytorch/const_fake.yaml b/examples/computer_vision/detr_coco_pytorch/const_fake.yaml index c516a063f30..1ad15cbfab7 100644 --- a/examples/computer_vision/detr_coco_pytorch/const_fake.yaml +++ b/examples/computer_vision/detr_coco_pytorch/const_fake.yaml @@ -1,9 +1,9 @@ name: detr_coco_fake_data hyperparameters: - lr: 1e-4 - lr_backbone: 1e-5 + lr: 1.0e-4 + lr_backbone: 1.0e-5 global_batch_size: 2 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 100 clip_max_norm: 0.1 diff --git a/examples/computer_vision/detr_coco_pytorch/distributed.yaml b/examples/computer_vision/detr_coco_pytorch/distributed.yaml index 3a6dc69b9e7..dbb1b486ee0 100644 --- a/examples/computer_vision/detr_coco_pytorch/distributed.yaml +++ b/examples/computer_vision/detr_coco_pytorch/distributed.yaml @@ -2,10 +2,10 @@ name: detr_coco_distributed hyperparameters: # These settings match that for the 150 epoch run provided in the original repo: # https://github.com/facebookresearch/detr - lr: 1e-4 - lr_backbone: 1e-5 + lr: 1.0e-4 + lr_backbone: 1.0e-5 global_batch_size: 16 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 100 clip_max_norm: 0.1 diff --git a/examples/computer_vision/detr_coco_pytorch/finetune.yaml b/examples/computer_vision/detr_coco_pytorch/finetune.yaml index f10d3d67d2f..76cde3cfa05 100644 --- a/examples/computer_vision/detr_coco_pytorch/finetune.yaml +++ b/examples/computer_vision/detr_coco_pytorch/finetune.yaml @@ -1,9 +1,9 @@ name: detr_coco_finetune hyperparameters: - lr: 1e-4 + lr: 1.0e-4 lr_backbone: 0 global_batch_size: 4 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 4 clip_max_norm: 0.1 diff --git a/examples/computer_vision/detr_coco_pytorch/finetune_adaptive.yaml b/examples/computer_vision/detr_coco_pytorch/finetune_adaptive.yaml index 5c33d41d7cd..ffb86164348 100644 --- a/examples/computer_vision/detr_coco_pytorch/finetune_adaptive.yaml +++ b/examples/computer_vision/detr_coco_pytorch/finetune_adaptive.yaml @@ -3,11 +3,11 @@ hyperparameters: # We will tune learning rate and gradient clipping. 
lr: type: double - minval: 1e-5 - maxval: 1e-4 + minval: 1.0e-5 + maxval: 1.0e-4 lr_backbone: 0 global_batch_size: 4 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 4 clip_max_norm: type: double diff --git a/examples/computer_vision/efficientdet_pytorch/adaptive.yaml b/examples/computer_vision/efficientdet_pytorch/adaptive.yaml index ccdaf7802a1..c1dd1b4981a 100644 --- a/examples/computer_vision/efficientdet_pytorch/adaptive.yaml +++ b/examples/computer_vision/efficientdet_pytorch/adaptive.yaml @@ -55,7 +55,7 @@ hyperparameters: lr_cycle_mul: 1.0 lr_cycle_limit: 1 warmup_lr: 0.0001 - min_lr: 1e-5 + min_lr: 1.0e-5 start_epoch: None decay_epochs: 30 warmup_epochs: 5 diff --git a/examples/computer_vision/efficientdet_pytorch/const.yaml b/examples/computer_vision/efficientdet_pytorch/const.yaml index 3e34d0f06ce..9af003e5ba0 100644 --- a/examples/computer_vision/efficientdet_pytorch/const.yaml +++ b/examples/computer_vision/efficientdet_pytorch/const.yaml @@ -26,7 +26,7 @@ hyperparameters: opt: fusedmomentum opt_eps: 0.001 momentum: 0.9 - weight_decay: 4e-05 + weight_decay: 4.0e-05 sched: cosine lr: .03 lr_noise: 0.4 0.9 @@ -35,7 +35,7 @@ hyperparameters: lr_cycle_mul: 1.0 lr_cycle_limit: 1 warmup_lr: 0.0001 - min_lr: 1e-5 + min_lr: 1.0e-5 start_epoch: None decay_epochs: 30 warmup_epochs: 5 diff --git a/examples/computer_vision/efficientdet_pytorch/const_fake.yaml b/examples/computer_vision/efficientdet_pytorch/const_fake.yaml index fa24225a1ee..4f67bc6f6c7 100644 --- a/examples/computer_vision/efficientdet_pytorch/const_fake.yaml +++ b/examples/computer_vision/efficientdet_pytorch/const_fake.yaml @@ -26,7 +26,7 @@ hyperparameters: opt: fusedmomentum opt_eps: 0.001 momentum: 0.9 - weight_decay: 4e-05 + weight_decay: 4.0e-05 sched: cosine lr: .03 lr_noise: 0.4 0.9 @@ -35,7 +35,7 @@ hyperparameters: lr_cycle_mul: 1.0 lr_cycle_limit: 1 warmup_lr: 0.0001 - min_lr: 1e-5 + min_lr: 1.0e-5 start_epoch: None decay_epochs: 30 warmup_epochs: 5 diff --git a/examples/computer_vision/efficientdet_pytorch/distributed.yaml b/examples/computer_vision/efficientdet_pytorch/distributed.yaml index e060f1ef494..840fa1f3694 100644 --- a/examples/computer_vision/efficientdet_pytorch/distributed.yaml +++ b/examples/computer_vision/efficientdet_pytorch/distributed.yaml @@ -28,7 +28,7 @@ hyperparameters: opt: fusedmomentum opt_eps: 0.001 momentum: 0.9 - weight_decay: 4e-05 + weight_decay: 4.0e-05 sched: cosine lr: 0.06 lr_noise: 0.4 0.9 @@ -37,7 +37,7 @@ hyperparameters: lr_cycle_mul: 1.0 lr_cycle_limit: 1 warmup_lr: 0.0001 - min_lr: 1e-5 + min_lr: 1.0e-5 start_epoch: None decay_epochs: 30 warmup_epochs: 5 diff --git a/examples/computer_vision/iris_tf_keras/adaptive.yaml b/examples/computer_vision/iris_tf_keras/adaptive.yaml index 1e21e99c52a..9e738fbc4b8 100644 --- a/examples/computer_vision/iris_tf_keras/adaptive.yaml +++ b/examples/computer_vision/iris_tf_keras/adaptive.yaml @@ -8,7 +8,7 @@ hyperparameters: minval: -5.0 maxval: 1.0 base: 10.0 - learning_rate_decay: 1e-6 + learning_rate_decay: 1.0e-6 layer1_dense_size: type: int minval: 4 diff --git a/examples/computer_vision/iris_tf_keras/const.yaml b/examples/computer_vision/iris_tf_keras/const.yaml index 67c78cd79bc..010228242b4 100644 --- a/examples/computer_vision/iris_tf_keras/const.yaml +++ b/examples/computer_vision/iris_tf_keras/const.yaml @@ -3,8 +3,8 @@ data: train_url: http://download.tensorflow.org/data/iris_training.csv test_url: http://download.tensorflow.org/data/iris_test.csv hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + 
learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dense_size: 16 global_batch_size: 30 searcher: diff --git a/examples/computer_vision/iris_tf_keras/distributed.yaml b/examples/computer_vision/iris_tf_keras/distributed.yaml index 2c848bb173e..c1d0fa509d0 100644 --- a/examples/computer_vision/iris_tf_keras/distributed.yaml +++ b/examples/computer_vision/iris_tf_keras/distributed.yaml @@ -3,8 +3,8 @@ data: train_url: http://download.tensorflow.org/data/iris_training.csv test_url: http://download.tensorflow.org/data/iris_test.csv hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dense_size: 16 global_batch_size: 30 resources: diff --git a/examples/computer_vision/mnist_estimator/const.yaml b/examples/computer_vision/mnist_estimator/const.yaml index c9f40b452df..015cd0ef500 100644 --- a/examples/computer_vision/mnist_estimator/const.yaml +++ b/examples/computer_vision/mnist_estimator/const.yaml @@ -1,6 +1,6 @@ name: mnist_estimator_const hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 64 hidden_layer_1: 2500 hidden_layer_2: 1000 diff --git a/examples/computer_vision/mnist_estimator/distributed.yaml b/examples/computer_vision/mnist_estimator/distributed.yaml index 4e189438164..93f9997a061 100644 --- a/examples/computer_vision/mnist_estimator/distributed.yaml +++ b/examples/computer_vision/mnist_estimator/distributed.yaml @@ -1,6 +1,6 @@ name: mnist_estimator_distributed hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 1024 # per GPU batch size of 64 hidden_layer_1: 2500 hidden_layer_2: 1000 diff --git a/examples/computer_vision/mnist_tf_layers/const.yaml b/examples/computer_vision/mnist_tf_layers/const.yaml index f01df6d3fc3..dd053002c5b 100644 --- a/examples/computer_vision/mnist_tf_layers/const.yaml +++ b/examples/computer_vision/mnist_tf_layers/const.yaml @@ -1,6 +1,6 @@ name: mnist_tf_core_to_estimator hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 64 n_filters_1: 10 n_filters_2: 40 diff --git a/examples/computer_vision/unets_tf_keras/const.yaml b/examples/computer_vision/unets_tf_keras/const.yaml index 3a251bfb412..6623d24c6ff 100644 --- a/examples/computer_vision/unets_tf_keras/const.yaml +++ b/examples/computer_vision/unets_tf_keras/const.yaml @@ -4,8 +4,8 @@ data: data_file: mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5 hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dense_size: 16 global_batch_size: 64 OUTPUT_CHANNELS: 3 diff --git a/examples/computer_vision/unets_tf_keras/distributed.yaml b/examples/computer_vision/unets_tf_keras/distributed.yaml index ecdd0f771cf..18fe7672924 100644 --- a/examples/computer_vision/unets_tf_keras/distributed.yaml +++ b/examples/computer_vision/unets_tf_keras/distributed.yaml @@ -4,8 +4,8 @@ data: data_file: mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5 hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dense_size: 16 global_batch_size: 512 # per slot batch size = 64 OUTPUT_CHANNELS: 3 diff --git a/examples/features/data_layer_mnist_estimator/const.yaml b/examples/features/data_layer_mnist_estimator/const.yaml index 6351e2232fb..e4cc27a6c34 100644 --- a/examples/features/data_layer_mnist_estimator/const.yaml +++ b/examples/features/data_layer_mnist_estimator/const.yaml @@ 
-2,7 +2,7 @@ name: data_layer_mnist_estimator_const data: skip_checkpointing_input: true hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 16 hidden_layer_1: 2500 hidden_layer_2: 1000 diff --git a/examples/features/data_layer_mnist_estimator/distributed.yaml b/examples/features/data_layer_mnist_estimator/distributed.yaml index 2271f5c0495..5445dadf6f0 100644 --- a/examples/features/data_layer_mnist_estimator/distributed.yaml +++ b/examples/features/data_layer_mnist_estimator/distributed.yaml @@ -2,7 +2,7 @@ name: data_layer_mnist_estimator_const data: skip_checkpointing_input: true hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 128 hidden_layer_1: 2500 hidden_layer_2: 1000 diff --git a/examples/features/data_layer_mnist_tf_keras/const.yaml b/examples/features/data_layer_mnist_tf_keras/const.yaml index 4ec782c95f2..b6553fd8d29 100644 --- a/examples/features/data_layer_mnist_tf_keras/const.yaml +++ b/examples/features/data_layer_mnist_tf_keras/const.yaml @@ -2,8 +2,8 @@ name: data_layer_mnist_tf_keras_const data: url: https://s3-us-west-2.amazonaws.com/determined-ai-datasets/cifar10/cifar-10-python.tar.gz hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dropout: 0.25 layer2_dropout: 0.25 layer3_dropout: 0.5 diff --git a/examples/hp_search_benchmarks/darts_cifar10_pytorch/adaptive.yaml b/examples/hp_search_benchmarks/darts_cifar10_pytorch/adaptive.yaml index df249d4bc95..3755d5b613d 100644 --- a/examples/hp_search_benchmarks/darts_cifar10_pytorch/adaptive.yaml +++ b/examples/hp_search_benchmarks/darts_cifar10_pytorch/adaptive.yaml @@ -9,7 +9,7 @@ min_validation_period: hyperparameters: learning_rate: 0.025 momentum: 0.9 - weight_decay: 3e-4 + weight_decay: 3.0e-4 train_epochs: 300 global_batch_size: 96 init_channels: 36 diff --git a/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_adaptive.yaml b/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_adaptive.yaml index 1f3fcf7597b..3bffb9c05fd 100644 --- a/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_adaptive.yaml +++ b/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_adaptive.yaml @@ -10,7 +10,7 @@ hyperparameters: use_constraints: true learning_rate: 0.025 momentum: 0.9 - weight_decay: 3e-4 + weight_decay: 3.0e-4 train_epochs: 300 global_batch_size: 96 init_channels: 36 diff --git a/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_random.yaml b/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_random.yaml index bce9b5f70db..da0354b1ff3 100644 --- a/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_random.yaml +++ b/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_random.yaml @@ -10,7 +10,7 @@ hyperparameters: use_constraints: true learning_rate: 0.025 momentum: 0.9 - weight_decay: 3e-4 + weight_decay: 3.0e-4 train_epochs: 300 global_batch_size: 96 init_channels: 36 diff --git a/examples/hp_search_benchmarks/darts_penntreebank_pytorch/adaptive.yaml b/examples/hp_search_benchmarks/darts_penntreebank_pytorch/adaptive.yaml index ee2d14ab6a6..93624d5f91b 100644 --- a/examples/hp_search_benchmarks/darts_penntreebank_pytorch/adaptive.yaml +++ b/examples/hp_search_benchmarks/darts_penntreebank_pytorch/adaptive.yaml @@ -21,8 +21,8 @@ hyperparameters: dropoute: 0.1 nonmono: 5 alpha: 0 - beta: 1e-3 - weight_decay: 8e-7 + beta: 1.0e-3 + weight_decay: 8.0e-7 max_seq_length_delta: 20 
clip_gradients_l2_norm: 0.25 diff --git a/examples/hp_search_benchmarks/darts_penntreebank_pytorch/const.yaml b/examples/hp_search_benchmarks/darts_penntreebank_pytorch/const.yaml index 0620cc7324b..2f4201d905d 100644 --- a/examples/hp_search_benchmarks/darts_penntreebank_pytorch/const.yaml +++ b/examples/hp_search_benchmarks/darts_penntreebank_pytorch/const.yaml @@ -26,8 +26,8 @@ hyperparameters: dropoute: 0.1 nonmono: 5 alpha: 0 - beta: 1e-3 - weight_decay: 8e-7 + beta: 1.0e-3 + weight_decay: 8.0e-7 max_seq_length_delta: 20 clip_gradients_l2_norm: 0.25 diff --git a/examples/meta_learning/protonet_omniglot_pytorch/20way1shot.yaml b/examples/meta_learning/protonet_omniglot_pytorch/20way1shot.yaml index 0e6cffbcd63..3667a47fee4 100644 --- a/examples/meta_learning/protonet_omniglot_pytorch/20way1shot.yaml +++ b/examples/meta_learning/protonet_omniglot_pytorch/20way1shot.yaml @@ -8,7 +8,7 @@ data: val_workers: 4 hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 weight_decay: 0 reduce_every: 200 lr_gamma: 0.5 diff --git a/examples/meta_learning/protonet_omniglot_pytorch/20way5shot.yaml b/examples/meta_learning/protonet_omniglot_pytorch/20way5shot.yaml index e4aecf658fa..e0f0beb290c 100644 --- a/examples/meta_learning/protonet_omniglot_pytorch/20way5shot.yaml +++ b/examples/meta_learning/protonet_omniglot_pytorch/20way5shot.yaml @@ -8,7 +8,7 @@ data: val_workers: 4 hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 weight_decay: 0 reduce_every: 200 lr_gamma: 0.5 diff --git a/examples/nas/gaea_pytorch/eval/const.yaml b/examples/nas/gaea_pytorch/eval/const.yaml index e575281a51c..96f32c0b1b6 100644 --- a/examples/nas/gaea_pytorch/eval/const.yaml +++ b/examples/nas/gaea_pytorch/eval/const.yaml @@ -46,7 +46,7 @@ hyperparameters: clip_gradients_l2_norm: 5 learning_rate: 0.5 momentum: 0.9 - weight_decay: 3e-5 + weight_decay: 3.0e-5 # Choices include linear, efficientnet, and cosine lr_scheduler: linear lr_epochs: 300 diff --git a/examples/nas/gaea_pytorch/eval/distributed.yaml b/examples/nas/gaea_pytorch/eval/distributed.yaml index 75a0e2d8a88..78f77db52ee 100644 --- a/examples/nas/gaea_pytorch/eval/distributed.yaml +++ b/examples/nas/gaea_pytorch/eval/distributed.yaml @@ -43,7 +43,7 @@ hyperparameters: num_classes: 1000 learning_rate: 0.5 momentum: 0.9 - weight_decay: 3e-5 + weight_decay: 3.0e-5 drop_path_prob: 0.0 drop_prob: 0.0 label_smoothing_rate: 0.1 diff --git a/examples/nas/gaea_pytorch/eval/distributed_no_data_download.yaml b/examples/nas/gaea_pytorch/eval/distributed_no_data_download.yaml index 81be829ac3a..93d6c412155 100644 --- a/examples/nas/gaea_pytorch/eval/distributed_no_data_download.yaml +++ b/examples/nas/gaea_pytorch/eval/distributed_no_data_download.yaml @@ -46,7 +46,7 @@ hyperparameters: clip_gradients_l2_norm: 5 learning_rate: 0.5 momentum: 0.9 - weight_decay: 3e-5 + weight_decay: 3.0e-5 # Choices include linear, efficientnet, and cosine lr_scheduler: linear lr_epochs: 300 diff --git a/examples/nas/gaea_pytorch/search/const.yaml b/examples/nas/gaea_pytorch/search/const.yaml index cffb5994aa5..9a1db23b151 100644 --- a/examples/nas/gaea_pytorch/search/const.yaml +++ b/examples/nas/gaea_pytorch/search/const.yaml @@ -18,7 +18,7 @@ hyperparameters: momentum: 0.9 min_learning_rate: 0 scheduler_epochs: 50 - weight_decay: 3e-4 + weight_decay: 3.0e-4 arch_learning_rate: 0.1 init_channels: 16 layers: 8 diff --git a/examples/nlp/albert_squad_pytorch/const.yaml b/examples/nlp/albert_squad_pytorch/const.yaml index 3eb114a1780..77fb821cb4e 100644 --- 
a/examples/nlp/albert_squad_pytorch/const.yaml +++ b/examples/nlp/albert_squad_pytorch/const.yaml @@ -2,9 +2,9 @@ name: ALBert_SQuAD_PyTorch_1gpu hyperparameters: global_batch_size: 2 - learning_rate: 5e-5 + learning_rate: 5.0e-5 model_type: 'albert' - adam_epsilon: 1e-8 + adam_epsilon: 1.0e-8 weight_decay: 0 num_warmup_steps: 13220 # 10% of total training max_seq_length: 384 diff --git a/examples/nlp/albert_squad_pytorch/distributed_64gpu.yaml b/examples/nlp/albert_squad_pytorch/distributed_64gpu.yaml index 086d72ffa9e..ad9cc15bef6 100644 --- a/examples/nlp/albert_squad_pytorch/distributed_64gpu.yaml +++ b/examples/nlp/albert_squad_pytorch/distributed_64gpu.yaml @@ -5,7 +5,7 @@ hyperparameters: learning_rate: 0.0002 model_type: 'albert' do_lower_case: true - adam_epsilon: 1e-8 + adam_epsilon: 1.0e-8 weight_decay: 0 num_warmup_steps: 206 max_seq_length: 384 diff --git a/examples/nlp/albert_squad_pytorch/distributed_8gpu.yaml b/examples/nlp/albert_squad_pytorch/distributed_8gpu.yaml index 7ca5282c0ab..1c0ab541969 100644 --- a/examples/nlp/albert_squad_pytorch/distributed_8gpu.yaml +++ b/examples/nlp/albert_squad_pytorch/distributed_8gpu.yaml @@ -2,10 +2,10 @@ name: ALBert_SQuAD_PyTorch_8gpu hyperparameters: global_batch_size: 16 - learning_rate: 5e-5 + learning_rate: 5.0e-5 model_type: 'albert' do_lower_case: true - adam_epsilon: 1e-8 + adam_epsilon: 1.0e-8 weight_decay: 0 num_warmup_steps: 1620 max_seq_length: 384 diff --git a/examples/nlp/bert_glue_pytorch/const.yaml b/examples/nlp/bert_glue_pytorch/const.yaml index a4be3aa3b85..9f8610a7920 100644 --- a/examples/nlp/bert_glue_pytorch/const.yaml +++ b/examples/nlp/bert_glue_pytorch/const.yaml @@ -1,10 +1,10 @@ name: bert_glue_pytorch_const hyperparameters: global_batch_size: 24 - learning_rate: 2e-5 + learning_rate: 2.0e-5 lr_scheduler_epoch_freq: 1 model_type: 'bert' - adam_epsilon: 1e-8 + adam_epsilon: 1.0e-8 weight_decay: 0 num_warmup_steps: 0 num_training_steps: 459 diff --git a/examples/nlp/bert_glue_pytorch/distributed.yaml b/examples/nlp/bert_glue_pytorch/distributed.yaml index 0451839021e..9ffecd3ca25 100644 --- a/examples/nlp/bert_glue_pytorch/distributed.yaml +++ b/examples/nlp/bert_glue_pytorch/distributed.yaml @@ -1,10 +1,10 @@ name: bert_glue_pytorch_distributed hyperparameters: global_batch_size: 192 # per gpu batch size of 24 - learning_rate: 2e-5 + learning_rate: 2.0e-5 lr_scheduler_epoch_freq: 1 model_type: 'bert' - adam_epsilon: 1e-8 + adam_epsilon: 1.0e-8 weight_decay: 0 num_warmup_steps: 0 num_training_steps: 459 diff --git a/examples/nlp/bert_squad_pytorch/const.yaml b/examples/nlp/bert_squad_pytorch/const.yaml index 32d47249aba..6fa4a001ddb 100644 --- a/examples/nlp/bert_squad_pytorch/const.yaml +++ b/examples/nlp/bert_squad_pytorch/const.yaml @@ -2,10 +2,10 @@ name: Bert_SQuAD_PyTorch hyperparameters: global_batch_size: 12 - learning_rate: 3e-5 + learning_rate: 3.0e-5 lr_scheduler_epoch_freq: 1 model_type: 'bert' - adam_epsilon: 1e-8 + adam_epsilon: 1.0e-8 weight_decay: 0 num_warmup_steps: 0 max_seq_length: 384 diff --git a/examples/nlp/bert_squad_pytorch/distributed.yaml b/examples/nlp/bert_squad_pytorch/distributed.yaml index 663210978d4..d14cf6bb38c 100644 --- a/examples/nlp/bert_squad_pytorch/distributed.yaml +++ b/examples/nlp/bert_squad_pytorch/distributed.yaml @@ -2,10 +2,10 @@ name: Bert_SQuAD_PyTorch_distributed hyperparameters: global_batch_size: 96 # per slot batch size = 12 - learning_rate: 3e-5 + learning_rate: 3.0e-5 lr_scheduler_epoch_freq: 1 model_type: 'bert' - adam_epsilon: 1e-8 + adam_epsilon: 
1.0e-8 weight_decay: 0 num_warmup_steps: 0 max_seq_length: 384 diff --git a/model_hub/examples/huggingface/language-modeling/clm_config.yaml b/model_hub/examples/huggingface/language-modeling/clm_config.yaml index 7d051e57394..15cca6046ee 100644 --- a/model_hub/examples/huggingface/language-modeling/clm_config.yaml +++ b/model_hub/examples/huggingface/language-modeling/clm_config.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 8 - learning_rate: 5e-5 - adam_epsilon: 1e-8 + learning_rate: 5.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/language-modeling/mlm_config.yaml b/model_hub/examples/huggingface/language-modeling/mlm_config.yaml index 4c453d4f99c..a4fcdf59832 100644 --- a/model_hub/examples/huggingface/language-modeling/mlm_config.yaml +++ b/model_hub/examples/huggingface/language-modeling/mlm_config.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 8 - learning_rate: 5e-5 - adam_epsilon: 1e-8 + learning_rate: 5.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/language-modeling/plm_config.yaml b/model_hub/examples/huggingface/language-modeling/plm_config.yaml index de3d3244eb7..97a7ecbb8a2 100644 --- a/model_hub/examples/huggingface/language-modeling/plm_config.yaml +++ b/model_hub/examples/huggingface/language-modeling/plm_config.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 2 - learning_rate: 2e-5 - adam_epsilon: 1e-8 + learning_rate: 2.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/multiple-choice/swag_config.yaml b/model_hub/examples/huggingface/multiple-choice/swag_config.yaml index 8cdfbe93619..ebaa85ad75d 100644 --- a/model_hub/examples/huggingface/multiple-choice/swag_config.yaml +++ b/model_hub/examples/huggingface/multiple-choice/swag_config.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 64 - learning_rate: 5e-5 - adam_epsilon: 1e-8 + learning_rate: 5.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/question-answering/squad.yaml b/model_hub/examples/huggingface/question-answering/squad.yaml index 972c2445d57..a99de834c4b 100644 --- a/model_hub/examples/huggingface/question-answering/squad.yaml +++ b/model_hub/examples/huggingface/question-answering/squad.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 12 - learning_rate: 3e-5 - adam_epsilon: 1e-8 + learning_rate: 3.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/question-answering/squad_beam_search.yaml b/model_hub/examples/huggingface/question-answering/squad_beam_search.yaml index d627feb1400..be9dfc30f1b 100644 --- a/model_hub/examples/huggingface/question-answering/squad_beam_search.yaml +++ b/model_hub/examples/huggingface/question-answering/squad_beam_search.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 4 - learning_rate: 3e-5 - adam_epsilon: 1e-8 + learning_rate: 3.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/question-answering/squad_v2.yaml 
b/model_hub/examples/huggingface/question-answering/squad_v2.yaml index b1a6617ff9d..1c2e49e8bc4 100644 --- a/model_hub/examples/huggingface/question-answering/squad_v2.yaml +++ b/model_hub/examples/huggingface/question-answering/squad_v2.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 12 - learning_rate: 3e-5 - adam_epsilon: 1e-8 + learning_rate: 3.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/question-answering/squad_v2_beam_search.yaml b/model_hub/examples/huggingface/question-answering/squad_v2_beam_search.yaml index ac511b8d562..9de15dd8f64 100644 --- a/model_hub/examples/huggingface/question-answering/squad_v2_beam_search.yaml +++ b/model_hub/examples/huggingface/question-answering/squad_v2_beam_search.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 4 - learning_rate: 3e-5 - adam_epsilon: 1e-8 + learning_rate: 3.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/text-classification/glue_config.yaml b/model_hub/examples/huggingface/text-classification/glue_config.yaml index 81d8db49b5e..e05701646a9 100644 --- a/model_hub/examples/huggingface/text-classification/glue_config.yaml +++ b/model_hub/examples/huggingface/text-classification/glue_config.yaml @@ -18,8 +18,8 @@ hyperparameters: use_apex_amp: true # Training Args global_batch_size: 32 - learning_rate: 3e-5 - adam_epsilon: 1e-8 + learning_rate: 3.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/text-classification/xnli_config.yaml b/model_hub/examples/huggingface/text-classification/xnli_config.yaml index 4405117b656..26eeadcab05 100644 --- a/model_hub/examples/huggingface/text-classification/xnli_config.yaml +++ b/model_hub/examples/huggingface/text-classification/xnli_config.yaml @@ -8,8 +8,8 @@ hyperparameters: do_lower_case: false # Training Args global_batch_size: 32 - learning_rate: 5e-5 - adam_epsilon: 1e-8 + learning_rate: 5.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/token-classification/ner_config.yaml b/model_hub/examples/huggingface/token-classification/ner_config.yaml index a91c746162a..41ac2a5334f 100644 --- a/model_hub/examples/huggingface/token-classification/ner_config.yaml +++ b/model_hub/examples/huggingface/token-classification/ner_config.yaml @@ -7,8 +7,8 @@ hyperparameters: use_apex_amp: false # Training Args global_batch_size: 8 - learning_rate: 5e-5 - adam_epsilon: 1e-8 + learning_rate: 5.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0
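For reference (not part of the patch itself), a minimal Python sketch of the behavior the commit message describes. It assumes PyYAML is installed and importable as yaml; the key name learning_rate is just an illustrative placeholder mirroring the example configs.

    # Hypothetical demonstration: load the same key with both notations and
    # compare the resulting Python types under PyYAML's YAML 1.1 resolver.
    import yaml

    for literal in ("1e-5", "1.0e-5"):
        value = yaml.safe_load(f"learning_rate: {literal}")["learning_rate"]
        print(f"{literal!r} -> {type(value).__name__}: {value!r}")

    # Expected output:
    #   '1e-5' -> str: '1e-5'
    #   '1.0e-5' -> float: 1e-05

Once the example configs use 1.0e-5 style literals, anything that loads them with plain yaml.safe_load gets proper floats without special-casing the hyperparameter values.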