chore: fix scientific notation in example yamls (#2688)
Example YAMLs use scientific notation like 1e-5, which PyYAML parses as a str instead of a float. This is because PyYAML, while widespread, is not yet a YAML 1.2-compliant parser: the YAML 1.1 float syntax it implements requires a dot in the mantissa and a signed exponent, so 1e-5 falls through to str while 1.0e-5 resolves to float. Make users' lives a bit easier by using YAML 1.1-friendly configs.
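A minimal sketch of the behavior this commit works around, using PyYAML's standard `safe_load` entry point (the keys `lr` and `lr_fixed` are illustrative, not from any of the changed files):

```python
import yaml  # PyYAML, which implements YAML 1.1 resolution rules

# PyYAML's float resolver requires a dot in the mantissa and a signed
# exponent, so "1e-5" is not recognized as a float and stays a string,
# while "1.0e-5" resolves to a float as intended.
config = yaml.safe_load("lr: 1e-5\nlr_fixed: 1.0e-5")

print(type(config["lr"]))        # <class 'str'>   -> breaks numeric code
print(type(config["lr_fixed"]))  # <class 'float'> -> what the examples intend
```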
pjanowski authored Jul 13, 2021
1 parent 953af22 commit 5a825ad
Showing 60 changed files with 105 additions and 105 deletions.
@@ -1,6 +1,6 @@
 description: mnist-estimator-single
 hyperparameters:
-  learning_rate: 1e-3
+  learning_rate: 1.0e-3
   global_batch_size: 64
   hidden_layer_1: 2500
   hidden_layer_2: 1000
2 changes: 1 addition & 1 deletion e2e_tests/tests/fixtures/mnist_estimator/single.yaml
@@ -1,6 +1,6 @@
 description: mnist-estimator-single
 hyperparameters:
-  learning_rate: 1e-3
+  learning_rate: 1.0e-3
   global_batch_size: 64
   hidden_layer_1: 2500
   hidden_layer_2: 1000
2 changes: 1 addition & 1 deletion examples/computer_vision/cifar10_pytorch/adaptive.yaml
@@ -5,7 +5,7 @@ hyperparameters:
     minval: -5.0
     maxval: 1.0
     base: 10.0
-  learning_rate_decay: 1e-6
+  learning_rate_decay: 1.0e-6
   layer1_dropout:
     type: double
     minval: 0.2
4 changes: 2 additions & 2 deletions examples/computer_vision/cifar10_pytorch/const.yaml
@@ -1,7 +1,7 @@
 name: cifar10_pytorch_const
 hyperparameters:
-  learning_rate: 1e-4
-  learning_rate_decay: 1e-6
+  learning_rate: 1.0e-4
+  learning_rate_decay: 1.0e-6
   layer1_dropout: 0.25
   layer2_dropout: 0.25
   layer3_dropout: 0.5
4 changes: 2 additions & 2 deletions examples/computer_vision/cifar10_pytorch/distributed.yaml
@@ -1,7 +1,7 @@
 name: cifar10_pytorch_distributed
 hyperparameters:
-  learning_rate: 1e-4
-  learning_rate_decay: 1e-6
+  learning_rate: 1.0e-4
+  learning_rate_decay: 1.0e-6
   layer1_dropout: 0.25
   layer2_dropout: 0.25
   layer3_dropout: 0.5
2 changes: 1 addition & 1 deletion examples/computer_vision/cifar10_tf_keras/adaptive.yaml
@@ -7,7 +7,7 @@ hyperparameters:
     minval: -5.0
     maxval: 1.0
     base: 10.0
-  learning_rate_decay: 1e-6
+  learning_rate_decay: 1.0e-6
   layer1_dropout:
     type: double
     minval: 0.2
4 changes: 2 additions & 2 deletions examples/computer_vision/cifar10_tf_keras/const.yaml
@@ -2,8 +2,8 @@ name: cifar10_tf_keras_const
 data:
   url: https://s3-us-west-2.amazonaws.com/determined-ai-datasets/cifar10/cifar-10-python.tar.gz
 hyperparameters:
-  learning_rate: 1e-4
-  learning_rate_decay: 1e-6
+  learning_rate: 1.0e-4
+  learning_rate_decay: 1.0e-6
   layer1_dropout: 0.25
   layer2_dropout: 0.25
   layer3_dropout: 0.5
4 changes: 2 additions & 2 deletions examples/computer_vision/cifar10_tf_keras/distributed.yaml
@@ -2,8 +2,8 @@ name: cifar10_tf_keras_distributed
 data:
   url: https://s3-us-west-2.amazonaws.com/determined-ai-datasets/cifar10/cifar-10-python.tar.gz
 hyperparameters:
-  learning_rate: 1e-4
-  learning_rate_decay: 1e-6
+  learning_rate: 1.0e-4
+  learning_rate_decay: 1.0e-6
   layer1_dropout: 0.25
   layer2_dropout: 0.25
   layer3_dropout: 0.5
@@ -1,15 +1,15 @@
 name: deformabledetr_coco_fake_data
 hyperparameters:
-  lr: 2e-4
+  lr: 2.0e-4
   lr_backbone_names:
     - backbone.0
-  lr_backbone: 2e-5
+  lr_backbone: 2.0e-5
   lr_linear_proj_names:
     - reference_points
     - sampling_offsets
   lr_linear_proj_mult: 0.1
   global_batch_size: 1
-  weight_decay: 1e-4
+  weight_decay: 1.0e-4
   lr_drop: 40
   clip_max_norm: 0.1
@@ -2,16 +2,16 @@ name: deformabledetr_coco_distributed
 hyperparameters:
   # These settings match those used in this experiment from the original repo:
   # https://github.com/fundamentalvision/Deformable-DETR/blob/main/configs/r50_deformable_detr.sh
-  lr: 2e-4
+  lr: 2.0e-4
   lr_backbone_names:
     - backbone.0
-  lr_backbone: 2e-5
+  lr_backbone: 2.0e-5
   lr_linear_proj_names:
     - reference_points
     - sampling_offsets
   lr_linear_proj_mult: 0.1
   global_batch_size: 32
-  weight_decay: 1e-4
+  weight_decay: 1.0e-4
   lr_drop: 40
   clip_max_norm: 0.1
@@ -1,6 +1,6 @@
 name: deformabledetr_coco_finetune
 hyperparameters:
-  lr: 1e-4
+  lr: 1.0e-4
   lr_backbone_names:
     - backbone.0
   lr_backbone: 0
@@ -9,7 +9,7 @@ hyperparameters:
     - sampling_offsets
   lr_linear_proj_mult: 0.1
   global_batch_size: 2
-  weight_decay: 1e-4
+  weight_decay: 1.0e-4
   lr_drop: 4
   clip_max_norm: 0.1
@@ -2,8 +2,8 @@ name: deformabledetr_coco_adaptive
 hyperparameters:
   lr:
     type: double
-    minval: 1e-5
-    maxval: 1e-4
+    minval: 1.0e-5
+    maxval: 1.0e-4
   lr_backbone_names:
     - backbone.0
   lr_backbone: 0
@@ -12,7 +12,7 @@ hyperparameters:
     - sampling_offsets
   lr_linear_proj_mult: 0.1
   global_batch_size: 4
-  weight_decay: 1e-4
+  weight_decay: 1.0e-4
   lr_drop: 4
   clip_max_norm:
     type: double
6 changes: 3 additions & 3 deletions examples/computer_vision/detr_coco_pytorch/const_fake.yaml
@@ -1,9 +1,9 @@
 name: detr_coco_fake_data
 hyperparameters:
-  lr: 1e-4
-  lr_backbone: 1e-5
+  lr: 1.0e-4
+  lr_backbone: 1.0e-5
   global_batch_size: 2
-  weight_decay: 1e-4
+  weight_decay: 1.0e-4
   lr_drop: 100
   clip_max_norm: 0.1
6 changes: 3 additions & 3 deletions examples/computer_vision/detr_coco_pytorch/distributed.yaml
@@ -2,10 +2,10 @@ name: detr_coco_distributed
 hyperparameters:
   # These settings match that for the 150 epoch run provided in the original repo:
   # https://github.com/facebookresearch/detr
-  lr: 1e-4
-  lr_backbone: 1e-5
+  lr: 1.0e-4
+  lr_backbone: 1.0e-5
   global_batch_size: 16
-  weight_decay: 1e-4
+  weight_decay: 1.0e-4
   lr_drop: 100
   clip_max_norm: 0.1
4 changes: 2 additions & 2 deletions examples/computer_vision/detr_coco_pytorch/finetune.yaml
@@ -1,9 +1,9 @@
 name: detr_coco_finetune
 hyperparameters:
-  lr: 1e-4
+  lr: 1.0e-4
   lr_backbone: 0
   global_batch_size: 4
-  weight_decay: 1e-4
+  weight_decay: 1.0e-4
   lr_drop: 4
   clip_max_norm: 0.1
@@ -3,11 +3,11 @@ hyperparameters:
   # We will tune learning rate and gradient clipping.
   lr:
     type: double
-    minval: 1e-5
-    maxval: 1e-4
+    minval: 1.0e-5
+    maxval: 1.0e-4
   lr_backbone: 0
   global_batch_size: 4
-  weight_decay: 1e-4
+  weight_decay: 1.0e-4
   lr_drop: 4
   clip_max_norm:
     type: double
@@ -55,7 +55,7 @@ hyperparameters:
   lr_cycle_mul: 1.0
   lr_cycle_limit: 1
   warmup_lr: 0.0001
-  min_lr: 1e-5
+  min_lr: 1.0e-5
   start_epoch: None
   decay_epochs: 30
   warmup_epochs: 5
4 changes: 2 additions & 2 deletions examples/computer_vision/efficientdet_pytorch/const.yaml
@@ -26,7 +26,7 @@ hyperparameters:
   opt: fusedmomentum
   opt_eps: 0.001
   momentum: 0.9
-  weight_decay: 4e-05
+  weight_decay: 4.0e-05
   sched: cosine
   lr: .03
   lr_noise: 0.4 0.9
@@ -35,7 +35,7 @@ hyperparameters:
   lr_cycle_mul: 1.0
   lr_cycle_limit: 1
   warmup_lr: 0.0001
-  min_lr: 1e-5
+  min_lr: 1.0e-5
   start_epoch: None
   decay_epochs: 30
   warmup_epochs: 5
4 changes: 2 additions & 2 deletions examples/computer_vision/efficientdet_pytorch/const_fake.yaml
@@ -26,7 +26,7 @@ hyperparameters:
   opt: fusedmomentum
   opt_eps: 0.001
   momentum: 0.9
-  weight_decay: 4e-05
+  weight_decay: 4.0e-05
   sched: cosine
   lr: .03
   lr_noise: 0.4 0.9
@@ -35,7 +35,7 @@ hyperparameters:
   lr_cycle_mul: 1.0
   lr_cycle_limit: 1
   warmup_lr: 0.0001
-  min_lr: 1e-5
+  min_lr: 1.0e-5
   start_epoch: None
   decay_epochs: 30
   warmup_epochs: 5
@@ -28,7 +28,7 @@ hyperparameters:
   opt: fusedmomentum
   opt_eps: 0.001
   momentum: 0.9
-  weight_decay: 4e-05
+  weight_decay: 4.0e-05
   sched: cosine
   lr: 0.06
   lr_noise: 0.4 0.9
@@ -37,7 +37,7 @@ hyperparameters:
   lr_cycle_mul: 1.0
   lr_cycle_limit: 1
   warmup_lr: 0.0001
-  min_lr: 1e-5
+  min_lr: 1.0e-5
   start_epoch: None
   decay_epochs: 30
   warmup_epochs: 5
2 changes: 1 addition & 1 deletion examples/computer_vision/iris_tf_keras/adaptive.yaml
@@ -8,7 +8,7 @@ hyperparameters:
     minval: -5.0
     maxval: 1.0
     base: 10.0
-  learning_rate_decay: 1e-6
+  learning_rate_decay: 1.0e-6
   layer1_dense_size:
     type: int
     minval: 4
4 changes: 2 additions & 2 deletions examples/computer_vision/iris_tf_keras/const.yaml
@@ -3,8 +3,8 @@ data:
   train_url: http://download.tensorflow.org/data/iris_training.csv
   test_url: http://download.tensorflow.org/data/iris_test.csv
 hyperparameters:
-  learning_rate: 1e-4
-  learning_rate_decay: 1e-6
+  learning_rate: 1.0e-4
+  learning_rate_decay: 1.0e-6
   layer1_dense_size: 16
   global_batch_size: 30
 searcher:
4 changes: 2 additions & 2 deletions examples/computer_vision/iris_tf_keras/distributed.yaml
@@ -3,8 +3,8 @@ data:
   train_url: http://download.tensorflow.org/data/iris_training.csv
   test_url: http://download.tensorflow.org/data/iris_test.csv
 hyperparameters:
-  learning_rate: 1e-4
-  learning_rate_decay: 1e-6
+  learning_rate: 1.0e-4
+  learning_rate_decay: 1.0e-6
   layer1_dense_size: 16
   global_batch_size: 30
 resources:
2 changes: 1 addition & 1 deletion examples/computer_vision/mnist_estimator/const.yaml
@@ -1,6 +1,6 @@
 name: mnist_estimator_const
 hyperparameters:
-  learning_rate: 1e-3
+  learning_rate: 1.0e-3
   global_batch_size: 64
   hidden_layer_1: 2500
   hidden_layer_2: 1000
2 changes: 1 addition & 1 deletion examples/computer_vision/mnist_estimator/distributed.yaml
@@ -1,6 +1,6 @@
 name: mnist_estimator_distributed
 hyperparameters:
-  learning_rate: 1e-3
+  learning_rate: 1.0e-3
   global_batch_size: 1024 # per GPU batch size of 64
   hidden_layer_1: 2500
   hidden_layer_2: 1000
2 changes: 1 addition & 1 deletion examples/computer_vision/mnist_tf_layers/const.yaml
@@ -1,6 +1,6 @@
 name: mnist_tf_core_to_estimator
 hyperparameters:
-  learning_rate: 1e-3
+  learning_rate: 1.0e-3
   global_batch_size: 64
   n_filters_1: 10
   n_filters_2: 40
4 changes: 2 additions & 2 deletions examples/computer_vision/unets_tf_keras/const.yaml
@@ -4,8 +4,8 @@ data:
   data_file: mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5

 hyperparameters:
-  learning_rate: 1e-4
-  learning_rate_decay: 1e-6
+  learning_rate: 1.0e-4
+  learning_rate_decay: 1.0e-6
   layer1_dense_size: 16
   global_batch_size: 64
   OUTPUT_CHANNELS: 3
4 changes: 2 additions & 2 deletions examples/computer_vision/unets_tf_keras/distributed.yaml
@@ -4,8 +4,8 @@ data:
   data_file: mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5

 hyperparameters:
-  learning_rate: 1e-4
-  learning_rate_decay: 1e-6
+  learning_rate: 1.0e-4
+  learning_rate_decay: 1.0e-6
   layer1_dense_size: 16
   global_batch_size: 512 # per slot batch size = 64
   OUTPUT_CHANNELS: 3
2 changes: 1 addition & 1 deletion examples/features/data_layer_mnist_estimator/const.yaml
@@ -2,7 +2,7 @@ name: data_layer_mnist_estimator_const
 data:
   skip_checkpointing_input: true
 hyperparameters:
-  learning_rate: 1e-3
+  learning_rate: 1.0e-3
   global_batch_size: 16
   hidden_layer_1: 2500
   hidden_layer_2: 1000
@@ -2,7 +2,7 @@ name: data_layer_mnist_estimator_const
 data:
   skip_checkpointing_input: true
 hyperparameters:
-  learning_rate: 1e-3
+  learning_rate: 1.0e-3
   global_batch_size: 128
   hidden_layer_1: 2500
   hidden_layer_2: 1000
4 changes: 2 additions & 2 deletions examples/features/data_layer_mnist_tf_keras/const.yaml
@@ -2,8 +2,8 @@ name: data_layer_mnist_tf_keras_const
 data:
   url: https://s3-us-west-2.amazonaws.com/determined-ai-datasets/cifar10/cifar-10-python.tar.gz
 hyperparameters:
-  learning_rate: 1e-4
-  learning_rate_decay: 1e-6
+  learning_rate: 1.0e-4
+  learning_rate_decay: 1.0e-6
   layer1_dropout: 0.25
   layer2_dropout: 0.25
   layer3_dropout: 0.5
@@ -9,7 +9,7 @@ min_validation_period:
 hyperparameters:
   learning_rate: 0.025
   momentum: 0.9
-  weight_decay: 3e-4
+  weight_decay: 3.0e-4
   train_epochs: 300
   global_batch_size: 96
   init_channels: 36
@@ -10,7 +10,7 @@ hyperparameters:
   use_constraints: true
   learning_rate: 0.025
   momentum: 0.9
-  weight_decay: 3e-4
+  weight_decay: 3.0e-4
   train_epochs: 300
   global_batch_size: 96
   init_channels: 36