From 5a825ad23b5e5ef319e6e4b31ccb74441dda44c4 Mon Sep 17 00:00:00 2001
From: Pawel
Date: Tue, 13 Jul 2021 04:52:18 -0700
Subject: [PATCH] chore: fix scientific notation in example yamls (#2688)

Example YAMLs use scientific notation like 1e-5, which PyYAML parses as a
str instead of a float: PyYAML, while widespread, is not a YAML 1.2-compliant
parser yet, and its YAML 1.1 resolver only recognizes a float when the
mantissa contains a decimal point. Make users' lives a bit easier by writing
YAML 1.1-friendly values such as 1.0e-5 in the example configs. (A short
snippet demonstrating the parsing difference is appended after the diff.)
---
 .../tests/fixtures/mnist_estimator/single-multi-slot.yaml | 2 +- e2e_tests/tests/fixtures/mnist_estimator/single.yaml | 2 +- examples/computer_vision/cifar10_pytorch/adaptive.yaml | 2 +- examples/computer_vision/cifar10_pytorch/const.yaml | 4 ++-- examples/computer_vision/cifar10_pytorch/distributed.yaml | 4 ++-- examples/computer_vision/cifar10_tf_keras/adaptive.yaml | 2 +- examples/computer_vision/cifar10_tf_keras/const.yaml | 4 ++-- examples/computer_vision/cifar10_tf_keras/distributed.yaml | 4 ++-- .../deformabledetr_coco_pytorch/const_fake.yaml | 6 +++--- .../deformabledetr_coco_pytorch/distributed.yaml | 6 +++--- .../deformabledetr_coco_pytorch/finetune.yaml | 4 ++-- .../deformabledetr_coco_pytorch/finetune_adaptive.yaml | 6 +++--- examples/computer_vision/detr_coco_pytorch/const_fake.yaml | 6 +++--- examples/computer_vision/detr_coco_pytorch/distributed.yaml | 6 +++--- examples/computer_vision/detr_coco_pytorch/finetune.yaml | 4 ++-- .../detr_coco_pytorch/finetune_adaptive.yaml | 6 +++--- examples/computer_vision/efficientdet_pytorch/adaptive.yaml | 2 +- examples/computer_vision/efficientdet_pytorch/const.yaml | 4 ++-- .../computer_vision/efficientdet_pytorch/const_fake.yaml | 4 ++-- .../computer_vision/efficientdet_pytorch/distributed.yaml | 4 ++-- examples/computer_vision/iris_tf_keras/adaptive.yaml | 2 +- examples/computer_vision/iris_tf_keras/const.yaml | 4 ++-- examples/computer_vision/iris_tf_keras/distributed.yaml | 4 ++-- examples/computer_vision/mnist_estimator/const.yaml | 2 +- examples/computer_vision/mnist_estimator/distributed.yaml | 2 +- examples/computer_vision/mnist_tf_layers/const.yaml | 2 +- examples/computer_vision/unets_tf_keras/const.yaml | 4 ++-- examples/computer_vision/unets_tf_keras/distributed.yaml | 4 ++-- examples/features/data_layer_mnist_estimator/const.yaml | 2 +- .../features/data_layer_mnist_estimator/distributed.yaml | 2 +- examples/features/data_layer_mnist_tf_keras/const.yaml | 4 ++-- .../darts_cifar10_pytorch/adaptive.yaml | 2 +- .../darts_cifar10_pytorch/constrained_adaptive.yaml | 2 +- .../darts_cifar10_pytorch/constrained_random.yaml | 2 +- .../darts_penntreebank_pytorch/adaptive.yaml | 4 ++-- .../darts_penntreebank_pytorch/const.yaml | 4 ++-- .../meta_learning/protonet_omniglot_pytorch/20way1shot.yaml | 2 +- .../meta_learning/protonet_omniglot_pytorch/20way5shot.yaml | 2 +- examples/nas/gaea_pytorch/eval/const.yaml | 2 +- examples/nas/gaea_pytorch/eval/distributed.yaml | 2 +- .../nas/gaea_pytorch/eval/distributed_no_data_download.yaml | 2 +- examples/nas/gaea_pytorch/search/const.yaml | 2 +- examples/nlp/albert_squad_pytorch/const.yaml | 4 ++-- examples/nlp/albert_squad_pytorch/distributed_64gpu.yaml | 2 +- examples/nlp/albert_squad_pytorch/distributed_8gpu.yaml | 4 ++-- examples/nlp/bert_glue_pytorch/const.yaml | 4 ++-- examples/nlp/bert_glue_pytorch/distributed.yaml | 4 ++-- examples/nlp/bert_squad_pytorch/const.yaml | 4 ++-- examples/nlp/bert_squad_pytorch/distributed.yaml | 4 ++-- .../examples/huggingface/language-modeling/clm_config.yaml | 4 ++-- .../examples/huggingface/language-modeling/mlm_config.yaml | 4
++-- .../examples/huggingface/language-modeling/plm_config.yaml | 4 ++-- .../examples/huggingface/multiple-choice/swag_config.yaml | 4 ++-- .../examples/huggingface/question-answering/squad.yaml | 4 ++-- .../huggingface/question-answering/squad_beam_search.yaml | 4 ++-- .../examples/huggingface/question-answering/squad_v2.yaml | 4 ++-- .../question-answering/squad_v2_beam_search.yaml | 4 ++-- .../huggingface/text-classification/glue_config.yaml | 4 ++-- .../huggingface/text-classification/xnli_config.yaml | 4 ++-- .../huggingface/token-classification/ner_config.yaml | 4 ++-- 60 files changed, 105 insertions(+), 105 deletions(-) diff --git a/e2e_tests/tests/fixtures/mnist_estimator/single-multi-slot.yaml b/e2e_tests/tests/fixtures/mnist_estimator/single-multi-slot.yaml index f5ea1e1f933..1605a1ccdaa 100644 --- a/e2e_tests/tests/fixtures/mnist_estimator/single-multi-slot.yaml +++ b/e2e_tests/tests/fixtures/mnist_estimator/single-multi-slot.yaml @@ -1,6 +1,6 @@ description: mnist-estimator-single hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 64 hidden_layer_1: 2500 hidden_layer_2: 1000 diff --git a/e2e_tests/tests/fixtures/mnist_estimator/single.yaml b/e2e_tests/tests/fixtures/mnist_estimator/single.yaml index 2696e9fc1f8..710d0c4f64e 100644 --- a/e2e_tests/tests/fixtures/mnist_estimator/single.yaml +++ b/e2e_tests/tests/fixtures/mnist_estimator/single.yaml @@ -1,6 +1,6 @@ description: mnist-estimator-single hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 64 hidden_layer_1: 2500 hidden_layer_2: 1000 diff --git a/examples/computer_vision/cifar10_pytorch/adaptive.yaml b/examples/computer_vision/cifar10_pytorch/adaptive.yaml index 59b85883a15..f8deaf5f277 100644 --- a/examples/computer_vision/cifar10_pytorch/adaptive.yaml +++ b/examples/computer_vision/cifar10_pytorch/adaptive.yaml @@ -5,7 +5,7 @@ hyperparameters: minval: -5.0 maxval: 1.0 base: 10.0 - learning_rate_decay: 1e-6 + learning_rate_decay: 1.0e-6 layer1_dropout: type: double minval: 0.2 diff --git a/examples/computer_vision/cifar10_pytorch/const.yaml b/examples/computer_vision/cifar10_pytorch/const.yaml index e466b1392f9..243ff25bcd1 100644 --- a/examples/computer_vision/cifar10_pytorch/const.yaml +++ b/examples/computer_vision/cifar10_pytorch/const.yaml @@ -1,7 +1,7 @@ name: cifar10_pytorch_const hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dropout: 0.25 layer2_dropout: 0.25 layer3_dropout: 0.5 diff --git a/examples/computer_vision/cifar10_pytorch/distributed.yaml b/examples/computer_vision/cifar10_pytorch/distributed.yaml index 76b87515f47..3a931d300b5 100644 --- a/examples/computer_vision/cifar10_pytorch/distributed.yaml +++ b/examples/computer_vision/cifar10_pytorch/distributed.yaml @@ -1,7 +1,7 @@ name: cifar10_pytorch_distributed hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dropout: 0.25 layer2_dropout: 0.25 layer3_dropout: 0.5 diff --git a/examples/computer_vision/cifar10_tf_keras/adaptive.yaml b/examples/computer_vision/cifar10_tf_keras/adaptive.yaml index c95c13ef000..3c15ae7d082 100644 --- a/examples/computer_vision/cifar10_tf_keras/adaptive.yaml +++ b/examples/computer_vision/cifar10_tf_keras/adaptive.yaml @@ -7,7 +7,7 @@ hyperparameters: minval: -5.0 maxval: 1.0 base: 10.0 - learning_rate_decay: 1e-6 + learning_rate_decay: 1.0e-6 layer1_dropout: type: double minval: 0.2 diff --git 
a/examples/computer_vision/cifar10_tf_keras/const.yaml b/examples/computer_vision/cifar10_tf_keras/const.yaml index 3dcdcb3a8be..00c0559e746 100644 --- a/examples/computer_vision/cifar10_tf_keras/const.yaml +++ b/examples/computer_vision/cifar10_tf_keras/const.yaml @@ -2,8 +2,8 @@ name: cifar10_tf_keras_const data: url: https://s3-us-west-2.amazonaws.com/determined-ai-datasets/cifar10/cifar-10-python.tar.gz hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dropout: 0.25 layer2_dropout: 0.25 layer3_dropout: 0.5 diff --git a/examples/computer_vision/cifar10_tf_keras/distributed.yaml b/examples/computer_vision/cifar10_tf_keras/distributed.yaml index 0d721c586d4..a55c2d8e256 100644 --- a/examples/computer_vision/cifar10_tf_keras/distributed.yaml +++ b/examples/computer_vision/cifar10_tf_keras/distributed.yaml @@ -2,8 +2,8 @@ name: cifar10_tf_keras_distributed data: url: https://s3-us-west-2.amazonaws.com/determined-ai-datasets/cifar10/cifar-10-python.tar.gz hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dropout: 0.25 layer2_dropout: 0.25 layer3_dropout: 0.5 diff --git a/examples/computer_vision/deformabledetr_coco_pytorch/const_fake.yaml b/examples/computer_vision/deformabledetr_coco_pytorch/const_fake.yaml index e411a5ab2ce..55d3135c77a 100644 --- a/examples/computer_vision/deformabledetr_coco_pytorch/const_fake.yaml +++ b/examples/computer_vision/deformabledetr_coco_pytorch/const_fake.yaml @@ -1,15 +1,15 @@ name: deformabledetr_coco_fake_data hyperparameters: - lr: 2e-4 + lr: 2.0e-4 lr_backbone_names: - backbone.0 - lr_backbone: 2e-5 + lr_backbone: 2.0e-5 lr_linear_proj_names: - reference_points - sampling_offsets lr_linear_proj_mult: 0.1 global_batch_size: 1 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 40 clip_max_norm: 0.1 diff --git a/examples/computer_vision/deformabledetr_coco_pytorch/distributed.yaml b/examples/computer_vision/deformabledetr_coco_pytorch/distributed.yaml index 70a91e1a680..e176962223a 100644 --- a/examples/computer_vision/deformabledetr_coco_pytorch/distributed.yaml +++ b/examples/computer_vision/deformabledetr_coco_pytorch/distributed.yaml @@ -2,16 +2,16 @@ name: deformabledetr_coco_distributed hyperparameters: # These settings match those used in this experiment from the original repo: # https://github.com/fundamentalvision/Deformable-DETR/blob/main/configs/r50_deformable_detr.sh - lr: 2e-4 + lr: 2.0e-4 lr_backbone_names: - backbone.0 - lr_backbone: 2e-5 + lr_backbone: 2.0e-5 lr_linear_proj_names: - reference_points - sampling_offsets lr_linear_proj_mult: 0.1 global_batch_size: 32 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 40 clip_max_norm: 0.1 diff --git a/examples/computer_vision/deformabledetr_coco_pytorch/finetune.yaml b/examples/computer_vision/deformabledetr_coco_pytorch/finetune.yaml index 29aab51e1c9..59800508dfe 100644 --- a/examples/computer_vision/deformabledetr_coco_pytorch/finetune.yaml +++ b/examples/computer_vision/deformabledetr_coco_pytorch/finetune.yaml @@ -1,6 +1,6 @@ name: deformabledetr_coco_finetune hyperparameters: - lr: 1e-4 + lr: 1.0e-4 lr_backbone_names: - backbone.0 lr_backbone: 0 @@ -9,7 +9,7 @@ hyperparameters: - sampling_offsets lr_linear_proj_mult: 0.1 global_batch_size: 2 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 4 clip_max_norm: 0.1 diff --git a/examples/computer_vision/deformabledetr_coco_pytorch/finetune_adaptive.yaml 
b/examples/computer_vision/deformabledetr_coco_pytorch/finetune_adaptive.yaml index 3ad7b8176e7..c05bca481c0 100644 --- a/examples/computer_vision/deformabledetr_coco_pytorch/finetune_adaptive.yaml +++ b/examples/computer_vision/deformabledetr_coco_pytorch/finetune_adaptive.yaml @@ -2,8 +2,8 @@ name: deformabledetr_coco_adaptive hyperparameters: lr: type: double - minval: 1e-5 - maxval: 1e-4 + minval: 1.0e-5 + maxval: 1.0e-4 lr_backbone_names: - backbone.0 lr_backbone: 0 @@ -12,7 +12,7 @@ hyperparameters: - sampling_offsets lr_linear_proj_mult: 0.1 global_batch_size: 4 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 4 clip_max_norm: type: double diff --git a/examples/computer_vision/detr_coco_pytorch/const_fake.yaml b/examples/computer_vision/detr_coco_pytorch/const_fake.yaml index c516a063f30..1ad15cbfab7 100644 --- a/examples/computer_vision/detr_coco_pytorch/const_fake.yaml +++ b/examples/computer_vision/detr_coco_pytorch/const_fake.yaml @@ -1,9 +1,9 @@ name: detr_coco_fake_data hyperparameters: - lr: 1e-4 - lr_backbone: 1e-5 + lr: 1.0e-4 + lr_backbone: 1.0e-5 global_batch_size: 2 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 100 clip_max_norm: 0.1 diff --git a/examples/computer_vision/detr_coco_pytorch/distributed.yaml b/examples/computer_vision/detr_coco_pytorch/distributed.yaml index 3a6dc69b9e7..dbb1b486ee0 100644 --- a/examples/computer_vision/detr_coco_pytorch/distributed.yaml +++ b/examples/computer_vision/detr_coco_pytorch/distributed.yaml @@ -2,10 +2,10 @@ name: detr_coco_distributed hyperparameters: # These settings match that for the 150 epoch run provided in the original repo: # https://github.com/facebookresearch/detr - lr: 1e-4 - lr_backbone: 1e-5 + lr: 1.0e-4 + lr_backbone: 1.0e-5 global_batch_size: 16 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 100 clip_max_norm: 0.1 diff --git a/examples/computer_vision/detr_coco_pytorch/finetune.yaml b/examples/computer_vision/detr_coco_pytorch/finetune.yaml index f10d3d67d2f..76cde3cfa05 100644 --- a/examples/computer_vision/detr_coco_pytorch/finetune.yaml +++ b/examples/computer_vision/detr_coco_pytorch/finetune.yaml @@ -1,9 +1,9 @@ name: detr_coco_finetune hyperparameters: - lr: 1e-4 + lr: 1.0e-4 lr_backbone: 0 global_batch_size: 4 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 4 clip_max_norm: 0.1 diff --git a/examples/computer_vision/detr_coco_pytorch/finetune_adaptive.yaml b/examples/computer_vision/detr_coco_pytorch/finetune_adaptive.yaml index 5c33d41d7cd..ffb86164348 100644 --- a/examples/computer_vision/detr_coco_pytorch/finetune_adaptive.yaml +++ b/examples/computer_vision/detr_coco_pytorch/finetune_adaptive.yaml @@ -3,11 +3,11 @@ hyperparameters: # We will tune learning rate and gradient clipping. 
lr: type: double - minval: 1e-5 - maxval: 1e-4 + minval: 1.0e-5 + maxval: 1.0e-4 lr_backbone: 0 global_batch_size: 4 - weight_decay: 1e-4 + weight_decay: 1.0e-4 lr_drop: 4 clip_max_norm: type: double diff --git a/examples/computer_vision/efficientdet_pytorch/adaptive.yaml b/examples/computer_vision/efficientdet_pytorch/adaptive.yaml index ccdaf7802a1..c1dd1b4981a 100644 --- a/examples/computer_vision/efficientdet_pytorch/adaptive.yaml +++ b/examples/computer_vision/efficientdet_pytorch/adaptive.yaml @@ -55,7 +55,7 @@ hyperparameters: lr_cycle_mul: 1.0 lr_cycle_limit: 1 warmup_lr: 0.0001 - min_lr: 1e-5 + min_lr: 1.0e-5 start_epoch: None decay_epochs: 30 warmup_epochs: 5 diff --git a/examples/computer_vision/efficientdet_pytorch/const.yaml b/examples/computer_vision/efficientdet_pytorch/const.yaml index 3e34d0f06ce..9af003e5ba0 100644 --- a/examples/computer_vision/efficientdet_pytorch/const.yaml +++ b/examples/computer_vision/efficientdet_pytorch/const.yaml @@ -26,7 +26,7 @@ hyperparameters: opt: fusedmomentum opt_eps: 0.001 momentum: 0.9 - weight_decay: 4e-05 + weight_decay: 4.0e-05 sched: cosine lr: .03 lr_noise: 0.4 0.9 @@ -35,7 +35,7 @@ hyperparameters: lr_cycle_mul: 1.0 lr_cycle_limit: 1 warmup_lr: 0.0001 - min_lr: 1e-5 + min_lr: 1.0e-5 start_epoch: None decay_epochs: 30 warmup_epochs: 5 diff --git a/examples/computer_vision/efficientdet_pytorch/const_fake.yaml b/examples/computer_vision/efficientdet_pytorch/const_fake.yaml index fa24225a1ee..4f67bc6f6c7 100644 --- a/examples/computer_vision/efficientdet_pytorch/const_fake.yaml +++ b/examples/computer_vision/efficientdet_pytorch/const_fake.yaml @@ -26,7 +26,7 @@ hyperparameters: opt: fusedmomentum opt_eps: 0.001 momentum: 0.9 - weight_decay: 4e-05 + weight_decay: 4.0e-05 sched: cosine lr: .03 lr_noise: 0.4 0.9 @@ -35,7 +35,7 @@ hyperparameters: lr_cycle_mul: 1.0 lr_cycle_limit: 1 warmup_lr: 0.0001 - min_lr: 1e-5 + min_lr: 1.0e-5 start_epoch: None decay_epochs: 30 warmup_epochs: 5 diff --git a/examples/computer_vision/efficientdet_pytorch/distributed.yaml b/examples/computer_vision/efficientdet_pytorch/distributed.yaml index e060f1ef494..840fa1f3694 100644 --- a/examples/computer_vision/efficientdet_pytorch/distributed.yaml +++ b/examples/computer_vision/efficientdet_pytorch/distributed.yaml @@ -28,7 +28,7 @@ hyperparameters: opt: fusedmomentum opt_eps: 0.001 momentum: 0.9 - weight_decay: 4e-05 + weight_decay: 4.0e-05 sched: cosine lr: 0.06 lr_noise: 0.4 0.9 @@ -37,7 +37,7 @@ hyperparameters: lr_cycle_mul: 1.0 lr_cycle_limit: 1 warmup_lr: 0.0001 - min_lr: 1e-5 + min_lr: 1.0e-5 start_epoch: None decay_epochs: 30 warmup_epochs: 5 diff --git a/examples/computer_vision/iris_tf_keras/adaptive.yaml b/examples/computer_vision/iris_tf_keras/adaptive.yaml index 1e21e99c52a..9e738fbc4b8 100644 --- a/examples/computer_vision/iris_tf_keras/adaptive.yaml +++ b/examples/computer_vision/iris_tf_keras/adaptive.yaml @@ -8,7 +8,7 @@ hyperparameters: minval: -5.0 maxval: 1.0 base: 10.0 - learning_rate_decay: 1e-6 + learning_rate_decay: 1.0e-6 layer1_dense_size: type: int minval: 4 diff --git a/examples/computer_vision/iris_tf_keras/const.yaml b/examples/computer_vision/iris_tf_keras/const.yaml index 67c78cd79bc..010228242b4 100644 --- a/examples/computer_vision/iris_tf_keras/const.yaml +++ b/examples/computer_vision/iris_tf_keras/const.yaml @@ -3,8 +3,8 @@ data: train_url: http://download.tensorflow.org/data/iris_training.csv test_url: http://download.tensorflow.org/data/iris_test.csv hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + 
learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dense_size: 16 global_batch_size: 30 searcher: diff --git a/examples/computer_vision/iris_tf_keras/distributed.yaml b/examples/computer_vision/iris_tf_keras/distributed.yaml index 2c848bb173e..c1d0fa509d0 100644 --- a/examples/computer_vision/iris_tf_keras/distributed.yaml +++ b/examples/computer_vision/iris_tf_keras/distributed.yaml @@ -3,8 +3,8 @@ data: train_url: http://download.tensorflow.org/data/iris_training.csv test_url: http://download.tensorflow.org/data/iris_test.csv hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dense_size: 16 global_batch_size: 30 resources: diff --git a/examples/computer_vision/mnist_estimator/const.yaml b/examples/computer_vision/mnist_estimator/const.yaml index c9f40b452df..015cd0ef500 100644 --- a/examples/computer_vision/mnist_estimator/const.yaml +++ b/examples/computer_vision/mnist_estimator/const.yaml @@ -1,6 +1,6 @@ name: mnist_estimator_const hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 64 hidden_layer_1: 2500 hidden_layer_2: 1000 diff --git a/examples/computer_vision/mnist_estimator/distributed.yaml b/examples/computer_vision/mnist_estimator/distributed.yaml index 4e189438164..93f9997a061 100644 --- a/examples/computer_vision/mnist_estimator/distributed.yaml +++ b/examples/computer_vision/mnist_estimator/distributed.yaml @@ -1,6 +1,6 @@ name: mnist_estimator_distributed hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 1024 # per GPU batch size of 64 hidden_layer_1: 2500 hidden_layer_2: 1000 diff --git a/examples/computer_vision/mnist_tf_layers/const.yaml b/examples/computer_vision/mnist_tf_layers/const.yaml index f01df6d3fc3..dd053002c5b 100644 --- a/examples/computer_vision/mnist_tf_layers/const.yaml +++ b/examples/computer_vision/mnist_tf_layers/const.yaml @@ -1,6 +1,6 @@ name: mnist_tf_core_to_estimator hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 64 n_filters_1: 10 n_filters_2: 40 diff --git a/examples/computer_vision/unets_tf_keras/const.yaml b/examples/computer_vision/unets_tf_keras/const.yaml index 3a251bfb412..6623d24c6ff 100644 --- a/examples/computer_vision/unets_tf_keras/const.yaml +++ b/examples/computer_vision/unets_tf_keras/const.yaml @@ -4,8 +4,8 @@ data: data_file: mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5 hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dense_size: 16 global_batch_size: 64 OUTPUT_CHANNELS: 3 diff --git a/examples/computer_vision/unets_tf_keras/distributed.yaml b/examples/computer_vision/unets_tf_keras/distributed.yaml index ecdd0f771cf..18fe7672924 100644 --- a/examples/computer_vision/unets_tf_keras/distributed.yaml +++ b/examples/computer_vision/unets_tf_keras/distributed.yaml @@ -4,8 +4,8 @@ data: data_file: mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5 hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dense_size: 16 global_batch_size: 512 # per slot batch size = 64 OUTPUT_CHANNELS: 3 diff --git a/examples/features/data_layer_mnist_estimator/const.yaml b/examples/features/data_layer_mnist_estimator/const.yaml index 6351e2232fb..e4cc27a6c34 100644 --- a/examples/features/data_layer_mnist_estimator/const.yaml +++ b/examples/features/data_layer_mnist_estimator/const.yaml @@ 
-2,7 +2,7 @@ name: data_layer_mnist_estimator_const data: skip_checkpointing_input: true hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 16 hidden_layer_1: 2500 hidden_layer_2: 1000 diff --git a/examples/features/data_layer_mnist_estimator/distributed.yaml b/examples/features/data_layer_mnist_estimator/distributed.yaml index 2271f5c0495..5445dadf6f0 100644 --- a/examples/features/data_layer_mnist_estimator/distributed.yaml +++ b/examples/features/data_layer_mnist_estimator/distributed.yaml @@ -2,7 +2,7 @@ name: data_layer_mnist_estimator_const data: skip_checkpointing_input: true hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 global_batch_size: 128 hidden_layer_1: 2500 hidden_layer_2: 1000 diff --git a/examples/features/data_layer_mnist_tf_keras/const.yaml b/examples/features/data_layer_mnist_tf_keras/const.yaml index 4ec782c95f2..b6553fd8d29 100644 --- a/examples/features/data_layer_mnist_tf_keras/const.yaml +++ b/examples/features/data_layer_mnist_tf_keras/const.yaml @@ -2,8 +2,8 @@ name: data_layer_mnist_tf_keras_const data: url: https://s3-us-west-2.amazonaws.com/determined-ai-datasets/cifar10/cifar-10-python.tar.gz hyperparameters: - learning_rate: 1e-4 - learning_rate_decay: 1e-6 + learning_rate: 1.0e-4 + learning_rate_decay: 1.0e-6 layer1_dropout: 0.25 layer2_dropout: 0.25 layer3_dropout: 0.5 diff --git a/examples/hp_search_benchmarks/darts_cifar10_pytorch/adaptive.yaml b/examples/hp_search_benchmarks/darts_cifar10_pytorch/adaptive.yaml index df249d4bc95..3755d5b613d 100644 --- a/examples/hp_search_benchmarks/darts_cifar10_pytorch/adaptive.yaml +++ b/examples/hp_search_benchmarks/darts_cifar10_pytorch/adaptive.yaml @@ -9,7 +9,7 @@ min_validation_period: hyperparameters: learning_rate: 0.025 momentum: 0.9 - weight_decay: 3e-4 + weight_decay: 3.0e-4 train_epochs: 300 global_batch_size: 96 init_channels: 36 diff --git a/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_adaptive.yaml b/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_adaptive.yaml index 1f3fcf7597b..3bffb9c05fd 100644 --- a/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_adaptive.yaml +++ b/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_adaptive.yaml @@ -10,7 +10,7 @@ hyperparameters: use_constraints: true learning_rate: 0.025 momentum: 0.9 - weight_decay: 3e-4 + weight_decay: 3.0e-4 train_epochs: 300 global_batch_size: 96 init_channels: 36 diff --git a/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_random.yaml b/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_random.yaml index bce9b5f70db..da0354b1ff3 100644 --- a/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_random.yaml +++ b/examples/hp_search_benchmarks/darts_cifar10_pytorch/constrained_random.yaml @@ -10,7 +10,7 @@ hyperparameters: use_constraints: true learning_rate: 0.025 momentum: 0.9 - weight_decay: 3e-4 + weight_decay: 3.0e-4 train_epochs: 300 global_batch_size: 96 init_channels: 36 diff --git a/examples/hp_search_benchmarks/darts_penntreebank_pytorch/adaptive.yaml b/examples/hp_search_benchmarks/darts_penntreebank_pytorch/adaptive.yaml index ee2d14ab6a6..93624d5f91b 100644 --- a/examples/hp_search_benchmarks/darts_penntreebank_pytorch/adaptive.yaml +++ b/examples/hp_search_benchmarks/darts_penntreebank_pytorch/adaptive.yaml @@ -21,8 +21,8 @@ hyperparameters: dropoute: 0.1 nonmono: 5 alpha: 0 - beta: 1e-3 - weight_decay: 8e-7 + beta: 1.0e-3 + weight_decay: 8.0e-7 max_seq_length_delta: 20 
clip_gradients_l2_norm: 0.25 diff --git a/examples/hp_search_benchmarks/darts_penntreebank_pytorch/const.yaml b/examples/hp_search_benchmarks/darts_penntreebank_pytorch/const.yaml index 0620cc7324b..2f4201d905d 100644 --- a/examples/hp_search_benchmarks/darts_penntreebank_pytorch/const.yaml +++ b/examples/hp_search_benchmarks/darts_penntreebank_pytorch/const.yaml @@ -26,8 +26,8 @@ hyperparameters: dropoute: 0.1 nonmono: 5 alpha: 0 - beta: 1e-3 - weight_decay: 8e-7 + beta: 1.0e-3 + weight_decay: 8.0e-7 max_seq_length_delta: 20 clip_gradients_l2_norm: 0.25 diff --git a/examples/meta_learning/protonet_omniglot_pytorch/20way1shot.yaml b/examples/meta_learning/protonet_omniglot_pytorch/20way1shot.yaml index 0e6cffbcd63..3667a47fee4 100644 --- a/examples/meta_learning/protonet_omniglot_pytorch/20way1shot.yaml +++ b/examples/meta_learning/protonet_omniglot_pytorch/20way1shot.yaml @@ -8,7 +8,7 @@ data: val_workers: 4 hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 weight_decay: 0 reduce_every: 200 lr_gamma: 0.5 diff --git a/examples/meta_learning/protonet_omniglot_pytorch/20way5shot.yaml b/examples/meta_learning/protonet_omniglot_pytorch/20way5shot.yaml index e4aecf658fa..e0f0beb290c 100644 --- a/examples/meta_learning/protonet_omniglot_pytorch/20way5shot.yaml +++ b/examples/meta_learning/protonet_omniglot_pytorch/20way5shot.yaml @@ -8,7 +8,7 @@ data: val_workers: 4 hyperparameters: - learning_rate: 1e-3 + learning_rate: 1.0e-3 weight_decay: 0 reduce_every: 200 lr_gamma: 0.5 diff --git a/examples/nas/gaea_pytorch/eval/const.yaml b/examples/nas/gaea_pytorch/eval/const.yaml index e575281a51c..96f32c0b1b6 100644 --- a/examples/nas/gaea_pytorch/eval/const.yaml +++ b/examples/nas/gaea_pytorch/eval/const.yaml @@ -46,7 +46,7 @@ hyperparameters: clip_gradients_l2_norm: 5 learning_rate: 0.5 momentum: 0.9 - weight_decay: 3e-5 + weight_decay: 3.0e-5 # Choices include linear, efficientnet, and cosine lr_scheduler: linear lr_epochs: 300 diff --git a/examples/nas/gaea_pytorch/eval/distributed.yaml b/examples/nas/gaea_pytorch/eval/distributed.yaml index 75a0e2d8a88..78f77db52ee 100644 --- a/examples/nas/gaea_pytorch/eval/distributed.yaml +++ b/examples/nas/gaea_pytorch/eval/distributed.yaml @@ -43,7 +43,7 @@ hyperparameters: num_classes: 1000 learning_rate: 0.5 momentum: 0.9 - weight_decay: 3e-5 + weight_decay: 3.0e-5 drop_path_prob: 0.0 drop_prob: 0.0 label_smoothing_rate: 0.1 diff --git a/examples/nas/gaea_pytorch/eval/distributed_no_data_download.yaml b/examples/nas/gaea_pytorch/eval/distributed_no_data_download.yaml index 81be829ac3a..93d6c412155 100644 --- a/examples/nas/gaea_pytorch/eval/distributed_no_data_download.yaml +++ b/examples/nas/gaea_pytorch/eval/distributed_no_data_download.yaml @@ -46,7 +46,7 @@ hyperparameters: clip_gradients_l2_norm: 5 learning_rate: 0.5 momentum: 0.9 - weight_decay: 3e-5 + weight_decay: 3.0e-5 # Choices include linear, efficientnet, and cosine lr_scheduler: linear lr_epochs: 300 diff --git a/examples/nas/gaea_pytorch/search/const.yaml b/examples/nas/gaea_pytorch/search/const.yaml index cffb5994aa5..9a1db23b151 100644 --- a/examples/nas/gaea_pytorch/search/const.yaml +++ b/examples/nas/gaea_pytorch/search/const.yaml @@ -18,7 +18,7 @@ hyperparameters: momentum: 0.9 min_learning_rate: 0 scheduler_epochs: 50 - weight_decay: 3e-4 + weight_decay: 3.0e-4 arch_learning_rate: 0.1 init_channels: 16 layers: 8 diff --git a/examples/nlp/albert_squad_pytorch/const.yaml b/examples/nlp/albert_squad_pytorch/const.yaml index 3eb114a1780..77fb821cb4e 100644 --- 
a/examples/nlp/albert_squad_pytorch/const.yaml +++ b/examples/nlp/albert_squad_pytorch/const.yaml @@ -2,9 +2,9 @@ name: ALBert_SQuAD_PyTorch_1gpu hyperparameters: global_batch_size: 2 - learning_rate: 5e-5 + learning_rate: 5.0e-5 model_type: 'albert' - adam_epsilon: 1e-8 + adam_epsilon: 1.0e-8 weight_decay: 0 num_warmup_steps: 13220 # 10% of total training max_seq_length: 384 diff --git a/examples/nlp/albert_squad_pytorch/distributed_64gpu.yaml b/examples/nlp/albert_squad_pytorch/distributed_64gpu.yaml index 086d72ffa9e..ad9cc15bef6 100644 --- a/examples/nlp/albert_squad_pytorch/distributed_64gpu.yaml +++ b/examples/nlp/albert_squad_pytorch/distributed_64gpu.yaml @@ -5,7 +5,7 @@ hyperparameters: learning_rate: 0.0002 model_type: 'albert' do_lower_case: true - adam_epsilon: 1e-8 + adam_epsilon: 1.0e-8 weight_decay: 0 num_warmup_steps: 206 max_seq_length: 384 diff --git a/examples/nlp/albert_squad_pytorch/distributed_8gpu.yaml b/examples/nlp/albert_squad_pytorch/distributed_8gpu.yaml index 7ca5282c0ab..1c0ab541969 100644 --- a/examples/nlp/albert_squad_pytorch/distributed_8gpu.yaml +++ b/examples/nlp/albert_squad_pytorch/distributed_8gpu.yaml @@ -2,10 +2,10 @@ name: ALBert_SQuAD_PyTorch_8gpu hyperparameters: global_batch_size: 16 - learning_rate: 5e-5 + learning_rate: 5.0e-5 model_type: 'albert' do_lower_case: true - adam_epsilon: 1e-8 + adam_epsilon: 1.0e-8 weight_decay: 0 num_warmup_steps: 1620 max_seq_length: 384 diff --git a/examples/nlp/bert_glue_pytorch/const.yaml b/examples/nlp/bert_glue_pytorch/const.yaml index a4be3aa3b85..9f8610a7920 100644 --- a/examples/nlp/bert_glue_pytorch/const.yaml +++ b/examples/nlp/bert_glue_pytorch/const.yaml @@ -1,10 +1,10 @@ name: bert_glue_pytorch_const hyperparameters: global_batch_size: 24 - learning_rate: 2e-5 + learning_rate: 2.0e-5 lr_scheduler_epoch_freq: 1 model_type: 'bert' - adam_epsilon: 1e-8 + adam_epsilon: 1.0e-8 weight_decay: 0 num_warmup_steps: 0 num_training_steps: 459 diff --git a/examples/nlp/bert_glue_pytorch/distributed.yaml b/examples/nlp/bert_glue_pytorch/distributed.yaml index 0451839021e..9ffecd3ca25 100644 --- a/examples/nlp/bert_glue_pytorch/distributed.yaml +++ b/examples/nlp/bert_glue_pytorch/distributed.yaml @@ -1,10 +1,10 @@ name: bert_glue_pytorch_distributed hyperparameters: global_batch_size: 192 # per gpu batch size of 24 - learning_rate: 2e-5 + learning_rate: 2.0e-5 lr_scheduler_epoch_freq: 1 model_type: 'bert' - adam_epsilon: 1e-8 + adam_epsilon: 1.0e-8 weight_decay: 0 num_warmup_steps: 0 num_training_steps: 459 diff --git a/examples/nlp/bert_squad_pytorch/const.yaml b/examples/nlp/bert_squad_pytorch/const.yaml index 32d47249aba..6fa4a001ddb 100644 --- a/examples/nlp/bert_squad_pytorch/const.yaml +++ b/examples/nlp/bert_squad_pytorch/const.yaml @@ -2,10 +2,10 @@ name: Bert_SQuAD_PyTorch hyperparameters: global_batch_size: 12 - learning_rate: 3e-5 + learning_rate: 3.0e-5 lr_scheduler_epoch_freq: 1 model_type: 'bert' - adam_epsilon: 1e-8 + adam_epsilon: 1.0e-8 weight_decay: 0 num_warmup_steps: 0 max_seq_length: 384 diff --git a/examples/nlp/bert_squad_pytorch/distributed.yaml b/examples/nlp/bert_squad_pytorch/distributed.yaml index 663210978d4..d14cf6bb38c 100644 --- a/examples/nlp/bert_squad_pytorch/distributed.yaml +++ b/examples/nlp/bert_squad_pytorch/distributed.yaml @@ -2,10 +2,10 @@ name: Bert_SQuAD_PyTorch_distributed hyperparameters: global_batch_size: 96 # per slot batch size = 12 - learning_rate: 3e-5 + learning_rate: 3.0e-5 lr_scheduler_epoch_freq: 1 model_type: 'bert' - adam_epsilon: 1e-8 + adam_epsilon: 
1.0e-8 weight_decay: 0 num_warmup_steps: 0 max_seq_length: 384 diff --git a/model_hub/examples/huggingface/language-modeling/clm_config.yaml b/model_hub/examples/huggingface/language-modeling/clm_config.yaml index 7d051e57394..15cca6046ee 100644 --- a/model_hub/examples/huggingface/language-modeling/clm_config.yaml +++ b/model_hub/examples/huggingface/language-modeling/clm_config.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 8 - learning_rate: 5e-5 - adam_epsilon: 1e-8 + learning_rate: 5.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/language-modeling/mlm_config.yaml b/model_hub/examples/huggingface/language-modeling/mlm_config.yaml index 4c453d4f99c..a4fcdf59832 100644 --- a/model_hub/examples/huggingface/language-modeling/mlm_config.yaml +++ b/model_hub/examples/huggingface/language-modeling/mlm_config.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 8 - learning_rate: 5e-5 - adam_epsilon: 1e-8 + learning_rate: 5.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/language-modeling/plm_config.yaml b/model_hub/examples/huggingface/language-modeling/plm_config.yaml index de3d3244eb7..97a7ecbb8a2 100644 --- a/model_hub/examples/huggingface/language-modeling/plm_config.yaml +++ b/model_hub/examples/huggingface/language-modeling/plm_config.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 2 - learning_rate: 2e-5 - adam_epsilon: 1e-8 + learning_rate: 2.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/multiple-choice/swag_config.yaml b/model_hub/examples/huggingface/multiple-choice/swag_config.yaml index 8cdfbe93619..ebaa85ad75d 100644 --- a/model_hub/examples/huggingface/multiple-choice/swag_config.yaml +++ b/model_hub/examples/huggingface/multiple-choice/swag_config.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 64 - learning_rate: 5e-5 - adam_epsilon: 1e-8 + learning_rate: 5.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/question-answering/squad.yaml b/model_hub/examples/huggingface/question-answering/squad.yaml index 972c2445d57..a99de834c4b 100644 --- a/model_hub/examples/huggingface/question-answering/squad.yaml +++ b/model_hub/examples/huggingface/question-answering/squad.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 12 - learning_rate: 3e-5 - adam_epsilon: 1e-8 + learning_rate: 3.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/question-answering/squad_beam_search.yaml b/model_hub/examples/huggingface/question-answering/squad_beam_search.yaml index d627feb1400..be9dfc30f1b 100644 --- a/model_hub/examples/huggingface/question-answering/squad_beam_search.yaml +++ b/model_hub/examples/huggingface/question-answering/squad_beam_search.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 4 - learning_rate: 3e-5 - adam_epsilon: 1e-8 + learning_rate: 3.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/question-answering/squad_v2.yaml 
b/model_hub/examples/huggingface/question-answering/squad_v2.yaml index b1a6617ff9d..1c2e49e8bc4 100644 --- a/model_hub/examples/huggingface/question-answering/squad_v2.yaml +++ b/model_hub/examples/huggingface/question-answering/squad_v2.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 12 - learning_rate: 3e-5 - adam_epsilon: 1e-8 + learning_rate: 3.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/question-answering/squad_v2_beam_search.yaml b/model_hub/examples/huggingface/question-answering/squad_v2_beam_search.yaml index ac511b8d562..9de15dd8f64 100644 --- a/model_hub/examples/huggingface/question-answering/squad_v2_beam_search.yaml +++ b/model_hub/examples/huggingface/question-answering/squad_v2_beam_search.yaml @@ -7,8 +7,8 @@ hyperparameters: cache_dir: null # Training Args global_batch_size: 4 - learning_rate: 3e-5 - adam_epsilon: 1e-8 + learning_rate: 3.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/text-classification/glue_config.yaml b/model_hub/examples/huggingface/text-classification/glue_config.yaml index 81d8db49b5e..e05701646a9 100644 --- a/model_hub/examples/huggingface/text-classification/glue_config.yaml +++ b/model_hub/examples/huggingface/text-classification/glue_config.yaml @@ -18,8 +18,8 @@ hyperparameters: use_apex_amp: true # Training Args global_batch_size: 32 - learning_rate: 3e-5 - adam_epsilon: 1e-8 + learning_rate: 3.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/text-classification/xnli_config.yaml b/model_hub/examples/huggingface/text-classification/xnli_config.yaml index 4405117b656..26eeadcab05 100644 --- a/model_hub/examples/huggingface/text-classification/xnli_config.yaml +++ b/model_hub/examples/huggingface/text-classification/xnli_config.yaml @@ -8,8 +8,8 @@ hyperparameters: do_lower_case: false # Training Args global_batch_size: 32 - learning_rate: 5e-5 - adam_epsilon: 1e-8 + learning_rate: 5.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0 diff --git a/model_hub/examples/huggingface/token-classification/ner_config.yaml b/model_hub/examples/huggingface/token-classification/ner_config.yaml index a91c746162a..41ac2a5334f 100644 --- a/model_hub/examples/huggingface/token-classification/ner_config.yaml +++ b/model_hub/examples/huggingface/token-classification/ner_config.yaml @@ -7,8 +7,8 @@ hyperparameters: use_apex_amp: false # Training Args global_batch_size: 8 - learning_rate: 5e-5 - adam_epsilon: 1e-8 + learning_rate: 5.0e-5 + adam_epsilon: 1.0e-8 weight_decay: 0 lr_scheduler_type: linear num_warmup_steps: 0
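For reference (not part of the patch itself), a minimal Python sketch of the behavior the commit message describes. It assumes PyYAML is installed and importable as yaml; the key name learning_rate is just an illustrative placeholder mirroring the example configs.

    # Hypothetical demonstration: load the same key with both notations and
    # compare the resulting Python types under PyYAML's YAML 1.1 resolver.
    import yaml

    for literal in ("1e-5", "1.0e-5"):
        value = yaml.safe_load(f"learning_rate: {literal}")["learning_rate"]
        print(f"{literal!r} -> {type(value).__name__}: {value!r}")

    # Expected output:
    #   '1e-5' -> str: '1e-5'
    #   '1.0e-5' -> float: 1e-05

Once the example configs use 1.0e-5 style literals, anything that loads them with plain yaml.safe_load gets proper floats without special-casing the hyperparameter values.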