Merge branch 'jitendra/mlper-tlt-bfloat-cpx' into 'develop-ng'
mlperf Transformer LT Readme update

See merge request intelai/models!108
ashahba committed May 30, 2020
1 parent de00677 commit b439716
Showing 273 changed files with 300,364 additions and 3,246 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -8,4 +8,5 @@
.coverage
.tox
test_data/
*.bak
download_glue_data.py
data/
2 changes: 1 addition & 1 deletion CODEOWNERS
@@ -2,7 +2,7 @@
# Each line is a file pattern followed by one or more owners.

# These owners will be the default owners for everything in the repo.
* @mlukaszewski @claynerobison @chuanqi129 @agramesh1
* @mlukaszewski @claynerobison @chuanqi129 @agramesh1 @justkw

# Order is important. The last matching pattern has the most precedence.
# So if a pull request only touches javascript files, only these owners
16 changes: 11 additions & 5 deletions benchmarks/README.md
@@ -20,16 +20,22 @@ dependencies to be installed:
| Image Recognition | TensorFlow | [Inception V4](https://arxiv.org/pdf/1602.07261.pdf) | Inference | [Int8](image_recognition/tensorflow/inceptionv4/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/inceptionv4/README.md#fp32-inference-instructions) |
| Image Recognition | TensorFlow | [MobileNet V1*](https://arxiv.org/pdf/1704.04861.pdf) | Inference | [Int8](image_recognition/tensorflow/mobilenet_v1/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/mobilenet_v1/README.md#fp32-inference-instructions) |
| Image Recognition | TensorFlow | [ResNet 101](https://arxiv.org/pdf/1512.03385.pdf) | Inference | [Int8](image_recognition/tensorflow/resnet101/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet101/README.md#fp32-inference-instructions) |
| Image Recognition | TensorFlow | [ResNet 50](https://arxiv.org/pdf/1512.03385.pdf) | Inference | [Int8](image_recognition/tensorflow/resnet50/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet50/README.md#fp32-inference-instructions) |
| Image Recognition | TensorFlow | [ResNet 50v1.5*](https://github.com/tensorflow/models/tree/master/official/resnet) | Inference | [Int8](image_recognition/tensorflow/resnet50v1_5/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet50v1_5/README.md#fp32-inference-instructions) |
| Image Recognition | TensorFlow | [ResNet 50v1.5*](https://github.com/tensorflow/models/tree/master/official/resnet) | Training | [FP32](image_recognition/tensorflow/resnet50v1_5/README.md#fp32-training-instructions) |
| Reinforcement | TensorFlow | [MiniGo](https://arxiv.org/abs/1712.01815.pdf) | Training | [FP32](reinforcement/tensorflow/minigo/README.md#fp32-training-instructions)|
| Image Recognition | TensorFlow | [ResNet 50](https://arxiv.org/pdf/1512.03385.pdf) | Inference | [Int8](image_recognition/tensorflow/resnet50/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet50/README.md#fp32-inference-instructions)|
| Image Recognition | TensorFlow | [ResNet 50v1.5](https://github.com/tensorflow/models/tree/master/official/resnet) | Inference | [Int8](image_recognition/tensorflow/resnet50v1_5/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet50v1_5/README.md#fp32-inference-instructions) [BFloat16**](image_recognition/tensorflow/resnet50v1_5/README.md#bfloat16-inference-instructions)|
| Image Recognition | TensorFlow | [ResNet 50v1.5](https://github.com/tensorflow/models/tree/master/official/resnet) | Training | [FP32](image_recognition/tensorflow/resnet50v1_5/README.md#fp32-training-instructions) [BFloat16**](image_recognition/tensorflow/resnet50v1_5/README.md#bfloat16-training-instructions)|
| Language Modeling | TensorFlow | [BERT](https://arxiv.org/pdf/1810.04805.pdf) | Inference | [FP32](language_modeling/tensorflow/bert_large/README.md#fp32-inference-instructions) [BFloat16**](language_modeling/tensorflow/bert_large/README.md#bfloat16-inference-instructions) |
| Language Modeling | TensorFlow | [BERT](https://arxiv.org/pdf/1810.04805.pdf) | Training | [FP32](language_modeling/tensorflow/bert_large/README.md#fp32-training-instructions) [BFloat16**](language_modeling/tensorflow/bert_large/README.md#bfloat16-training-instructions) |
| Language Translation | TensorFlow | [GNMT*](https://arxiv.org/pdf/1609.08144.pdf) | Inference | [FP32](language_translation/tensorflow/mlperf_gnmt/README.md#fp32-inference-instructions) |
| Reinforcement | TensorFlow | [MiniGo](https://arxiv.org/abs/1712.01815.pdf) | Training | [FP32](reinforcement/tensorflow/minigo/README.md#fp32-training-instructions)|
| Language Translation | TensorFlow | [Transformer_LT_Official ](https://arxiv.org/pdf/1706.03762.pdf)| Inference | [FP32](language_translation/tensorflow/transformer_lt_official/README.md#fp32-inference-instructions) |
| Language Translation | TensorFlow | [Transformer_LT_mlperf ](https://arxiv.org/pdf/1706.03762.pdf)| Training | [FP32](language_translation/tensorflow/transformer_mlperf/README.md#fp32-training-instructions) [BFloat16**](language_translation/tensorflow/transformer_mlperf/README.md#bfloat16-training-instructions) |
| Object Detection | TensorFlow | [R-FCN](https://arxiv.org/pdf/1605.06409.pdf) | Inference | [Int8](object_detection/tensorflow/rfcn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/rfcn/README.md#fp32-inference-instructions) |
| Object Detection | TensorFlow | [SSD-MobileNet*](https://arxiv.org/pdf/1704.04861.pdf) | Inference | [Int8](object_detection/tensorflow/ssd-mobilenet/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-mobilenet/README.md#fp32-inference-instructions) |
| Object Detection | TensorFlow | [SSD-ResNet34*](https://arxiv.org/pdf/1512.02325.pdf) | Inference | [Int8](object_detection/tensorflow/ssd-resnet34/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-resnet34/README.md#fp32-inference-instructions) |
| Object Detection | TensorFlow | [SSD-ResNet34](https://arxiv.org/pdf/1512.02325.pdf) | Training | [FP32](object_detection/tensorflow/ssd-resnet34/README.md#fp32-training-instructions) [BFloat16**](object_detection/tensorflow/ssd-resnet34/README.md#bf16-training-instructions) |
| Recommendation | TensorFlow | [Wide & Deep Large Dataset](https://arxiv.org/pdf/1606.07792.pdf) | Inference | [Int8](recommendation/tensorflow/wide_deep_large_ds/README.md#int8-inference-instructions) [FP32](recommendation/tensorflow/wide_deep_large_ds/README.md#fp32-inference-instructions) |
| Recommendation | TensorFlow | [Wide & Deep](https://arxiv.org/pdf/1606.07792.pdf) | Inference | [FP32](recommendation/tensorflow/wide_deep/README.md#fp32-inference-instructions) |

*Means the model is belong to [MLPerf](https://mlperf.org/) models, will long term support.
*Means the model belongs to [MLPerf](https://mlperf.org/) models and will be supported long-term.

**Means the BFloat16 data type support is experimental.
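The table above only links to the per-precision instructions; as a quick orientation, the hedged sketch below shows how one of the new BFloat16 entries might be launched through the repository's benchmark wrapper. Only `--precision` (with its new `bfloat16` choice) and `--num-train-steps` are taken from the parser changes in this commit; the script name `launch_benchmark.py`, the model name `transformer_mlperf`, and the remaining flags are illustrative assumptions.

```python
# Hedged sketch: launching a BFloat16 training run via the benchmark wrapper.
# Flag names --precision and --num-train-steps come from this commit; the
# other flags, the model name, and the script path are illustrative assumptions.
import subprocess

cmd = [
    "python", "launch_benchmark.py",
    "--model-name", "transformer_mlperf",   # assumed model name
    "--framework", "tensorflow",
    "--mode", "training",
    "--precision", "bfloat16",              # new choice added in this commit
    "--num-train-steps", "200",             # new option added in this commit
]
subprocess.run(cmd, check=True)
```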
24 changes: 18 additions & 6 deletions benchmarks/common/base_benchmark_util.py
@@ -67,8 +67,8 @@ def _define_args(self):

self._common_arg_parser.add_argument(
"-p", "--precision",
help="Specify the model precision to use: fp32, int8",
required=required_arg, choices=["fp32", "int8"],
help="Specify the model precision to use: fp32, int8, or bfloat16",
required=required_arg, choices=["fp32", "int8", "bfloat16"],
dest="precision")

self._common_arg_parser.add_argument(
@@ -132,6 +132,11 @@ def _define_args(self):
help="Specify the number threads between layers",
dest="num_inter_threads", default=None)

self._common_arg_parser.add_argument(
"-ts", "--num-train-steps", type=check_positive_number,
help="Specify the number of training steps ",
dest="num_train_steps", default=1)

self._common_arg_parser.add_argument(
"--data-num-intra-threads", type=check_positive_number,
help="The number intra op threads for the data layer config",
@@ -152,6 +157,13 @@
"of using frozen graphs.",
dest="checkpoint", default=None, type=check_valid_folder)

self._common_arg_parser.add_argument(
"-bb", "--backbone-model",
help="Specify the location of backbone-model directory. "
"This option can be used by models (like SSD_Resnet34) "
"to do fine-tuning training or achieve convergence.",
dest="backbone_model", default=None, type=check_valid_folder)

self._common_arg_parser.add_argument(
"-g", "--in-graph", help="Full path to the input graph ",
dest="input_graph", default=None, type=check_valid_filename)
@@ -220,11 +232,11 @@ def _define_args(self):
def _validate_args(self):
"""validate the args and initializes platform_util"""
# check if socket id is in socket number range
num_sockets = self._platform_util.num_cpu_sockets
num_numas = self._platform_util.num_numa_nodes
args = self.args
if not -1 <= args.socket_id < num_sockets:
raise ValueError("Socket id must be within socket number range: "
"[0, {}].".format(num_sockets - 1))
if not -1 <= args.socket_id < num_numas:
raise ValueError("Socket id must be within NUMA number range: "
"[0, {}].".format(num_numas - 1))

# check number of cores
num_logical_cores_per_socket = \
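Taken together, the parser changes above add a bfloat16 precision choice, a --num-train-steps option, and a --backbone-model option, and the socket check now ranges over NUMA nodes instead of CPU sockets. The following minimal sketch (not the repo's actual class; check_positive_number and the NUMA node count are simplified stand-ins) shows the pieces working together:

```python
# Minimal, self-contained sketch of the new/changed options and the
# NUMA-aware socket check; helper names are simplified stand-ins.
import argparse


def check_positive_number(value):
    # Simplified stand-in for the repo's validator of the same name.
    ivalue = int(value)
    if ivalue <= 0:
        raise argparse.ArgumentTypeError("{} is not a positive number".format(value))
    return ivalue


def build_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-p", "--precision", dest="precision",
        help="Specify the model precision to use: fp32, int8, or bfloat16",
        choices=["fp32", "int8", "bfloat16"])
    parser.add_argument(
        "-ts", "--num-train-steps", dest="num_train_steps",
        type=check_positive_number, default=1,
        help="Specify the number of training steps")
    parser.add_argument(
        "-bb", "--backbone-model", dest="backbone_model", default=None,
        help="Location of the backbone-model directory, used by models "
             "like SSD-ResNet34 for fine-tuning or convergence runs")
    parser.add_argument("--socket-id", type=int, dest="socket_id", default=-1)
    return parser


def validate_socket_id(socket_id, num_numa_nodes):
    # Mirrors the updated check: -1 (no pinning) or a valid NUMA node index.
    if not -1 <= socket_id < num_numa_nodes:
        raise ValueError("Socket id must be within NUMA number range: "
                         "[0, {}].".format(num_numa_nodes - 1))


if __name__ == "__main__":
    args = build_parser().parse_args(
        ["--precision", "bfloat16", "--num-train-steps", "200", "--socket-id", "0"])
    validate_socket_id(args.socket_id, num_numa_nodes=2)
    print(args.precision, args.num_train_steps, args.backbone_model)
```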
8 changes: 8 additions & 0 deletions benchmarks/common/base_model_init.py
@@ -144,6 +144,9 @@ def set_num_inter_intra_threads(self, num_inter_threads=None, num_intra_threads=
* num_inter_threads = The number of sockets
* num_intra_threads = The total number of cores across all sockets, or
self.args.num_cores if a specific number of cores was defined.
* in case MPI_NUM_PROCESSES is used
* num_inter_threads = 1
* num_intra_threads = the number of cores on a single socket minus 2
"""
# if num_inter_threads is specified, use that value as long as the arg isn't set
if num_inter_threads and not self.args.num_inter_threads:
@@ -163,11 +166,16 @@ def set_num_inter_intra_threads(self, num_inter_threads=None, num_intra_threads=
else:
if not self.args.num_inter_threads:
self.args.num_inter_threads = self.platform_util.num_cpu_sockets
if os.environ["MPI_NUM_PROCESSES"] != "None":
self.args.num_inter_threads = 1
if not self.args.num_intra_threads:
if self.args.num_cores == -1:
self.args.num_intra_threads = \
int(self.platform_util.num_cores_per_socket *
self.platform_util.num_cpu_sockets)
if os.environ["MPI_NUM_PROCESSES"] != "None":
self.args.num_intra_threads = \
self.platform_util.num_cores_per_socket - 2
else:
self.args.num_intra_threads = self.args.num_cores

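The docstring above states the new defaulting rule: inter-op threads default to the socket count and intra-op threads to the total core count, but when MPI_NUM_PROCESSES is set the defaults become 1 inter-op thread and (cores per socket minus 2) intra-op threads. Below is a standalone sketch of just that rule; it uses os.environ.get for the lookup so the example also runs when the variable is unset.

```python
import os


def default_inter_intra_threads(num_cpu_sockets, num_cores_per_socket):
    """Sketch of the defaulting rule documented in set_num_inter_intra_threads.

    Illustrative only; not the repo's implementation.
    """
    # Non-MPI default: one inter-op thread per socket, intra-op threads equal
    # to the total number of physical cores across all sockets.
    num_inter_threads = num_cpu_sockets
    num_intra_threads = num_cores_per_socket * num_cpu_sockets

    # When MPI_NUM_PROCESSES is set, each process gets 1 inter-op thread and
    # the cores of a single socket minus 2 for intra-op work.
    if os.environ.get("MPI_NUM_PROCESSES", "None") != "None":
        num_inter_threads = 1
        num_intra_threads = num_cores_per_socket - 2

    return num_inter_threads, num_intra_threads


if __name__ == "__main__":
    # Example: a 2-socket machine with 28 physical cores per socket.
    print(default_inter_intra_threads(2, 28))   # (2, 56) without MPI
    os.environ["MPI_NUM_PROCESSES"] = "4"
    print(default_inter_intra_threads(2, 28))   # (1, 26) with MPI
```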
(Diff content for the remaining changed files is not shown.)