Checkout remaining changes from develop
ashahba committed Dec 12, 2020
1 parent bec820e commit a2eabde
Showing 88 changed files with 1,308 additions and 641 deletions.
1 change: 1 addition & 0 deletions benchmarks/README.md
@@ -23,6 +23,7 @@ dependencies to be installed:
| Image Recognition | TensorFlow | [ResNet 50](https://arxiv.org/pdf/1512.03385.pdf) | Inference | Model Containers: [Int8](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50-int8-inference-tensorflow-container.html) [FP32](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50-fp32-inference-tensorflow-container.html) <br> Model Packages: [Int8](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50-int8-inference-tensorflow-model.html) [FP32](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50-fp32-inference-tensorflow-model.html) | [Int8](image_recognition/tensorflow/resnet50/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet50/README.md#fp32-inference-instructions) |
| Image Recognition | TensorFlow | [ResNet 50v1.5](https://github.com/tensorflow/models/tree/master/official/resnet) | Inference | Model Containers: [Int8](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50v1-5-int8-inference-tensorflow-container.html) [FP32](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50v1-5-fp32-inference-container.html) [BFloat16**](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50v1-5-bfloat16-inference-tensorflow-container.html) <br> Model Packages: [Int8](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50v1-5-int8-inference-tensorflow-model.html) [FP32](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50v1-5-fp32-inference-model-package.html) [BFloat16**](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50v1-5-bfloat16-inference-tensorflow-model.html) | [Int8](image_recognition/tensorflow/resnet50v1_5/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet50v1_5/README.md#fp32-inference-instructions) [BFloat16**](image_recognition/tensorflow/resnet50v1_5/README.md#bfloat16-inference-instructions) |
| Image Recognition | TensorFlow | [ResNet 50v1.5](https://github.com/tensorflow/models/tree/master/official/resnet) | Training | Model Containers: [FP32](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50v1-5-fp32-training-tensorflow-container.html) [BFloat16**](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50v1-5-bfloat16-training-tensorflow-container.html) <br> Model Packages: [FP32](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50v1-5-fp32-training-tensorflow-model.html) [BFloat16**](https://software.intel.com/content/www/us/en/develop/articles/containers/resnet50v1-5-bfloat16-training-tensorflow-model.html) | [FP32](image_recognition/tensorflow/resnet50v1_5/README.md#fp32-training-instructions) [BFloat16**](image_recognition/tensorflow/resnet50v1_5/README.md#bfloat16-training-instructions) |
| Image Segmentation | TensorFlow | [3D U-Net](https://arxiv.org/pdf/1606.06650.pdf) | Inference | | [FP32](image_segmentation/tensorflow/3d_unet/README.md) |
| Image Segmentation | TensorFlow | [UNet](https://arxiv.org/pdf/1606.06650.pdf) | Inference | Model Containers: [FP32](https://software.intel.com/content/www/us/en/develop/articles/containers/unet-fp32-inference-tensorflow-container.html) <br> Model Packages: [FP32](https://software.intel.com/content/www/us/en/develop/articles/containers/unet-fp32-inference-tensorflow-model.html) | [FP32](image_segmentation/tensorflow/unet/README.md#fp32-inference-instructions) |
| Image Segmentation | TensorFlow | [MaskRCNN](https://arxiv.org/abs/1703.06870) | Inference | Model Containers: [FP32](https://software.intel.com/content/www/us/en/develop/articles/containers/mask-rcnn-fp32-inference-tensorflow-container.html) <br> Model Packages: [FP32](https://software.intel.com/content/www/us/en/develop/articles/containers/mask-rcnn-fp32-inference-tensorflow-model.html) | [FP32](image_segmentation/tensorflow/maskrcnn/README.md#fp32-training-instructions) |
| Language Modeling | TensorFlow | [BERT](https://arxiv.org/pdf/1810.04805.pdf) | Inference | Model Containers: [FP32](https://software.intel.com/content/www/us/en/develop/articles/containers/bert-large-fp32-inference-tensorflow-container.html) <br> Model Packages: [FP32](https://software.intel.com/content/www/us/en/develop/articles/containers/bert-large-fp32-inference-tensorflow-model.html) | [FP32](language_modeling/tensorflow/bert_large/README.md#fp32-inference-instructions) [BFloat16**](language_modeling/tensorflow/bert_large/README.md#bfloat16-inference-instructions) |
8 changes: 6 additions & 2 deletions benchmarks/common/tensorflow/container_init.sh
@@ -17,5 +17,9 @@

# This file includes runtime installs for model containers

apt-get install numactl -y

if (( $(id -u) == 0 )); then
apt-get install numactl -y
else
echo "Please run as root"
exit 1
fi
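
The guard above relies on `id -u` printing `0` for root. If these containers may ever run as a non-root user, a `sudo` fallback is one alternative; the sketch below is an illustration only, not part of this commit, and assumes `sudo` is present in the image:

```bash
# Sketch only: try sudo before giving up for non-root users.
# Assumes sudo is installed in the image, which this commit does not guarantee.
if (( $(id -u) == 0 )); then
    apt-get install -y numactl
elif command -v sudo >/dev/null 2>&1; then
    sudo apt-get install -y numactl
else
    echo "Please run as root" >&2
    exit 1
fi
```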
45 changes: 36 additions & 9 deletions benchmarks/common/tensorflow/start.sh
@@ -461,6 +461,20 @@ function add_calibration_arg() {
echo "${calibration_arg}"
}

# 3D UNet model
function 3d_unet() {
if [[ ${PRECISION} == "fp32" ]] && [[ ${MODE} == "inference" ]]; then
if [[ ${NOINSTALL} != "True" ]]; then
pip install -r "${MOUNT_BENCHMARK}/${USE_CASE}/${FRAMEWORK}/${MODEL_NAME}/requirements.txt"
fi
export PYTHONPATH=${PYTHONPATH}:${MOUNT_INTELAI_MODELS_SOURCE}/inference/fp32
PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model
else
echo "${PRECISION} ${MODE} is not supported for ${MODEL_NAME}"
exit 1
fi
}
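
The new `3d_unet` branch is reached through the `MODEL_NAME` dispatch added at the bottom of this script. A typical invocation that exercises it might look like the sketch below; the flag names mirror the other model READMEs in this repo, and the graph path and batch size are placeholders:

```bash
# Hypothetical paths; see image_segmentation/tensorflow/3d_unet/README.md.
python launch_benchmark.py \
    --model-name 3d_unet \
    --framework tensorflow \
    --mode inference \
    --precision fp32 \
    --batch-size 1 \
    --in-graph /home/user/3d_unet_frozen_graph.pb
```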

#BERT model
function bert() {
if [ ${PRECISION} == "fp32" ]; then
@@ -725,7 +739,7 @@ function maskrcnn() {

# mobilenet_v1 model
function mobilenet_v1() {
if [ ${PRECISION} == "fp32" ]; then
if [ ${PRECISION} == "fp32" ] || [ ${PRECISION} == "bfloat16" ]; then
CMD="${CMD} $(add_arg "--input_height" ${input_height}) $(add_arg "--input_width" ${input_width}) \
$(add_arg "--warmup_steps" ${warmup_steps}) $(add_arg "--steps" ${steps}) \
$(add_arg "--input_layer" ${input_layer}) $(add_arg "--output_layer" ${output_layer})"
@@ -767,10 +781,24 @@ function mtcc() {

# NCF model
function ncf() {
if [ ${PRECISION} == "fp32" ]; then
# For nfc, if dataset location is empty, script downloads dataset at given location.
if [[ -n "${clean}" ]]; then
CMD="${CMD} --clean"
fi

# NCF supports different datasets including ml-1m and ml-20m.
if [[ -n "${dataset}" && ${dataset} != "" ]]; then
CMD="${CMD} --dataset=${dataset}"
fi

if [[ -n "${te}" && ${te} != "" ]]; then
CMD="${CMD} -te=${te}"
fi

if [ ${PRECISION} == "fp32" -o ${PRECISION} == "bfloat16" ]; then
# For ncf, if dataset location is empty, script downloads dataset at given location.
if [ ! -d "${DATASET_LOCATION}" ]; then
mkdir -p /dataset
mkdir -p ./dataset
CMD="${CMD} --data-location=./dataset"
fi

export PYTHONPATH=${PYTHONPATH}:${MOUNT_EXTERNAL_MODELS_SOURCE}
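
The `clean`, `dataset`, and `te` blocks above read shell variables that `start.sh` fills from custom model args. A sketch of a launch that exercises them — the `-- name=value` pass-through and the `ml-20m`/`te` values are assumptions based on the NCF README, not confirmed by this diff:

```bash
# Sketch: custom args after "--" surface as ${clean}, ${dataset}, ${te}
# in start.sh; values here are illustrative only.
python launch_benchmark.py \
    --model-name ncf \
    --framework tensorflow \
    --mode training \
    --precision bfloat16 \
    -- dataset=ml-20m clean=1 te=12
```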
@@ -895,7 +923,7 @@ function rfcn() {

# SSD-MobileNet model
function ssd_mobilenet() {
if [ ${PRECISION} == "fp32" ]; then
if [ ${PRECISION} == "fp32" ] || [ ${PRECISION} == "bfloat16" ]; then
if [ ${BATCH_SIZE} != "-1" ]; then
echo "Warning: SSD-MobileNet FP32 inference script does not use the batch_size arg"
fi
@@ -1287,9 +1315,6 @@ function wide_deep_large_ds() {
if [ "${kmp_block_time}" != None ] ; then
CMD="${CMD} --kmp_block_time=${kmp_block_time}"
fi
if [ "${kmp_affinity}" != None ]; then
CMD="${CMD} --kmp_affinity=${kmp_affinity}"
fi
if [ "${kmp_settings}" != None ]; then
CMD="${CMD} --kmp_settings=${kmp_settings}"
fi
@@ -1306,7 +1331,9 @@ function wide_deep_large_ds() {
LOGFILE=${OUTPUT_DIR}/${LOG_FILENAME}

MODEL_NAME=$(echo ${MODEL_NAME} | tr 'A-Z' 'a-z')
if [ ${MODEL_NAME} == "bert" ]; then
if [ ${MODEL_NAME} == "3d_unet" ]; then
3d_unet
elif [ ${MODEL_NAME} == "bert" ]; then
bert
elif [ ${MODEL_NAME} == "dcgan" ]; then
dcgan
14 changes: 14 additions & 0 deletions benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md
@@ -4,6 +4,7 @@ This document has instructions for how to run MobileNet V1 for the
following modes/precisions:
* [Int8 inference](#int8-inference-instructions)
* [FP32 inference](#fp32-inference-instructions)
* [BFloat16 inference](#bfloat16-inference-instructions)

Instructions and scripts for model training are coming
later.
@@ -279,3 +280,16 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/mob
Ran inference with batch size 100
Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_fp32_20190110_211648.log
```

# BFloat16 Inference Instructions

MobileNet V1 BFloat16 inference relies on TensorFlow's Auto-Mixed-Precision graph rewrite to convert the FP32 graph to BFloat16 at runtime.
Before evaluating MobileNet V1 BFloat16 inference, set the following environment variables:
```
export TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_INFERLIST_REMOVE=BiasAdd
export TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_DENYLIST_REMOVE=Softmax
export TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_ALLOWLIST_ADD=BiasAdd,Softmax
```
The instructions are otherwise the same as the FP32 inference instructions above; change `--precision=fp32` to `--precision=bfloat16` in the commands.
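
For example, the online-inference command from the FP32 section above would become roughly the following sketch (the frozen-graph path is a placeholder; batch size 100 matches the FP32 sample log above):

```bash
export TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_INFERLIST_REMOVE=BiasAdd
export TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_DENYLIST_REMOVE=Softmax
export TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_ALLOWLIST_ADD=BiasAdd,Softmax

python launch_benchmark.py \
    --model-name mobilenet_v1 \
    --framework tensorflow \
    --mode inference \
    --precision bfloat16 \
    --batch-size 100 \
    --in-graph /home/user/mobilenet_v1_1.0_224_frozen.pb
```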
@@ -0,0 +1,17 @@
#
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
@@ -0,0 +1,6 @@
{
"optimization_parameters": {
"KMP_AFFINITY": "granularity=fine,verbose,compact,1,0",
"KMP_BLOCKTIME": 1
}
}
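
These optimization parameters are consumed by `set_kmp_vars` in the model initializer below, which exports each value only when the variable is not already set in the environment. Setting them by hand is equivalent; a minimal sketch:

```bash
# Equivalent manual settings; the initializer only applies config.json
# values for variables that are still unset.
export KMP_AFFINITY=granularity=fine,verbose,compact,1,0
export KMP_BLOCKTIME=1
```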
@@ -0,0 +1,104 @@
#
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import os
from common.base_model_init import BaseModelInitializer
from common.base_model_init import set_env_var


class ModelInitializer(BaseModelInitializer):
""" Model initializer for MobileNet V1 BFloat16 inference """

def __init__(self, args, custom_args=[], platform_util=None):
super(ModelInitializer, self).__init__(args, custom_args, platform_util)

# use default batch size if -1
if self.args.batch_size == -1:
self.args.batch_size = 128

# Set KMP env vars, if they haven't already been set
config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json")
self.set_kmp_vars(config_file_path)

# set num_inter_threads and num_intra_threads (override inter threads to 2)
self.set_num_inter_intra_threads(num_inter_threads=2)

script_name = "accuracy.py" if self.args.accuracy_only \
else "benchmark.py"
script_path = os.path.join(
self.args.intelai_models, self.args.mode, script_name)
self.command_prefix = "{} {}".format(self.python_exe, script_path)

if self.args.socket_id != -1:
self.command_prefix = "numactl --cpunodebind={} -l {}".format(
str(self.args.socket_id), self.command_prefix)

set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads)

self.parse_args()

if not self.args.accuracy_only:
# add args for the benchmark script
script_args_list = [
"input_graph", "input_height", "input_width", "batch_size",
"input_layer", "output_layer", "num_inter_threads",
"num_intra_threads", "warmup_steps", "steps", "precision"]
self.command_prefix = self.add_args_to_command(
self.command_prefix, script_args_list)
else:
# add args for the accuracy script
script_args_list = [
"input_graph", "data_location", "input_height", "input_width",
"batch_size", "input_layer", "output_layer",
"num_inter_threads", "num_intra_threads", "precision"]
self.command_prefix = self.add_args_to_command(
self.command_prefix, script_args_list)

def parse_args(self):
if self.custom_args is None:
return

parser = argparse.ArgumentParser()
parser.add_argument(
"--input_height", default=224,
dest='input_height', type=int, help="input height")
parser.add_argument(
"--input_width", default=224,
dest='input_width', type=int, help="input width")
parser.add_argument(
"--warmup_steps", dest="warmup_steps",
help="number of warmup steps",
type=int, default=10)
parser.add_argument(
"--steps", dest="steps",
help="number of steps",
type=int, default=50)
parser.add_argument(
"--input_layer", dest="input_layer",
help="name of input layer",
type=str, default="input")
parser.add_argument(
"--output_layer", dest="output_layer",
help="name of output layer",
type=str, default="MobilenetV1/Predictions/Reshape_1")

self.args = parser.parse_args(self.custom_args, namespace=self.args)

def run(self):
self.run_command(self.command_prefix)
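
Putting the pieces together, for a benchmark run pinned to socket 0 the initializer assembles roughly the command below. This is a sketch: the script path, intra-op thread count, and the exact `--arg=value` formatting produced by `add_args_to_command` are assumptions, and the other values are the defaults set in `__init__` and `parse_args` above.

```bash
# Roughly what run() executes when --socket-id=0 and batch size is left at -1.
# numactl --cpunodebind=0 -l pins CPU and memory allocation to socket 0.
numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/benchmark.py \
    --input_graph=/in_graph/mobilenet_v1.pb \
    --input_height=224 \
    --input_width=224 \
    --batch_size=128 \
    --input_layer=input \
    --output_layer=MobilenetV1/Predictions/Reshape_1 \
    --num_inter_threads=2 \
    --num_intra_threads=28 \
    --warmup_steps=10 \
    --steps=50 \
    --precision=bfloat16
```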
Expand Up @@ -44,8 +44,7 @@ def __init__(self, args, custom_args=[], platform_util=None):
script_name = "accuracy.py" if self.args.accuracy_only \
else "benchmark.py"
script_path = os.path.join(
self.args.intelai_models, self.args.mode, self.args.precision,
script_name)
self.args.intelai_models, self.args.mode, script_name)
self.command_prefix = "{} {}".format(self.python_exe, script_path)

if self.args.socket_id != -1:
Expand All @@ -61,15 +60,15 @@ def __init__(self, args, custom_args=[], platform_util=None):
script_args_list = [
"input_graph", "input_height", "input_width", "batch_size",
"input_layer", "output_layer", "num_inter_threads",
"num_intra_threads", "warmup_steps", "steps"]
"num_intra_threads", "warmup_steps", "steps", "precision"]
self.command_prefix = self.add_args_to_command(
self.command_prefix, script_args_list)
else:
# add args for the accuracy script
script_args_list = [
"input_graph", "data_location", "input_height", "input_width",
"batch_size", "input_layer", "output_layer",
"num_inter_threads", "num_intra_threads"]
"num_inter_threads", "num_intra_threads", "precision"]
self.command_prefix = self.add_args_to_command(
self.command_prefix, script_args_list)

3 changes: 2 additions & 1 deletion benchmarks/language_modeling/tensorflow/bert_base/README.md
@@ -86,7 +86,8 @@ ${PYTHON} export_classifier.py \
--bert_config_file=$BERT_BASE_DIR/bert_config.json \
--output_dir=${OUTPUT_DIR} \
--precision=fp32 \
--saved_model=true
--saved_model=true \
--experimental_gelu=True # Disable this flag if your TensorFlow build doesn't support it
```

## Inference
@@ -51,6 +51,8 @@ def __init__(self, args, custom_args=[], platform_util=None):
default='Classifier')
arg_parser.add_argument("--max-seq-length", type=int, dest="max_seq_length", default=None)
arg_parser.add_argument("--profile", dest="profile", default=None)
arg_parser.add_argument('--experimental-gelu', help=' [Experimental] Use experimental gelu op.',
dest="experimental_gelu", default="False")
arg_parser.add_argument("--config-file", dest="bert_config_file", default="bert_config.json")
arg_parser.add_argument("--vocab-file", dest="vocab_file", default="vocab.txt")
arg_parser.add_argument('--task-name', help=' Task name for classifier', dest="task_name", default='MRPC')
@@ -111,7 +113,8 @@ def expand_data_path(path):
" --do_eval=" + str(self.args.do_eval) + eoo + \
" --vocab_file=" + str(self.args.vocab_file) + eoo + \
" --data_dir=" + str(self.args.data_dir) + eoo + \
" --eval_batch_size=" + str(self.args.batch_size)
" --eval_batch_size=" + str(self.args.batch_size) + \
" --experimental_gelu=" + str(self.args.experimental_gelu)

if self.args.accuracy_only:
model_args += " --mode=accuracy"
@@ -51,6 +51,8 @@ def __init__(self, args, custom_args=[], platform_util=None):
default='Classifier')
arg_parser.add_argument("--max-seq-length", type=int, dest="max_seq_length", default=None)
arg_parser.add_argument("--profile", dest="profile", default=None)
arg_parser.add_argument('--experimental-gelu', help=' [Experimental] Use experimental gelu op.',
dest="experimental_gelu", default="False")
arg_parser.add_argument("--config-file", dest="bert_config_file", default="bert_config.json")
arg_parser.add_argument("--vocab-file", dest="vocab_file", default="vocab.txt")
arg_parser.add_argument('--task-name', help=' Task name for classifier', dest="task_name", default='MRPC')
@@ -111,7 +113,8 @@ def expand_data_path(path):
" --do_eval=" + str(self.args.do_eval) + eoo + \
" --vocab_file=" + str(self.args.vocab_file) + eoo + \
" --data_dir=" + str(self.args.data_dir) + eoo + \
" --eval_batch_size=" + str(self.args.batch_size)
" --eval_batch_size=" + str(self.args.batch_size) + eoo + \
" --experimental_gelu=" + str(self.args.experimental_gelu)

if self.args.accuracy_only:
model_args += " --mode=accuracy"
14 changes: 14 additions & 0 deletions benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md
@@ -5,6 +5,7 @@ following modes/precisions:
- [SSD-MobileNet](#ssd-mobilenet)
- [Int8 Inference Instructions](#int8-inference-instructions)
- [FP32 Inference Instructions](#fp32-inference-instructions)
- [BFloat16 Inference Instructions](#bfloat16-inference-instructions)

Instructions and scripts for model training and inference
for other precisions are coming later.
@@ -237,3 +238,16 @@ Below is a sample log file tail when testing accuracy:
Ran inference with batch size -1
Log location outside container: {--output-dir value}/benchmark_ssd-mobilenet_inference_fp32_20190123_225145.log
```

# BFloat16 Inference Instructions

SSD-MobileNet BFloat16 inference relies on TensorFlow's Auto-Mixed-Precision graph rewrite to convert the FP32 graph to BFloat16 at runtime.
Before evaluating SSD-MobileNet BFloat16 inference, set the following environment variables:

```
export TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_ALLOWLIST_ADD=BiasAdd,Relu6,Mul,AddV2
export TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_INFERLIST_REMOVE=BiasAdd,AddV2,Mul
export TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_CLEARLIST_REMOVE=Relu6
```

The instructions are otherwise the same as the FP32 inference instructions above; change `--precision=fp32` to `--precision=bfloat16` in the commands.
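
As with MobileNet V1 above, a sketch of the resulting command (the graph path is a placeholder; the batch-size arg is omitted because, as the FP32 section notes, this script does not use it):

```bash
export TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_ALLOWLIST_ADD=BiasAdd,Relu6,Mul,AddV2
export TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_INFERLIST_REMOVE=BiasAdd,AddV2,Mul
export TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_CLEARLIST_REMOVE=Relu6

python launch_benchmark.py \
    --model-name ssd-mobilenet \
    --framework tensorflow \
    --mode inference \
    --precision bfloat16 \
    --in-graph /home/user/ssdmobilenet_frozen_graph.pb
```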
