Add: FastRCNN (ResNet50) FP32 inference benchmark scripts (intel#20)
* add fastrcnn resnet50 inference benchmark script with no checkpoint file attached.

* changes in README, fixing style in model_init for code review comments.

* minor changes

* minor changes and NCF model init update.

* minor changes in README

* fix style

* fix base_benchmark_util.

* update squeezenet model_init
WafaaT authored and Dina Suehiro Jones committed Nov 15, 2018
1 parent 509dc6d commit 952bf69
Showing 22 changed files with 1,659 additions and 45 deletions.
Empty file added __init__.py
3 changes: 3 additions & 0 deletions benchmarks/README.md
```diff
@@ -14,6 +14,9 @@ Training and inference scripts with intel optimized MKL
         * [FP32](image_recognition/tensorflow/squeezenet/README.md#fp32-inference-instructions)
 * Object Detection
     * Tensorflow
+        * Fast R-CNN (ResNet50)
+            * Inference
+                * [FP32](object_detection/tensorflow/fastrcnn/README.md#fp32-inference-instructions)
         * SSD-Mobilenet
             * Inference
                 * [FP32](object_detection/tensorflow/ssd-mobilenet/README.md#fp32-inference-instructions)
```
48 changes: 13 additions & 35 deletions benchmarks/common/base_benchmark_util.py
```diff
@@ -175,44 +175,22 @@ def initialize_model(self, args, unknown_args):
         model_initializer = None
         model_init_file = None
         if args.model_name:  # not empty
-            current_path = os.path.dirname(os.path.dirname(
-                os.path.realpath(__file__)))
+            current_path = os.path.dirname(
+                os.path.dirname(os.path.realpath(__file__)))
 
             # find the path to the model_init.py file
             filename = "{}.py".format(self.MODEL_INITIALIZER)
-            print("current path: {}".format(current_path))
-            search_path = os.path.join(current_path, "*", args.framework,
-                                       args.model_name, args.mode,
-                                       args.platform, filename)
-            print("search path: {}".format(search_path))
-            matches = glob.glob(search_path)
-
-            if len(matches) > 1:
-                # we should never get more than one match
-                raise ValueError("Found multiple model_init.py files for "
-                                 "{} {} {} {}".format(
-                                     args.framework, args.model_name,
-                                     args.platform, args.mode))
-            elif len(matches) == 0:
-                raise ValueError("No model_init.py was found for {} {} {} "
-                                 "{}".format(args.framework, args.model_name,
-                                             args.platform, args.mode))
-
-            model_init_file = matches[0]
-
-            print ("Using model init: {}".format(model_init_file))
-            if os.path.exists(model_init_file):
-                dir_list = model_init_file.split("/")
-                framework_index = dir_list.index(args.framework)
-                usecase = dir_list[framework_index - 1]
-
-                package = ".".join([usecase, args.framework, args.model_name,
-                                    args.mode, args.platform])
-                model_init_module = __import__(package + "." +
-                                               self.MODEL_INITIALIZER,
-                                               fromlist=['*'])
-                model_initializer = model_init_module.ModelInitializer(
-                    args, unknown_args, self._platform_util)
+            model_init_file = os.path.join(current_path, args.use_case,
+                                           args.framework, args.model_name,
+                                           args.mode, args.platform,
+                                           filename)
+            package = ".".join([args.use_case, args.framework,
+                                args.model_name, args.mode, args.platform])
+            model_init_module = __import__(package + "." +
+                                           self.MODEL_INITIALIZER,
+                                           fromlist=['*'])
+            model_initializer = model_init_module.ModelInitializer(
+                args, unknown_args, self._platform_util)
 
         if model_initializer is None:
             raise ImportError("Unable to locate {}.".format(model_init_file))
```
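The rewritten lookup builds a dotted package name from the benchmarks layout (`use_case.framework.model_name.mode.platform`) and imports the model's `model_init` module dynamically. Below is a minimal standalone sketch of that pattern, with hypothetical argument values, using `importlib.import_module` in place of the `__import__` call above:

```
import importlib

# Hypothetical example values; in base_benchmark_util.py these come from argparse.
use_case = "object_detection"
framework = "tensorflow"
model_name = "fastrcnn"
mode = "inference"
platform = "fp32"

# Dotted module path, e.g.
# "object_detection.tensorflow.fastrcnn.inference.fp32.model_init"
module_name = ".".join(
    [use_case, framework, model_name, mode, platform, "model_init"])
print("importing:", module_name)

try:
    # Succeeds only when run from the benchmarks directory, where these
    # packages exist on the import path.
    model_init_module = importlib.import_module(module_name)
    print("found ModelInitializer:",
          hasattr(model_init_module, "ModelInitializer"))
except ImportError:
    print("module not found outside the benchmarks tree")
```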
5 changes: 5 additions & 0 deletions benchmarks/common/tensorflow/run_tf_benchmark.py
```diff
@@ -42,6 +42,11 @@ def main(self):
                                 nargs='?',
                                 dest="intelai_models")
 
+        arg_parser.add_argument("--use-case",
+                                help="The corresponding use case of the given model ",
+                                nargs='?',
+                                dest="use_case")
+
         # checkpoint directory location
         arg_parser.add_argument('-c', "--checkpoint",
                                 help='Specify the location of trained model '
```
40 changes: 39 additions & 1 deletion benchmarks/common/tensorflow/start.sh
```diff
@@ -20,6 +20,7 @@
 #
 
 echo 'Running with parameters:'
+echo "    USE_CASE: ${USE_CASE}"
 echo "    FRAMEWORK: ${FRAMEWORK}"
 echo "    WORKSPACE: ${WORKSPACE}"
 echo "    DATASET_LOCATION: ${DATASET_LOCATION}"
@@ -88,6 +89,7 @@ function run_model() {
     # basic run command with commonly used args
     CMD="python ${RUN_SCRIPT_PATH} \
 --framework=${FRAMEWORK} \
+--use-case=${USE_CASE} \
 --model-name=${MODEL_NAME} \
 --platform=${PLATFORM} \
 --mode=${MODE} \
@@ -114,6 +116,40 @@ function install_protoc() {
 
 }
 
+# Fast R-CNN (ResNet50) model
+function fastrcnn() {
+    if [ ${MODE} == "inference" ] && [ ${PLATFORM} == "fp32" ]; then
+        if [[ -z "${config_file}" ]]; then
+            echo "Fast R-CNN requires -- config_file arg to be defined"
+            exit 1
+        fi
+        # install dependencies
+        pip install -r "${MOUNT_BENCHMARK}/object_detection/tensorflow/fastrcnn/requirements.txt"
+        original_dir=$(pwd)
+        cd "${MOUNT_EXTERNAL_MODELS_SOURCE}/research"
+        # install protoc v3.3.0, if necessary, then compile protoc files
+        install_protoc "https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip"
+        echo "Compiling protoc files"
+        ./bin/protoc object_detection/protos/*.proto --python_out=.
+
+        export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim
+        # install cocoapi
+        cd ${MOUNT_EXTERNAL_MODELS_SOURCE}/cocoapi/PythonAPI
+        echo "Installing COCO API"
+        make
+        cp -r pycocotools ${MOUNT_EXTERNAL_MODELS_SOURCE}/research/
+        export PYTHONPATH=${PYTHONPATH}:${MOUNT_EXTERNAL_MODELS_SOURCE}
+
+        cd $original_dir
+        CMD="${CMD} --checkpoint=${CHECKPOINT_DIRECTORY} \
+--config_file=${config_file}"
+
+        PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model
+    else
+        echo "MODE:${MODE} and PLATFORM=${PLATFORM} not supported"
+    fi
+}
+
 # NCF model
 function ncf() {
     # For nfc, if dataset location is empty, script downloads dataset at given location.
@@ -232,7 +268,9 @@ LOGFILE=${LOG_OUTPUT}/benchmark_${MODEL_NAME}_${MODE}_${PLATFORM}.log
 echo 'Log output location: ${LOGFILE}'
 
 MODEL_NAME=$(echo ${MODEL_NAME} | tr 'A-Z' 'a-z')
-if [ ${MODEL_NAME} == "ncf" ]; then
+if [ ${MODEL_NAME} == "fastrcnn" ]; then
+    fastrcnn
+elif [ ${MODEL_NAME} == "ncf" ]; then
     ncf
 elif [ ${MODEL_NAME} == "resnet50" ]; then
     resnet50
```
Changes to the squeezenet `model_init.py`:

```diff
@@ -23,8 +23,6 @@ def __init__(self, args, custom_args, platform_util):
         ncores = self.args.num_intra_threads
 
         script_path = os.path.join(self.args.intelai_models,
-                                   "image_recognition", args.framework,
-                                   args.model_name, args.platform,
                                    "train_squeezenet.py")
 
         self.command = ("taskset -c {:.0f}-{:.0f} python {} "
```
41 changes: 37 additions & 4 deletions benchmarks/launch_benchmark.py
```diff
@@ -124,7 +124,38 @@ def run_docker_container(self, args):
         variables to start running the benchmarking job.
         """
         benchmark_scripts = os.getcwd()
-        intelai_models = os.path.join(benchmark_scripts, os.pardir, "models")
+        if args.model_name:  # not empty
+            # find the custom model path
+            print("current path: {}".format(benchmark_scripts))
+            search_path = os.path.join(
+                benchmark_scripts, os.pardir, "models", "*",
+                args.framework, args.model_name, args.mode, args.platform)
+            print("search path: {}".format(search_path))
+            matches = glob.glob(search_path)
+            if len(matches) > 1:
+                # we should never get more than one match
+                raise ValueError("Found multiple model locations for {} {} {}"
+                                 .format(args.framework,
+                                         args.model_name,
+                                         args.platform))
+            elif len(matches) == 0:
+                raise ValueError("No model was found for {} {} {}"
+                                 .format(args.framework,
+                                         args.model_name,
+                                         args.platform))
+
+            intelai_models = matches[0]
+            print ("Using the custom model in : {}".format(intelai_models))
+            if os.path.exists(intelai_models):
+                dir_list = intelai_models.split("/")
+                framework_index = dir_list.index(args.framework)
+                use_case = str(dir_list[framework_index - 1])
+            else:
+                use_case = None
+        else:
+            use_case = None
+            intelai_models = os.path.join(benchmark_scripts, os.pardir, "models")
+
         mount_benchmark = "/workspace/benchmarks"
         mount_external_models_source = "/workspace/models"
         mount_intelai_models = "/workspace/intelai_models"
@@ -151,6 +182,7 @@ def run_docker_container(self, args):
                   "--env MOUNT_BENCHMARK={} "
                   "--env MOUNT_EXTERNAL_MODELS_SOURCE={} "
                   "--env MOUNT_INTELAI_MODELS_SOURCE={} "
+                  "--env USE_CASE={} "
                   "--env FRAMEWORK={} "
                   "--env NUM_CORES={} "
                   "--env DATASET_LOCATION=/dataset "
@@ -163,9 +195,10 @@
                           args.model_name, args.mode, args.platform,
                           args.verbose, args.batch_size, workspace,
                           in_graph_filename, mount_benchmark,
-                          mount_external_models_source, mount_intelai_models,
-                          args.framework, args.num_cores,
-                          args.benchmark_only, args.accuracy_only))
+                          mount_external_models_source,
+                          mount_intelai_models, use_case,
+                          args.framework, args.num_cores, args.benchmark_only,
+                          args.accuracy_only))
 
         # Add custom model args as env vars
         for custom_arg in args.model_args:
```
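The launcher now resolves the model's use case from the on-disk layout: it globs across the use-case directories for `framework/model_name/mode/platform` and takes the directory just above the framework as the use case. A minimal sketch of that lookup, with hypothetical example values:

```
import glob
import os

# Hypothetical example values; launch_benchmark.py takes these from argparse.
framework = "tensorflow"
model_name = "fastrcnn"
mode = "inference"
platform = "fp32"

# Search every use-case directory ("*") for this model's folder, e.g.
# ../models/object_detection/tensorflow/fastrcnn/inference/fp32
search_path = os.path.join(os.getcwd(), os.pardir, "models", "*",
                           framework, model_name, mode, platform)
matches = glob.glob(search_path)

if len(matches) == 1:
    intelai_models = matches[0]
    # The use case is the path component just above the framework directory.
    dir_list = intelai_models.split("/")
    use_case = dir_list[dir_list.index(framework) - 1]
    print(use_case)  # e.g. "object_detection"
```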
161 changes: 161 additions & 0 deletions benchmarks/object_detection/tensorflow/fastrcnn/README.md
# Fast R-CNN (ResNet50)

This document has instructions on how to run Fast R-CNN (ResNet50) for the
following modes/platforms:
* [FP32 inference](#fp32-inference-instructions)

Benchmarking instructions and scripts for Fast R-CNN ResNet50 model training,
and for inference on other platforms, are coming later.

## FP32 Inference Instructions

1. Clone the `tensorflow/models` and `cocoapi` repositories:

```
$ git clone git@github.com:tensorflow/models.git
$ cd models
$ git clone https://github.com/cocodataset/cocoapi.git
```

The TensorFlow models repo will be used for running inference as well as for
converting the COCO dataset to the TF records format.

2. Download the 2017 validation
[COCO dataset](http://cocodataset.org/#home) and annotations:

```
$ mkdir val
$ cd val
$ wget http://images.cocodataset.org/zips/val2017.zip
$ unzip val2017.zip
$ cd ..
$ mkdir annotations
$ cd annotations
$ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
$ unzip annotations_trainval2017.zip
$ cd ..
```

Since we are only using the validation dataset in this example, we will
create an empty directory and an empty annotations JSON file to pass as the
train and test inputs in the next step.

```
$ mkdir empty_dir
$ cd annotations
$ echo "{ \"images\": {}, \"categories\": {}}" > empty.json
$ cd ..
```

3. Now that you have the raw COCO dataset, we need to convert it to the
TF records format in order to use it with the inference script. We will
do this by running the `create_coco_tf_record.py` file in the TensorFlow
models repo.

Follow the steps below to navigate to the proper directory and point the
script to the raw COCO dataset files that you have downloaded in step 2.
The `--output_dir` is the location where the TF record files will be
located after the script has completed.

```
# Use an older version of the conversion script by checking out this git commit
$ cd models
$ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40
$ cd research/object_detection/dataset_tools/
$ python create_coco_tf_record.py --logtostderr \
--train_image_dir="/home/myuser/coco/empty_dir" \
--val_image_dir="/home/myuser/coco/val/val2017" \
--test_image_dir="/home/myuser/coco/empty_dir" \
--train_annotations_file="/home/myuser/coco/annotations/empty.json" \
--val_annotations_file="/home/myuser/coco/annotations/instances_val2017.json" \
--testdev_annotations_file="/home/myuser/coco/annotations/empty.json" \
--output_dir="/home/myuser/coco/output"
$ ll /home/myuser/coco/output
total 1598276
-rw-rw-r--. 1 myuser myuser 0 Nov 2 21:46 coco_testdev.record
-rw-rw-r--. 1 myuser myuser 0 Nov 2 21:46 coco_train.record
-rw-rw-r--. 1 myuser myuser 818336740 Nov 2 21:46 coco_val.record
# Go back to the main models directory and get master code
$ cd /home/myuser/models
$ git checkout master
```

The `coco_val.record` file is what we will use in this inference example.

4. Download the pre-trained model `fast_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz`.
The pre-trained model includes the checkpoint files and the Fast R-CNN ResNet50 model `pipeline.config`.
Extract and check out its contents as shown:
```
$ wget https://storage.cloud.google.com/intel-optimized-tensorflow/models/fast_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz
$ tar -xzvf fast_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz
$ ls -l fast_rcnn_resnet50_fp32_coco
total 374848
-rw-r--r-- 1 myuser myuser 77 Nov 12 22:33 checkpoint
-rw-r--r-- 1 myuser myuser 176914228 Nov 12 22:33 model.ckpt.data-00000-of-00001
-rw-r--r-- 1 myuser myuser 14460 Nov 12 22:33 model.ckpt.index
-rw-r--r-- 1 myuser myuser 5675175 Nov 12 22:33 model.ckpt.meta
-rwxr--r-- 1 myuser myuser 5056 Nov 12 22:33 mscoco_label_map.pbtxt
-rwxr-xr-x 1 myuser myuser 3244 Nov 12 22:33 pipeline.config
drwxr-xr-x 4 myuser myuser 128 Nov 12 22:30 saved_model
```
Make sure that the `eval_input_reader` section in the `pipeline.config` file
points to the mounted `coco_val.record` data and the pre-trained model's
`mscoco_label_map.pbtxt` file, as in the illustrative excerpt below.
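For reference, an `eval_input_reader` section in a TF Object Detection API `pipeline.config` typically looks like this sketch; the `/dataset` and `/checkpoints` paths match the container mount points used later in this example, so adjust them to your own setup:

```
eval_input_reader {
  tf_record_input_reader {
    input_path: "/dataset/coco_val.record"
  }
  label_map_path: "/checkpoints/mscoco_label_map.pbtxt"
  shuffle: false
  num_readers: 1
}
```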

5. Clone the [intelai/models](https://github.com/intelai/models) repo.
This repo has the launch script for running the benchmark.

```
$ git clone git@github.com:IntelAI/models.git
Cloning into 'models'...
remote: Enumerating objects: 11, done.
remote: Counting objects: 100% (11/11), done.
remote: Compressing objects: 100% (7/7), done.
remote: Total 11 (delta 3), reused 4 (delta 0), pack-reused 0
Receiving objects: 100% (11/11), done.
Resolving deltas: 100% (3/3), done.
```

6. Run the `launch_benchmark.py` script from the intelai/models repo
with the appropriate parameters, including: the
`coco_val.record` data location (from step 3), the pre-trained model
`pipeline.config` file and checkpoint location (from step 4), and the
location of your `tensorflow/models` clone (from step 1).

```
$ cd /home/myuser/models/benchmarks
$ python launch_benchmark.py \
--data-location /home/myuser/coco/output/ \
--model-source-dir /home/myuser/tensorflow/models \
--model-name fastrcnn \
--framework tensorflow \
--platform fp32 \
--mode inference \
--checkpoint /home/myuser/fast_rcnn_resnet50_fp32_coco \
--docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \
-- config_file=pipeline.config
```

7. The log file is saved to:
`models/benchmarks/common/tensorflow/logs/benchmark_fastrcnn_inference_fp32.log`

The tail of the log output when the benchmarking completes should look
something like this:

```
Time spent : 172.880 seconds.
Time spent per BATCH: 0.173 seconds.
Received these standard args: Namespace(batch_size=-1, checkpoint='/checkpoints', config='/checkpoints/pipeline.config', data_location='/dataset', inference_only=True, num_cores=-1, num_inter_threads=1, num_intra_threads=28, single_socket=True, socket_id=0, verbose=True)
Received these custom args: []
Initialize here.
Run model here. numactl --cpunodebind=0 --membind=0 python object_detection/eval.py --num_inter_threads 1 --num_intra_threads 28 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir /tensorflow-models/research/object_detection/log/eval
```

Loading

0 comments on commit 952bf69

Please sign in to comment.