From 5b3ae92215512023567885fd00930acaaae880aa Mon Sep 17 00:00:00 2001 From: "Li, Guizi" Date: Fri, 5 Apr 2019 00:35:35 +0800 Subject: [PATCH 01/62] Add lm-1b FP32 inference benchmarking scripts (#254) * add lm-1b * fix per reviewer comments * fix typo * minor fix * remove unused documents and others * minor fix * Fix formatting issue in the tf_model_args.txt * correct the lm-1b unit test * remove step in benchmarks/launch_benchmark.py --- benchmarks/README.md | 1 + benchmarks/common/tensorflow/start.sh | 14 +++ benchmarks/language_modeling/__init__.py | 19 ++++ .../language_modeling/tensorflow/__init__.py | 19 ++++ .../tensorflow/lm-1b/README.md | 92 +++++++++++++++++++ .../tensorflow/lm-1b/__init__.py | 19 ++++ .../tensorflow/lm-1b/inference/__init__.py | 19 ++++ .../lm-1b/inference/fp32/__init__.py | 19 ++++ .../lm-1b/inference/fp32/model_init.py | 76 +++++++++++++++ .../unit/common/tensorflow/tf_model_args.txt | 1 + 10 files changed, 279 insertions(+) create mode 100644 benchmarks/language_modeling/__init__.py create mode 100644 benchmarks/language_modeling/tensorflow/__init__.py create mode 100644 benchmarks/language_modeling/tensorflow/lm-1b/README.md create mode 100644 benchmarks/language_modeling/tensorflow/lm-1b/__init__.py create mode 100644 benchmarks/language_modeling/tensorflow/lm-1b/inference/__init__.py create mode 100644 benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/__init__.py create mode 100644 benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py diff --git a/benchmarks/README.md b/benchmarks/README.md index d48642837..e3fda63ef 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -28,6 +28,7 @@ dependencies to be installed: | Image Recognition | TensorFlow | [SqueezeNet](https://arxiv.org/pdf/1602.07360.pdf) | Inference | [FP32](image_recognition/tensorflow/squeezenet/README.md#fp32-inference-instructions) | | Image Segmentation | TensorFlow | [Mask R-CNN](https://arxiv.org/pdf/1703.06870.pdf) | 
Inference | [FP32](image_segmentation/tensorflow/maskrcnn/README.md#fp32-inference-instructions) | | Image Segmentation | TensorFlow | [UNet](https://arxiv.org/pdf/1505.04597.pdf) | Inference | [FP32](image_segmentation/tensorflow/unet/README.md#fp32-inference-instructions) | +| Language Modeling | TensorFlow | [LM-1B](https://arxiv.org/pdf/1602.02410.pdf) | Inference | [FP32](language_modeling/tensorflow/lm-1b/README.md#fp32-inference-instructions) | | Language Translation | TensorFlow | [GNMT](https://arxiv.org/pdf/1609.08144.pdf) | Inference | [FP32](language_translation/tensorflow/gnmt/README.md#fp32-inference-instructions) | | Language Translation | TensorFlow | [Transformer Language](https://arxiv.org/pdf/1706.03762.pdf)| Inference | [FP32](language_translation/tensorflow/transformer_language/README.md#fp32-inference-instructions) | | Language Translation | TensorFlow | [Transformer_LT_Official ](https://arxiv.org/pdf/1706.03762.pdf)| Inference | [FP32](language_translation/tensorflow/transformer_lt_official/README.md#fp32-inference-instructions) | diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index bc7fd699c..60500ba3e 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -397,6 +397,18 @@ function inception_resnet_v2() { fi } +# language modeling lm-1b +function lm-1b() { + if [ ${PRECISION} == "fp32" ]; then + CMD="${CMD} $(add_steps_args)" + + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model + else + echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" + exit 1 + fi +} + # Mask R-CNN model function maskrcnn() { if [ ${PRECISION} == "fp32" ]; then @@ -803,6 +815,8 @@ elif [ ${MODEL_NAME} == "inceptionv4" ]; then inceptionv4 elif [ ${MODEL_NAME} == "inception_resnet_v2" ]; then inception_resnet_v2 +elif [ ${MODEL_NAME} == "lm-1b" ]; then + lm-1b elif [ ${MODEL_NAME} == "maskrcnn" ]; then maskrcnn elif [ ${MODEL_NAME} == "mobilenet_v1" ]; then diff --git 
a/benchmarks/language_modeling/__init__.py b/benchmarks/language_modeling/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/language_modeling/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/language_modeling/tensorflow/__init__.py b/benchmarks/language_modeling/tensorflow/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/language_modeling/tensorflow/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/README.md b/benchmarks/language_modeling/tensorflow/lm-1b/README.md new file mode 100644 index 000000000..525ff352b --- /dev/null +++ b/benchmarks/language_modeling/tensorflow/lm-1b/README.md @@ -0,0 +1,92 @@ +# LM-1B + +This document has instructions for how to run the LM-1B benchmark for the +following modes/platforms: +* [FP32 inference](#fp32-inference-instructions) + +Benchmarking instructions and scripts for model training and inference for +other platforms are coming later. + +## FP32 Inference Instructions + +1. Clone [mlperf/inference](https://github.com/mlperf/inference.git) and +check out the `setInter` branch. +``` +git clone https://github.com/mlperf/inference.git +cd inference +git checkout setInter +``` + +To prepare the checkpoint and dataset, run: +``` +python cloud/language_modeling/benchmark.py +``` + +2. Clone this [intelai/models](https://github.com/IntelAI/models) +repository: + +``` +git clone https://github.com/IntelAI/models.git +``` + +3. Next, navigate to the `benchmarks` directory in your local clone of +the [intelai/models](https://github.com/IntelAI/models) repo (from step 2). +The `launch_benchmark.py` script in the `benchmarks` directory is +used for starting a benchmarking run in an optimized TensorFlow docker +container. It has arguments to specify which model, framework, mode, +precision, and docker image to use, and the checkpoint directory. + +Set the `--model-source-dir` to `/inference/cloud/language_modeling`. +Before benchmarking, ensure that you have run the script to prepare checkpoint files and the dataset +from Step 1. + +LM-1B can run for latency or throughput +benchmarking. Use one of the following examples below, depending on +your use case. 
+ +For latency (using `--socket-id 0` and `--batch-size 1`): + +``` +python launch_benchmark.py \ + --model-name lm-1b \ + --precision fp32 \ + --mode inference \ + --framework tensorflow \ + --batch-size 1 \ + --socket-id 0 \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --model-source-dir /inference/cloud/language_modeling + +``` + +For throughput (using `--socket-id 0` and `--batch-size 1024`): + +``` +python launch_benchmark.py \ + --model-name lm-1b \ + --precision fp32 \ + --mode inference \ + --framework tensorflow \ + --batch-size 1024 \ + --socket-id 0 \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --model-source-dir /inference/cloud/language_modeling \ + -- steps=4 +``` + +Note that the `--verbose` flag can be added to any of the above commands +to get additional debug output. + +4. By default, the log file is saved to the +`models/benchmarks/common/tensorflow/logs` directory. The user can specify a +different directory using `--output-dir`. + +Example log tail when benchmarking for latency or throughput: +``` +Running warmup... +Running benchmark... +Number samples: 4234 +Longest latency was: 2.9153692722320557 seconds. Average latency was:2.891982913017273 +Perplexity: 40.110043230980665, target is 40.209 . +Ran inference with batch size 1024 +``` diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/__init__.py b/benchmarks/language_modeling/tensorflow/lm-1b/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/language_modeling/tensorflow/lm-1b/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/inference/__init__.py b/benchmarks/language_modeling/tensorflow/lm-1b/inference/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/language_modeling/tensorflow/lm-1b/inference/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/__init__.py b/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py b/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py new file mode 100644 index 000000000..77d903020 --- /dev/null +++ b/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py @@ -0,0 +1,76 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# + +import os +from argparse import ArgumentParser + +from common.base_model_init import BaseModelInitializer +from common.base_model_init import set_env_var + + +class ModelInitializer(BaseModelInitializer): + """Model initializer for LM-1B FP32 inference""" + + def __init__(self, args, custom_args, platform_util=None): + super(ModelInitializer, self).__init__(args, custom_args, platform_util) + + self.cmd = self.get_numactl_command(self.args.socket_id) + + self.set_num_inter_intra_threads() + + # Set the KMP env vars + self.set_kmp_vars(kmp_blocktime="0", kmp_affinity="granularity=fine,compact,1,0") + + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) + + if self.args.socket_id != -1: + if self.args.num_cores != -1: + self.cmd += "--physcpubind=0-" + \ + (str(self.args.num_cores - 1)) + " " + self.cmd += self.python_exe + " " + + run_script = os.path.join(self.args.model_source_dir, + "benchmark.py") + + # Model args + arg_parser = ArgumentParser(description='process custom_args') + + arg_parser.add_argument('-S', '--steps', help='Number of steps', + dest="steps", + default="100") + self.args = arg_parser.parse_args(self.custom_args, + namespace=self.args) + + # Model parameter control + cmd_args = " -b=" + str(self.args.batch_size) + \ + " -I=" + str(self.args.steps) + \ + " --inter=" + \ + str(self.args.num_inter_threads) + \ + " --intra=" + \ + str(self.args.num_intra_threads) + + self.cmd = self.cmd + run_script + cmd_args + + def run(self): + original_dir = os.getcwd() + os.chdir(self.args.model_source_dir) + self.run_command(self.cmd) + + os.chdir(original_dir) diff --git a/tests/unit/common/tensorflow/tf_model_args.txt b/tests/unit/common/tensorflow/tf_model_args.txt index 6381db35a..79d76806f 100755 --- a/tests/unit/common/tensorflow/tf_model_args.txt +++ b/tests/unit/common/tensorflow/tf_model_args.txt @@ -53,6 +53,7 @@ run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model- 
run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=1 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=64 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=64 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet 
--checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 1 +python common/tensorflow/run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=lm-1b --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models,numactl --cpunodebind=0 --membind=0 python /workspace/models/benchmark.py -b=1 -I=100 --inter=1 --intra=28 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 100 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --in-graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --data-location=/dataset,python /workspace/intelai_models/inference/fp32/accuracy.py --batch_size=100 --num_inter_threads=2 --input_graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --num_intra_threads=56 --data_location=/dataset 
run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --accuracy_only From d8f901449e287fa02061c2d289121b14fd038023 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Thu, 4 Apr 2019 10:49:07 -0700 Subject: [PATCH 02/62] Allow overwriting the KMP_* env vars in SSD-MobileNet Int8 script (#267) --- .../ssd-mobilenet/inference/int8/run_frozen_graph_ssdmob.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/run_frozen_graph_ssdmob.py b/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/run_frozen_graph_ssdmob.py index 89b890ab1..90a1d1fd0 100644 --- a/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/run_frozen_graph_ssdmob.py +++ b/models/object_detection/tensorflow/ssd-mobilenet/inference/int8/run_frozen_graph_ssdmob.py @@ -36,10 +36,6 @@ import argparse from tensorflow.python.client import timeline -os.environ["KMP_BLOCKTIME"] = "0" -os.environ["KMP_SETTINGS"] = "1" -os.environ["KMP_AFFINITY"] = "granularity=fine,verbose,compact,1,0" - parser = argparse.ArgumentParser() parser.add_argument('-g', '--graph', help='Path to input graph to run', type=str, required=True) parser.add_argument('-d', '--dataset', help='Full Path to input dataset to run', type=str, required=True) From 8802bc680393686bbf1372ce0ec65c58e759c412 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Thu, 4 Apr 2019 15:00:11 -0700 Subject: [PATCH 03/62] Add Contribute.md doc with instructions on 
adding a new model (#266) Also, updated the launch script documentation with instructions on using the `--debug` flag. --- Contribute.md | 176 +++++++++++++++++++++ README.md | 5 +- add_model_init.png | Bin 0 -> 14105 bytes add_readme.png | Bin 0 -> 15909 bytes benchmarks_directory_structure.png | Bin 0 -> 11588 bytes docs/general/tensorflow/LaunchBenchmark.md | 95 +++++++++++ models_directory_structure.png | Bin 0 -> 11038 bytes 7 files changed, 275 insertions(+), 1 deletion(-) create mode 100644 Contribute.md create mode 100644 add_model_init.png create mode 100644 add_readme.png create mode 100644 benchmarks_directory_structure.png create mode 100644 models_directory_structure.png diff --git a/Contribute.md b/Contribute.md new file mode 100644 index 000000000..84e1b2f64 --- /dev/null +++ b/Contribute.md @@ -0,0 +1,176 @@ +# Contributing to the Model Zoo for Intel® Architecture + +## Adding benchmarking scripts for a new TensorFlow model + +### Code updates + +In order to add a new model to the zoo, there are a few things that are +required: + +1. Set up the directory structure to allow the + [launch script](/docs/general/tensorflow/LaunchBenchmark.md) to find + your model. This involves creating folders for: + `/benchmarks/<use case>/<framework>/<model name>/<mode>/<precision>`. + Note that you will need to add `__init__.py` files in each new + directory that you add, in order for python to find the code. + + ![Benchmarks Directory Structure](benchmarks_directory_structure.png) + +2. Next, in the leaf folder that was created in the previous step, you + will need to create a `model_init.py` file: + + ![Add model init](add_model_init.png) + + This file is used to initialize the best known configuration for the + model, and then start executing inference or training. When the + [launch script](/docs/general/tensorflow/LaunchBenchmark.md) is run, + it will look for the appropriate `model_init.py` file to use + according to the model name, framework, mode, and precision that are + specified by the user. 
+ + The contents of the `model_init.py` file will vary by framework. For + TensorFlow models, we typically use the + [base model init class](/benchmarks/common/base_model_init.py) that + includes functions for doing common tasks such as setting up the best + known environment variables (like `KMP_BLOCKTIME`, `KMP_SETTINGS`, + `KMP_AFFINITY`, and `OMP_NUM_THREADS`), num intra threads, and num + inter threads. The `model_init.py` file also sets up the string that + will ultimately be used to run inference or model training, which + normally includes the use of `numactl` and sending all of the + appropriate arguments to the model's script. Also, if your model + requires any non-standard arguments (arguments that are not part of + the [launch script flags](/docs/general/tensorflow/LaunchBenchmark.md#launch_benchmarkpy-flags)), + the `model_init.py` file is where you would define and parse those + args. + +3. [start.sh](/benchmarks/common/tensorflow/start.sh) is a shell script + that is called by the `launch_benchmark.py` script in the docker + container. This script installs dependencies that are required by + the model, sets up the `PYTHONPATH` environment variable, and then + calls the [run_tf_benchmark.py](/benchmarks/common/tensorflow/run_tf_benchmark.py) + script with the appropriate args. That run script will end up calling + the `model_init.py` file that you have defined in the previous step. + + To add support for a new model in the `start.sh` script, you will + need to add a function with the same name as your model. Note that + this function name should match the `<model name>` folder from the + first step where you set up the directories for your model. In this + function, add commands to install any third-party dependencies within + an `if [ ${NOINSTALL} != "True" ]; then` conditional block. The + purpose of the `NOINSTALL` flag is to be able to skip the installs + for quicker iteration when running on bare metal or debugging. 
If + your model requires the `PYTHONPATH` environment variable to be set up + to find model code or dependencies, that should be done in the + model's function. Next, set up the command that will be run. The + standard launch script args are already added to the `CMD` variable, + so your model function will only need to add on more args if you have + model-specific args defined in your `model_init.py`. Lastly, call the + `run_model` function with the `PYTHONPATH` and the `CMD` string. + + Below is a sample template of a `start.sh` model function that + installs dependencies from a `requirements.txt` file, sets up the + `PYTHONPATH` to find model source files, adds on a custom steps flag + to the run command, and then runs the model: + ```bash + function <model_name>() { + if [ ${PRECISION} == "fp32" ]; then + if [ ${NOINSTALL} != "True" ]; then + pip install -r ${MOUNT_EXTERNAL_MODELS_SOURCE}/requirements.txt + fi + + export PYTHONPATH=${PYTHONPATH}:${MOUNT_EXTERNAL_MODELS_SOURCE} + CMD="${CMD} $(add_steps_args)" + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model + else + echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" + exit 1 + fi + } + ``` + +Optional step: +* If there is CPU-optimized model code that has not been upstreamed to + the original repository, then it can be added to the + [models](/models) directory in the zoo repo. As with the first step + in the previous section, the directory structure should be set up like: + `/models/<use case>/<framework>/<model name>/<mode>/<precision>`: + + ![Models Directory Structure](models_directory_structure.png) + + If there are model files that can be shared by multiple modes or + precisions, they can be placed in the higher-level directory. For + example, if a file could be shared by both `FP32` and `Int8` + precisions, then it could be placed in the directory at: + `/models/<use case>/<framework>/<model name>/<mode>` (omitting the + `<precision>` directory). 
Note that if this is being done, you need to + ensure that the license that is associated with the original model + repository is compatible with the license of the model zoo. + +### Debugging + +There are a couple of options for debugging and quicker iteration when +developing new scripts: +* Use the `--debug` flag in the `launch_benchmark.py` script, which will + give you a shell into the docker container. See the + [debugging section](/docs/general/tensorflow/LaunchBenchmark.md#debugging) + of the launch script documentation for more information on using this + flag. +* Run the launch script on bare metal (without a docker container). The + launch script documentation also has a + [section](/docs/general/tensorflow/LaunchBenchmark.md#alpha-feature-running-on-bare-metal) + with instructions on how to do this. Note that when running without + docker, you are responsible for installing all dependencies on your + system before running the launch script. If you are using this option + during development, be sure to also test _with_ a docker container to + ensure that the `start.sh` script dependency installation is working + properly for your model. + +### Documentation updates + +1. Create a `README.md` file in the + `/benchmarks/<use case>/<framework>/<model name>` directory: + + ![Add README file](add_readme.png) + + This README file should describe all of the steps necessary to run + the model, including downloading and preprocessing the dataset, + downloading the pretrained model, cloning repositories, and running + the benchmarking script with the appropriate arguments. Most models + have best known settings for throughput and latency performance + testing as well as testing accuracy. The README file should specify + how to set these configs using the `launch_benchmark.py` script. + +2. Update the table in the [benchmarks README](/benchmarks/README.md) + with a link to the model that you are adding. 
Note that the models + in this table are ordered alphabetically by use case, framework, and + model name. The model name should link to the original paper for the + model. The benchmarking instructions column should link to the README + file that you created in the previous step. + +### Testing + +1. After you've completed the above steps, run the model according to + instructions in the README file for the new model. Ensure that the + performance and accuracy metrics are on par with what you would + expect. + +2. Add unit tests to cover the new model. + * For TensorFlow models, there is a + [parameterized test](/tests/unit/common/tensorflow/test_run_tf_benchmarks.py#L80) + that checks the flow running from `run_tf_benchmark.py` to the + inference command that is executed by the `model_init.py` file. The + test ensures that the inference command has all of the expected + arguments. + + To add a new parameterized instance of the test for your + new model, update the [tf_model_args.txt](/tests/unit/common/tensorflow/tf_model_args.txt) + file. This file has comma-separated values where each row has two + items: (1) the `run_tf_benchmark.py` command with the appropriate + flags to run the model and (2) the expected inference or training + command that should get run by the `model_init.py` file. + * If any launch script or base class files were changed, then + additional unit tests should be added. + * Unit tests and style checks are run when you post a GitHub PR, and + the tests must be passing before the PR is merged. + * For information on how to run the unit tests and style checks + locally, see the [tests documentation](/tests/README.md). 
diff --git a/README.md b/README.md index d369ba9e4..e409c28b7 100644 --- a/README.md +++ b/README.md @@ -29,4 +29,7 @@ We hope this structure is intuitive and helps you find what you are looking for; ![Repo Structure](repo_structure.png) -*Note: For model quantization and optimization tools, see [https://github.com/IntelAI/tools](https://github.com/IntelAI/tools)*. \ No newline at end of file +*Note: For model quantization and optimization tools, see [https://github.com/IntelAI/tools](https://github.com/IntelAI/tools)*. + +## How to Contribute +If you would like to add a new benchmarking script, please use [this guide](/Contribute.md). \ No newline at end of file diff --git a/add_model_init.png b/add_model_init.png new file mode 100644 index 0000000000000000000000000000000000000000..6bacd1bb67252070894e20ef9dd53c7e9432d1df GIT binary patch literal 14105 zcmb7rby!qi*Y?aXbPXL6Lw6%c51rD|jUY&e(lA4Zga}ATODWx=z@Q)qC|%M>OLxk5 z{5@|x@AZD)A0O8R*O`6x+I!YIYp->$dmZ9*wN(l6Xz@TG5TUx7l0FCo1_Re?I9R}6 z18s$Q5D4#wv!bG|x}qXn*W1I<+0_9AQj1G}jcsV)OxfFcZWUO_ri9Sge3)WY63Kwr zdSs|TnyRayCf`#{khEl?OVTA=#KMf1gqczYFPg>z-vQ}I z0_<}3{IIEODJRi)Ib!x7{@mW(aX(|Q{)!2TSCUBN_~lp}3~BPPh%+Ptg$oN>alU@x zCEoAL)h3^Y-D@KFBePEWq!G6?xwl*8y4V#ALVaFV!vyKR(UlWP=0P0m+UYfu9V^+T z;p?8Q<4H)Tnd{}f%*_?~s>o^n_LePq$wOi+O+tbWmC6xFL&bz(ef8Lj@-DyTR~V0! 
zrjY3ug8T8hw4^d!x_*w@nW;U_KFsW<-8nB}+M?o9TGiM4Z(ea^_2S|fRDaJVll`_; z1aB5jenT(yE!fm%kX6&CD7|U4MP#^={s%XE=O~ zeAsirbE~i1=zSOLRl8_74|mv~sqZ}DvE!NQkE&zpUyv}k-bgStkZ_3WZ$G>MqYQ++ z`u*ePbWAlHqVHgZc7+h)?5^(%J zFc}m?k^;uPO28QY#kwJontXpFpT(4y0yol|9l!lqFuVC+Q*M`UZG#`S%?^RbNDK{b1?eo|L*zMQ5mM@qV=&b&$GzRhOeT0nCtF_c>Y-46H~E$<1X!ysG| zeL&2Qaw))=!y-lyZ(<8{!TgC;;5eTZ4P&vK6|qvd9urC++*8EKaiSDQbV<|jaN$C4 zkr@gu1$O07?wB)$iQ%DPu1#rQ%iy&s{m%v&YX|m@Lrk>Pv_(Ktx zp8Ri!snGY9gj-2v38ubmEV~@IwPJ_j60?AqSO~gdb@FqtFjk1V$|wm}Jsp>V#!HR2 zl+Fr2Q*zXZg=2$1FA0hgkl9BZc1PN*2wG_IJ4!gpl+yXY43eFD=DJVpq6HUg4J-L4 zX@8>}?8)jNI|{B$&E!F#AkrX`pnJ2)A<18ssUP^f;o*nAjvPSqbyfB5ex6v)_R4LQ zofBk8p89J3tNnn)pU5gnpnD(9_%;8F-}3b#BktpHa2E=uq{beL`yRI>QZce2lK->F zXB@pmCyf)h3M`vyZ4`smMbcCD|9Q2xAX%_Gz{-Qa;W%W{N0%EkxX@<;xpaEa>vs3ny-51)kcNS zHFm3i)=EB{w~|qiF%N0KVmjs79=d-(>qv{Dl}>~thR{-pUOC}>&s^8}H99$bHpV}C zwe_G)BKP4?0o7RY@a&diHf^?c_F;B{D39on)0rSFnkM3VpPlPtI{P`>;Ji}t#QTYI zZu5;loh`>?ogPjdO{}<3I8QjwPhM4iA150BF}_eaUvR62U2I=MR??vRq~(WlZr9ey zR^Hf$v4m2_oKS13$)?f7Nz0;-I_GVBwRs&@wRuV&F$a6rsgI%r)|EMM4ff7ho(yGok-wgE#c=P+TsgO z=j@~(+t~=uKCVzt{1#$RRFP9L?EBF@j=XKG5ca^*s8T;P6F1jh`E*7;lbr z4sDK=d9+*W?$cSUS!0ohj_gBo(hY9w;!;mO@_xL1PP}t}M{(D9R(z(@tJZr-((u8Y z_ghIUiE2+*-{9THyWM_2cgJ_NeDZcbO@8RPe4p?>VZ3D00X@-uf8tr$Z%gqu^S85s zEdzTdesbp&du8)-0j>UhGD!h9Th70-wgUQ&FL{qmZ%nVBoCeSYczrqxJoOi8H)sz# z**g?ES-vp5l0LkDwsy&I5c0MDu|q9UgVp8CrTVS*Rmj2lf%W<#9TEz~CdX{VehU4H znSm`!B#cK2oq!TzN?{A&x)XIpM@G34eHfUu_nDd4Db$@Bb||c}O{>kUJ*wqcHd-D% zD4jXj8J;nBlzU?@sVZH@1W~GGX;zYs7fl{aykWzmqM&(1aw_^$HqhDEoPf_!ey}j} zye_ovo;_o?X*bu3*~+6|1}pD>$+1FN(^yYfe`r}``17fAsHcr*zR*e1kxh}77Wdq1 z{&36ngDZ;5Z#`?sTy(!bIQP=@c!*cm5^d@!O_7tG63BUq&k=6$jYiwPY**{v%9T%x zkM&V-fO!B$us|s8ooSnKp!$^$>a|UR4Pujpmx3t%MtQAhiiU|MNLco<%3~vkBuBQ# z6Aonc1`RrK_q(%JCh2b9C`;s$x~Lbv<(OjeGv7-Q)e1yleOv!VyXNa_(pFuo8(g;e z{rJcGzUx-O(B(tr?S$@xYJq06Pws~eHw8EO88#W7uN!&qTeLlO-WQ&%ZyzXJa~?1^ zqORdI8R-=EU+a4G_EFaRP)oO#w#TO*@8h~;;>)F^B`*fSey3hL+$ z7P#sN$^F$v|GR_YFus~{mf|&KnCXyZ*eUikV}B_1d`d@QNb$he`5e!j?wmc>vzg#p 
zrFPcNw;lsKe5dAF<_XOmv&}ZM>H1}(GPCNl6thA7p9g6DGIypMe`$WGy?GvGtvs(i zpZoiH=ycEQMANLF-!4PDOiz&P9p_bDkjKHol}1DVXh>^Fz;)hU^5Wvzt67~1Y+oXB z#tSpH>TLI{xdxeSjJrj!@mpuEIlUTs2v@UK>&nJbp~q?>=+%8T`^ z#3$-c@P8!b4akYl@j5d|3p`kysxMgUit5I1F)KEk6^OR{6wI|b8YN~l`(eQ9wT|^+ ztM~T1*@k>p-hllWB(3a5ROM}*Gd29>LC?hJ!L&U@W2WcZy{KH9uNhpog( z1Y7Wf3@;(D7E+|AugXkfHdjCSiG&z?&lO~PM9=vyjdy;;mYE7OSFDavRdsP2jP!sk zz4!I?eLS_fId!|~xDOtFBPBGOj2jbrn3nn5=9``+J=c=;DRHzbL;Tvn(DQ4wv?R>?o+;?n=j|6h0hd&hs1H2pV8VTpg2{I@&*At^2JSAl<1=wH_QXB04(ES|K$ zf0J@XvSG?Uf#9J2Q*tyZapi``6MU zL18X6zdd$O_x##}suq9v%(JjEUxo@+EOftN!T;ZM_XUYBn4J)7Yq+pYQ%`$lVn#hF(9sm-g!u z0tFd=_vk&HZQ;!W?-S_q(Cfy46M5^`tQIgBl=%gMNOLJU?522eXTR+?f9F(jz+vA@ zmhpxsI&J3*L0kJ_cT#SP9rNaMd-ET+CXB7uT-;`xOSZ?2r`5%~KjMogp0*Yp!C9qRS)U(ui7r>QEUX;2EQaR%Y;sX8 zR!J;MyeB?7@!mK9$!pda+xC1{L*UZ^B;?QK#)5qDXr6>mwBUuHXPI`c20j)4iI`7U zI3}H}4H|T{_i^88tv@~Z;acKi*!Aq<=3y^m*_3ry?)MA8hQ|fU?al7<8h6)=cTPLg zb%yPM&x{n(d5lZ_7DHbD$dmb9#gYcy#)&FQfArp{E;VqwqO9C`tTZhzPdfsOut@UR z_V8QvSDR6Pn7ei{UU*$cHpg-bIN z8WbMyr1n}PhJ*-P{STS*?IjDKI?h!~&J}MTe@k6xwneOn=e^U|o_JrTeR&Zhchhw( z#m3fSo`(k&OL_a2RYOZ_5M@wj*JF}p7YO!Yq*PH+!Aj>d&!G6;5ggEDQBygm&Zca}Rfyi$^K#`XQh zusazY+`&~j_qhjwt1ngk+c4g%;#MY_HaON<7n(IajpRx-a(?gcO%5G|+mu5OC<_## z@ZNOjmuN&T9#Rf|U-pfVAQJfq%umb6P-%KP!Tdl}?)JohVbGKMvr_aI{*>`*%bbFP z6w0wk!h2%6`wQ&>k|dmNoKv>_ir)q^g}xXbk+NwFCkS-(QGU%7vghRCNhlNaBvlwm z=eNwh1)*{U||J+_5&q4-ompy})P}tMo%5-8DxpV)_aaPAihyvg0 zE#N;H6a;i(wFqwiw901wjw3Xp*AV*;{sX=dfdh#b3^Lgi)Rp<-i@~obifW{RAa)Rg z$a8Xse~$0c?Ov1wXZHLIXcN0DR!t^d-^IkI9LLI$lzYBIO{_imyO^!><_KS>VVs41A zn#5@MKv6a%Fg6&k9jP_UlNwS4rJF1qrIUx7K$UQz37OqgP`YR&CphL=xu_}(hZ1$u z#UA@4xVT3(y+6G~a)PI~!A!pyZsP)V^}vl(Fn0XS@TwmlLZSkHmw_BkS8i=#m6C_! 
z;V4az)(Tr@QEIxfVT}>@8P-bRrahicc?%V_{neXnESI%DK-Kg+sFO&T;eEJ2F1!FHLfVu#&Dc)WN))V@iwf1^`BVPAR&$h!o(4im;l>p(l3-1+CqWg(joGuCZtZQ%8kk0d*ErN*f4VFy;gviDQ;`M?PJR!0trDBN-(yu*7 zS`)ts^e_6FQ@s;GENk<$dihI?h>S#DBzzl$QvGuy4hNIfl6n$y^23>L_hwDp1}UgeI<3#tFu{Z_Oq{RzF^r*^xsjgrA(v~fPk_`} zQF>8wju*W>NYIo;LQhjl((%_W$@}+_4D|7P#mTaEJfQxe7i_DT2yMC;gkh(0xiY@t zg3X_AHw? zVyB6&T>rf}i9beQI{yL=3{9a@eol_@qeCIePZP+Mgi}9yLMa4<4~4ACjCx^2kS#yL zXqnY{g4+CeV@V9CMb%^qm#kMy-zUcSupSI`ErGsSq#sp~ho9kecXPn59o6H)k#HMI zupY$mWvYKTQV}>vOT$U&j<^s<;L?VYD~oY`&j&F0gb|_u>AP^W@Owco#|4A0bB?5%~Xk~cAal~ zcB5>|oAs5Qt*jrwQ8?1L%x4N{qYR|wR{z=VZ0|?Vq3JUcdGu49X$@RtHG$3}>!Rk> zt2T!Bp2O*UruC8A;}wec#9Wu$#%;I(v_M5jEr>)Aj25dBKb@?k>3wh7z)nab9NBVV z=OWyE{rRcW&qgkptIgubE$B*%^H{Xs{cwg#pN$`pgu3~mS6m)qPiN}cP@>TA`D7q7 zIIOeQN}(PAsgR!z3#%mb_R34z6BjNZ$R>{#KqhK);y>$Q=h~_4-dND1a&_m{D~fE9 zy}I&Y7r(0luuzM_-Q`f&dgGG2(|9?F%*n9gCwa|##y@@~Agj`lMLb`Kq}pw;;nlqc z2;*q_NbBH3%ws}8OY^is5E=>%1F6X)XzKy=wD38BKAM8Zh*g;j%N@g&BKiolZwo+> zPiar6k<)me{PwL%mA~wL1whT%w^tln>Bd&JlpH%+-FqrF;CPW8S*V8sfc{J_wQ|G~ zuTHiUJMaETHQ;vz@6R-F9J3+4Ha~3Su?$z(Qn5ZD#v*3W%F4=$^qp3Zqkk12PH3L; z=(edf!Vv=JcV(zQdRDTf49~v%? 
z-!=9@Ie{`bDXfFDhugS%&}+%}3p&Q$YxJ#pr4t$(bh*i~ci zEvs<8f$2X4Nf6hPKLA+6HP2s0D=}pL;^AN?$7rP}fTnH!PWCi`=~4@H-Ut&vr!X%6 zPEI;0^-~-7K~yzCOZb`kX0pE4yjc7^js|8h6s>@3l88X9C$CfjP-IXYqzUPV7^xI0 zL}drT9ThA}8g;RF2?#vX^fIg(L>v#VrG>FRe+FCO!iDm-3DdcG?g zYUIEPz3H+BpV^+S#uV3EwFip2O!7LahBuJXz5JddPHaMP!DkhGsN8J$^#zThsj2B3 zmGbWp1*#UDWJ?s7@*EG8$t|8A2BGx38n zbP_cFXv!)m(d+9hI9BiW^7HpFH2B59!TlvZOlYIilDCr8rWDC<_|j8HrecZ+$2NAu zHerzf>>_fJS26Kq1U*?A#d6_S1V0q-e;>Afy?CaIp3p%5ndjhxK|iaao5xTGIs6hh zkZ=$T#s)t}!I!*&bl!S{m9mZxr1(_^7}O#O$m*{bL8HO)e=9kRzm*&izK-r8vITIa zz4wuiK7MyP)Dzw8FPi!DFh}grIJ3(GrWUEq3sX32<-6HsOKANBZ*SOiNGMY*gEX$0exW35AK;wi?uZdvdh;Ez%j(Od2t% z#nVyV-{^I+5l=+O%85_OYeNLtK|mW)SCmXTf+dt+X%Idm)es38qP~0HL|V&YnN@Tq ztau-59sYMMwgONJM=+>2nX!z^pwxe#TsQf$h}+*27OSs!Pv|cQ9xer^-JYv+cO>>u&YCoSUT-CN17u+2s*6V81lS?$H4I!{2vu< zh`7xfortufxgWjr_n=i~!*U0)`S<5g98hx0%b*>hahU1Z+{T>1k?TkXWRYLvd>x#O z;n{)*pghTE@m&2BBfW)+F)3-vUO33J%D~fY?gu;emF45QdWE9+bCyL=X=<*v`Uq@d z;X?M%UIX-6jNEW~1RlW)X#h$++4kIhC8t_6)tI^NoG`Kc3*5to&^if`vU4| z8JF>nz*QBfR*K`uI7UOx=X(h>xUI@nl}p5){CJa6k7FpOo4tIE4OCWe-CQ1JDt%ib zZPwpV_vh7?oBa^Iph5|=e-w>PNRB{0r8_<+mb*#mWQ+AwgTJf4SLb~v{;v2PnO$$Z z#zW5U-sg#Wv4Z$Cg`{?wBk}Sgrpm9XwbB`5*$EisMuaim`)*U?z7tM=Eoqjh@o-|m zO;r!#{@csB240|uD8-OOtd_x-#QyqjRc}dqIFblRrvLwgk7sJ5g65nKU%uMv=bfWS zs?Iini2b(W7)zh29Tc!ByK;nLD*-4B zHYz9wrZ8A$4WlsIz7+XbrXeD|DB9|K$hrTHDPK5>X615%Um>y**?k?BN78ZWGiRju zybqOdm54Mwv=xk-X9rfuc>hig@F>pAjU|nRi~~#3{v*EZDEhLbl1vb+z#&FkML zT!?|crj0L`D2N}GY&-5PXeQ)TD%dibp4FwYvI@{I2zM=3C734#M3~<5k)NhLCTb1< z^?iwPkpNSp037P~ZRWH*IR9g65!7O0e$+%BE)IkNZ+m8@aO8U+L@d4A%lL{1`9PB5 z@a2P$muu|E3d_flbZ4mzfIK+?7=RCjN0c>-xz8*AfQj!Qz|y_Og^mq#imbDn~hx@_k%!MF|@f?BUNUX{uSh%(M*ILScvg;d+Mwr_YhNCjfaA5qfi| zT8%H;l2qS#sLQx2{jM}C^BRR?T`Dc8A;f?&TAJ5kD0{?K)idy7L_^{KHs9;&f+Tb7X9}|RTnK|PQyyLnU(Ae%{%x@nDmxC8OnPk@0$JPQ%sML@22iwd}>bq zFkAHSYZq?6La5)ISB&^4fO?R2&^twAnviP17&DTL-Y7+|kAdBW%ueWZ&;TRi`zTL3 zpl|Vat%>L|P>vNm$qR_yILW*l%D&Gt1s@rW-(Bp?kEasQ2Iw!bN-Kbic|do32|&&R zMqZ2yoD`xl+zX;K^hu1ui01Ffst^1CBrbV6@ve?oUe5nf=+6`XD4de?H*YG3MmIc$ 
z3vBT)5}2$zf=>Vp7sQ91AIBRg&kIg?{-W9J`CFcdFI9#W`mWhSSt2nH??y{B|AN-0 zhrf;iZiS>{29#lUfwN?c)LHJh-YW(;F|ssxH4|MJb#UfW>9cA3g-^|{GYTxKNpBfs z0uKxr94uQsQFnM0+^dP$G0eQUoYg$W)w+(3?AKJfNv2-3gxQ?n1{uZ;{{o(fCzdXh zy&p6pQImD{s!zvC39BqVq;C9qr=2W)ghSll9(1k`5RT;@iygtZ8857C+`phMfJ(To z9&JFg#Ta(u6*@5785S0PiYnvB5?bGG#fmC`UM%rpEU1->6M1x8vX_*U9L|C%h#i1Z z1f9dk8v*zU;MBPf;`MG}rGznTlVv)LKg`!$Hi|3BYJK)GP0vj#$u{HkM#8hlc zCk#nm*Nqw=_Kw)ozxCU#Z2cob0jp1a_<@2$F*=CuDmc3GQVCl`l>5~M1pkPiyJ_>=r*?R2QcMM?PR1ljz z^>rx?Fc=z%ZCY_gJd>u)C4Zl%OwUe(n}X}57KOnpu8P{b`*)7zsfk~nw>b?beiK(-ggW}De5{Do)OU4&d`gn z2TygGJF$`EBhv%FfS;)!z+BUgOu#WeEQ;`Y8qNA^73mAo&@XYo z_eKETqkqHbibDgf(QbR(md2B#;Zc1VCp(+2&O?-2n44vqwpWLfw)!oc)c-Z-FG7md zSeC(Hwm|MCAU+E_bE_F6g0Pxd-^X1*D<@gRs)>Wa{n!N_Dit>zn|q&KJfPNp)Wam$ zJG+>n>oyOtyPW9y#W4i)1@wxb4RjS6*G2kYo|$kRsJ@_=%GeriG>Xt*+%GnP9PWW* zGQ#{u&8|Kj6^oYjCw3k>Tb3&nV%WAAX&#rz7}K~^piidmi8`SdIgu?4j8eGDlhu}* zvn?J&FAeo7Q0FiY0`iHrcxd4L;WB{Tz2)xHSFMBcPSBrK4=N8?d@@{yN^Tuu8=esO zV@P)0BjojzVwB&Hs=?aAWMn~+=?GjNZ#)r@HiB@E+^3NotPYPI7|6EXZ zH9C(O8tx0}&HxzpTv+bCk}m)JN56CR5&#THtgAGx7=qkjYz2Y(NwzGWOP*Nm;{|wb zvM-hI*92N&Ivyhz`mQOHjp@RA>a#))vvmY;MDV8MW=^*TZ@!X))}_f)^k`81Vk@#r zANr)qmusuM#~4`5(3p6&KIaK}*UUP3MDWKkuO{hJ zrGjgisA5UI9(p8T-()C!s#hXMF_dzZ6mq94x&fC!=hy>7q< zJdN`J^}3oq6LVx${}n}q%3(yDOPXwkZ4uNit{G#Xz8(Ua11j84K|=&51Fr=(F8-|8 z36c2Y4aSP`Qxb$6_E0&d_DCmPK<$V=G~h)NgTmZX0917FB*;*Pee|Y z=>ElZMdMs}Ln!$yVl~Lsi>#~H%ic3!<&j>7ife_f57I#JhrSTYY4I3UF5SPi!Gv7) zevZQ50)9UomWEuPyh-bnfJ+tTU+IsF~bj7jn) zW{~a^wa*dvU>{B>oC6wZ8?nEBV^_w@M_`Q{56KPX%c{25(KrLp!}n~*^mDTl;V@W4dAOl>A1w*^I2z4uH7@I)04p zC!?ARzGUoL1wbC%m>$_|r|yp9M8VOiKt`gc!-+QRX9tQ`8}0IIG75eas9zOVd5S4cUc)(AGzr;Sn1rQt|aZ zpUIj4dQKV?PtK)4t#&@=Lrk0v;HP7Eg!Gy`eKZP$k07M%nS1r}W!G`o&SGzpjPx>N z;3ks?lRqAd%ybA^$fU+f<_C6!fMFL%)sn$~30w{IWrBPh9s%9BX4e8iGzUmq$vV7? 
zSQ+KkTN?{JRMpxHn+cniB5w_Mfbu+sSemvnXCPW}Gy(-6QKUx(C_ApkUvtkSeIhJh z^!qzuCC{@`aRyR-Ps4OTjWIAo}a{pNV3^ae*4?JVs(FPOCxXGLIZv8 z+YoLV*r{8H6X;EU6*}Z5(m4g{f;xp^>zJ8J=oS+OHqmN8GhWLm$>}5C)l6D>n$M|7 z1PO9x6QI>?@Z}$(K38>eqC82`POPAK{%Uf;Tp+%7lh3EW7DT(=EPadZ>hwRfbHoYP zu{qtI*hx{8ovu&d>Ml}FxR5jn^|Ztt{SuG-Lc%T{jP0{NXuHAmCSin}6TUNsnt+sG zhNlG_uh%(@y;Udk81YPE=LqT;S)#eI;frn%Vq=S{`Ld1n1$T4W@Qj)fE)Xa@AjwDI z6sV`7aWYG&43wC*ozg_AmxFt31opYTq0!uwRWq99(_P%Q?M-gCh{k77+gmB z8tBlGNzJMx()MU85b%nmihG=S>YYAP4GDC!u!tyQ)c6a0@ZbYa54VYfP)Zlt` zE84BscgdzOD|%`W%hAzMA7CTZvM#QC4{m;)W(mj#YB3#9uvEe6emrQm(RE2r{L_}T z=x=}&C*OX?zCa1LHo^4AAFVLp_*^z>Q?=w*p1d)YbB>qttOAv=L=Uu zv$lCawR0tOhBA26gQSzX@Gu(|3ChW-jwab|wsOO_0|k2#`LN?MkZw}viNBqyqTqBo z1o0;j^KHM9Gz+F+z|rHp1z#YC+2Mg4;@JvPUfZkTIPrngVC{&{AA`5nE>n!V{`Y6Vd!u zk|^b~uC=gVbM(^Q7Y5|Ik**1&4`ve_NVdj{g+-uY=guR&YcvB81KsZF8CTIsAgmh{ z3ys8|6!Wb#Qo{ysTDH=+`Y%bp8467_ysGlF+r*sLimPfjS&u(GFkzpqG8b>gL;p;0 zt21FCc4XYi9{ZXrWREq5v~3s!7=rTWyFWu;Uu9QtKBp)O4n7M8>H$%^ZXCzsE+fle zA%B3y|D>q%am%SMg$equM$LU%$Y9Fk>&q65I;Ghn2R2Gxwf{!5iO-JJaW6L~qA|-S zt2#_Q2Su^Rkf*<=jwh`uzx7IB1q10g%N@TT&c~N%>f7kmMFiymHCjxIC-1f_0}W7J zS+PCeo#~e}@dpz&uHP}O;pBvsM4WiTiMHa*)rzJPrrjJ%s|)u>#16n2{{?AfL=obc z{Wcrlcuz%&R6Vg*oD(bq3N}7xn6=vw%H&Pk;C&Xe*!_AhWX>kc{-ak65q&0kBT!d< z@?a#8h3`5|R%T7b;YNY4_=jU8OqFmSeDK3)ta8$~2{>bB`rLOQoH3ctla`}s(Zx+6U{xq(3 z6~;e4p2M)%8HZm+ zFb8jqmXQ29G*~k|E znw>1M3X_(6+F-t1{`v2S6zGZFg%NUv5t$AN>jsLufVQG0TEoCHdVrrYOOBpqXz;b^ za;zy?9L;+l3@iOSp=XIPK{-i?4yz41cc-8m_uBp0WR>ln4r9Lqc&-@%q&U_Au@mM^ zNRF7B-A@mvBGe(Q+@AJC#Ey%=8x(8tzJG24p08{P zATJq$4_M^tK1MyU$cBr9g3QSh#g+ax+yD$1AN5^hzbwdf=ax?5jN1(U4+dG)@b+i9EhG9lG_;TR$;3KEI=${NdC^%=|Tn*r6AyV$WJOMScs8u z#>ee}U6klaencfH=r@K^zRDoYN4zCT^bS?PX7{yu+0g?O>a-)A>m0r|N&wA!c~2`1 zu_}~kmZX5K!U|02YzKP00b0cw#lt9EdJ|E1{yJz8>M6kyxn!+t%Df78#{5y=n>aQY z4V`^z!Z0dV^f89_J$jS39%6csmnx$1mw=SJEJAz~*qrheMFcSR`I4rJ=wI?`fczC8 zPZ~>*R5zr&iMC7rpr}dy-~D4CTK-?U%KwbwG35=TqCnZKaS{+aTqWEiz?e!ET&Y!G z0M-H)MU?cPrqn|!*EezkahZ8e=&vES|A!&HjR8ZxkPo}ZcOhQ~m;mFTBo>K-({6%@ 
z4Nrjk+5kgy^kjF_N6+C{A^-XuG0VHk4A@23cbycEK64nA_0eF3N;3^XUI*A2u=~m` zMO~#bt`b5291XyK52MyPcq*xwWl@}GasSwn+@mJefiDBmU2u~Bav0_Ci^bPw#>8Y< zix-#@{%=z>;C8@kU22=?NFv^IOWSsssSx$DtdJQ!Cm;aFr#BI!G?E}=gT2p#N!FS$ z@vTAQkN{){zuF8(j{VIw+Q0Rjli@H{K>cv2RuMi#TM@B9

PQi59i#t8jtAG?yHW zdm{+C@B3wmj*kM4c;KtUOMwt1x9;k0I8tkKp27!NO>M{is^)w38aDk!<{??!JitIpl;|b9AK%3rjRgpl$RhI6+SSRCR VZ&g*)|GxLDuB@$8tzZ-We*l|k?J@uW literal 0 HcmV?d00001 diff --git a/add_readme.png b/add_readme.png new file mode 100644 index 0000000000000000000000000000000000000000..4899a9fa3a7bc648de4b73c504fb7db5174c60c0 GIT binary patch literal 15909 zcmZv@1y~$GwAg4cE25PWQ#eshF)5267dzF~lEC?>CLt&Oy%sHL zM^6pA|9vqHHA)hEN&{u_3Ia6A5;T!knrpq8B!r?qB73u`%lhB(#!XO1$9DI2;PLZx z2PRY%zAW`v*do*@{zTbaf6(MmsU=+xrkEH!7Bm7SX|2*gTml;67rKJr-j9fYO6=VyOIQ4V1$FzuOoECW`h#fPacaak!u@LA zRw`K^zuUS~)9DZ8Bt&;c#-+5&AC13!qR$>dM$)XE`iv_wc3Mo?`aU_AOmHkf-)!=o zqI+?A%Y5128u=r+il2<{_tS!?3cgMjuq7yG9SEtv!e*L*&X*Vp8S#Ey4gJwS(Qa=4EB#BQkSD}Dr3CLCMC zGrcx`BRn+V_oJ*>l40C7N1*3&`zX$g1p|)-OTGikAP37&i8>063kQXr0*(9>4?Da6 z?wIXkGWT%-jXpC7Il_$&wc9g*&Tz6NuivA-$s5t^97B~fwJqVQ3z?%<<NrH7&uDxrqP6Y5wMlU-;n*9Aoq! ziN)EOS%woK5BQ%Ezr?#|p>7eL&}i7<%Lo1HiG*;y!u4GUNl|CQ;aymBF$wxecHXz+ zh@dSG9`D-!c5FusLdK+lqM|_?!fFuTzzTi`BQGOFWv6hk}hxtAx zU|@%x3k}yI{Awt|Y>(YYiPehVO1PZV9ZNIW_WQ=r?T0A#?Ru>m)+OSzP>2O?1I)RE z163=YKa@X?Kej)~dU9a$;4UGbdoB|zTxP^XA4~t&k&A)F-OsLh?IIiO^v83eaVvwtm%OWHrnj%;SI0ul_6Kxc3DP^#H zhrSOb4!sW1?V*1Vb&V)hc~9?~`azzkRJ-)O2Foe+Db*=Uwh|9@*Qb(E>T3lzqDS!K z`;N?)*kbX9;s;F)Fv>WB0;nw8E%`b_q3pAIVC9$=zb9rbZQ)u!Nl&%JFIMG8b;Ih0wK_#!3Kz9&^#aP9Cc+ZJ zhJoErRCi22XSjb8TM>s63njuN1`-o+J=q{lWgV&R&o9mXSzw)iI_2x+&r_Z$Bv?qE zT|brlO#E5p^VR2gE+(!Sn?LMWQAFWWqaPeBNi8gL0Y?PO; z7x(Nzwu`o#OHVaZKQMkw|JbV8EPRziEU_rVEo)Mp#6c&0pwPh%cwe2{I*E zYMD=5GA`~?z3=>0pWkCrpD*Pcy#?Pb9vIbA7ZmkZ=e6kx7J&=a88)?FC)^!@E$g!q zK4-lCumc2x=qGU$AtTy7aVtp@VK&KkqOUwvoPL%{@4Yxxc(?2}J_uQUFnhmlSt*}5 z7N}WVnNvCI(Pf)+ipc5S1lQDJg>E%7)iSwy^mxR5gvs&+Uj)AcKOMh~*)_u{BRivu zS+K^8$(pH1J8}`-er4^ufo1nZr#pgI)HB*u!Z?w)BbbLdM<|Cl$HXwovHiks9bsLU zQ`w4cW<#jS@rYN@x{JB%)eG~S`&{xucb#{&*R|elM?i~j!>v>Rfxp(p!6V?p@?yw) z?c&FUl6(Hex22BnkJa(j@juFzAbpEN+>4$SXU4pphNbI%Z4eeMQT|osDe(9GZ_}e~(hxW}L_GLrL_4@)_zXl5jQ1!waEoy0@PdeJ 
z$W9pjQ4x_27#$N!7VfKy=S6DEvyh^%=4tg=_1E?EyE?n`zspyDpUCAyCfe6hH7CDt(yd7?Fxa#f1{d-jjQqyXkd+uc0q&09z39KmAT! zKlvy3&`h&%e+*z3+Vv`?X88v#W!Q z9osrB@4C2=?LVX3qUhk?1`@m&D;FsgHUIu)Fw^4SGZMh|)DxJu-${1X1G|vP%qW}uD8^A;+*BqFxxP`wP(H6 zY&~70VqSP%ejT*#KRz%)?45PK(!8(OQUBr0w@+U-jH<)e7h1mGE zmm3QY`y+=?+YCw!*4d(rzXdRy%t!L*tanV9WU888wY&YST3?K^XI=hTCul1ASbbAR zzizyC=G?wzbMA6!^D}qRw{@c8>}4t>C)26H*|{aCIjdGX-=^bX66X6hK%lZHc3OdhLOV2}3SxLac*^$-U()j~~)yL5V zK)z5=LOue(p(Dh@oYKe9!O2~~N0|CQcL)H-Z>QO)DgSeehrKYhp0XOHq_Z1@lAD!_ zm7Q7yg_4p|$j#D9KtoFAzl#H(gsE*jJX{3W*u1^HS-m+~o!zY2IQaSb+1TH+y?@UF z+`;1R>*QhX!{X#l^Iw(x?|P&l?iOygE*`edPLyx;ntyQi^bn?|erxFe{QTE*LVRrh zZ%a<@|J^O1gKTeC*f?0(+5S)6KvAK$vjS?iJ`e{zDO*Q~lRMCc$a`)Mq5s_f|GDyi zTl^m-_5Wu{_WxP(e_Z+Rl0s~69rzy|`mfRY&sku&L{Nm-{?E{hpkVnLazjCJ*vdqcUv}I-FDXp(`)~CI_6q0Syn5vu z@=#%mi(w)8!(B%OqL+n*g;4e(#WYS}ietrK!AY&slopkJ4ioF~cEGVA?9p$C`cp1}wbipncO{7$T9|lVIWlj1RvSoO z)Znr!(dlzlEbID$&8PQReBM8~PO&0STof)AR8q`r@-3^@q}%S`yAwPLULgjV0N<_m z^8QM!W`wBhe5E$0tjQiL5)uqKwHaTd=FGj+rjAa==D**&gSW}SFX5kgomD6TZ?VaQ{kUY; znyjXZEAsu~$o-`GU?QercM)>2J}P7 z-dn~9Cw1!g1_|Gt%!LO%>>_#%+%4*w9yV|J?4z$W+kQe})D@>BfQcFpni0NN+Z~8> zhmV5Y*ri~8yq;it97+5dWxrfIcQE#;1Dv&$Ovn*-Df+Mj`*XewHSl3Kx*0No-|4!K zz%(#lrdX+4XL37$$~L=f8tkZx0lEiz4M!UurjjR+p24jDq3)Z}%Wl>`I^3=8RvT%{%F5<;`ok-|F4nkg=ihZ+_B<1!T64a4 z^mC)|MIuz$haQx~6qXYG`#@vc+cEZu9viYexzF78d`{sA=zPpP&~w>f zaibOwevcEZ*Qc|(A6?$ghf{fqz&raA^!ij2K)B0+8Zw!~n~Xs&Bcz!tk@&#;SXWZcyDY%nq z;wKNESxspc6Cw#2K{Bb*Vi55BIsN(RSJ$N%-yTUhIn>B_I@7x5SM6&2ZHuR%$7@K> z2$u=6jPX~qTTP8dOL_mzLLZ-DaAm1Ux8Kif%3&lLOg_v$70VNuErufygr)32vdLUN2Jg)5%2l{ryJ(UBV(Enrv@I4K*qIM ztkQmoP9@j$)zzrc;}CE^LNkxh4zqnfe_vttlzeR-_&UMX`&(-(?{*}aLN!_B3CyO+ z9Pxi4Sj+o2 zE3Aa`@n?12Mx4nvA6LJ}!%W}d-@vNVw6daO6Ry{f#`x-SI{)H*I!`-L5JPDe0kkh0 z@qF1^Agg?Vw+yCg+NhuSyJ}s4U8_cT?jy;LJ~FIlXUACQi-<z4>7KzI$3!yDSfk?_|B-Ac4nLOw;{&*0kO7 z+}f>bE}yubE&;Qi+S&Qi>t*l1X@wY1@4|x_(SHYrsRn)zpJ#y8fxi6IQ_gfXhzhNl zCj5=^(;5=Yl2qRz*VgC7j~b{jVIkzu>R!Us*UU%}3^2v#=4b}@Y+ro1 
z0X%5+KkSZrl^K+UvSMk*$uUBxFy?U~VE-(h8ZAYvS(+5nOS4Gpl4K9YbSA}?YFdM$ z2Mrl_V0c_J!ugm8QJ8Y2eP|2tyiq?7IhVRxE`7dFnD40>0Veca zS}OQP$GTaqzQtQA6@HEJEAT{N+66SkOkn1RqXe5uqr-nMH#4|OzUKJP1^-^6m80h7 z`bpaZ907~MMfCGhq41pMR<}#uZkWC3?Ly8 z4zQ&SkiM-XXS2$D%0etGh_2kIHWrdQaCPuoXqjJFh(zBH=Y8$+A}{VIIeIKE-3K>x zDFC3H7giJmeZIwaDT<13J0#+=!72bH57J9ZAVL$Zk~K?H+SKwmZH&^_?`fJ7u18ce z8?~pHRj}b0AnCbTwlPjE_J<>`+M$mq$0Na5(U{3Y5T`&g^0LKB(qxZ0hdn^NGUy55 z-*tG~Fq?eYvYh{%uS&uqV!XpryS-S?G-z|0+O5*9Qvsp_WI;1)asUm~6q3PWs8Ox+ z6=?!0;_^o&uHkN|4$ASuV2U+R*nw-I)>sgRA0BSWk8grZNyrS4C}rK;-gkwj9#8RZ zAGp**WDU%=Li>8i5Clh2A(0K3%+RiAJmWAld3)dE*C$HYMCsk0w*h3fDs>O+xrgmL z7&dR{>h&_%T9U?5uLF}ACPT|h+8sWgy}R!N`rm#$Fah&c&*KqEt2fX;CF`OSvy6T} zvIAVoij`(t+?P6g(h4>}1?vnE7*&aBvPrw_V(A&lFkKqTBG?b@dw6^%AjVDeV93^dXo8pP9Hiye!>iq|1WH z<5&>$5YKl*BXQ~vLIfs=xOK)p(yr2xbHaL{WO3s$YQ;F7UM0-ob0#~kw_*=eO(Xl7 z4MR!moDM1*1_?<rDtWG7UASzTzI(J_l*rJgUg^g z5lTa%&5#(_s4g?@){i|JU$EjXF1~(;uKPoa@N547EWR;VJ)vXIbywke>wdt#gdR|9p+#g+$Ts?4B$4FYfz4ACrMX-X| zXGQv@wf+h*RKlY9do7>W&0L3BrcpfYd>;bIikF!1>oAEk`}9g;G+}yyu+u+S46~K* zCQw@_Vqz8WwW6hwt+*VR(B@SQ=8m`Ud4kG2EK})%wq3==ZuID&VO%?AkZdqK1fS>X z$F}J9y zX4FNBjOtA|r)!Y}wjCIK553;6dJO|#nIodcS+c^&^rjz>Sy_h`(5}DvSn`W`r2AetR*3jthq*;Dx z%6|dq%aDR+1pcIE4rxmOccCLP%^NW&fW5(0eNy`gCMcrtV1fA|VzT3I)qElTXMzvU znB$DHc)D@@!{mDICF4jR(PwpxySx$^8rAXvQkW&tA9;-o((x*ZY25znYLIxOpl;wR zp?(-+!pOps$bTbdfGmN9lx1@?oi}k$ct@sg#INa<oB9BuGY_Ej4b2m4gbwcK@FpRUu?2)N8=F%gdf{jO#xfG zE9V~R*HN4aqq+eA^DI9DYz0z&H#l|uG<3m}#wzXV^4!3l&(i&oVJM&suzwOoO>u`n zzymuj(;j~oOfBzAXcRG?hMAANyu7)B&Bf?hRf2&ZLqd6B#L~Q?_E&@Siq*mbm=><3 z^WVC~W$iHj(?Afg-5ZJpfDw(=f%@*ZZbp>a$D3pS^oZ)~P!!acyUk8d8a2IYjH&8L zW>WdBrZeIhMlRi2aFH4di*!1tA49}*^oXK5-;2k)-Vi<-EFE0--3~Tp**v`6wxZn zs)N+7k~?IM4*BuJ#3*?P9nR-!s1Qc^rv#wn4Azl~m`!a!Ne2#bYH7%EJ%SF4* z7|%u#Pz{5iM^G0wvKh%Ahz!OBb*)3Zi$CkdBqd#M7*P3Vlm>vBukH1G}tv9i)grI!BU zeYq9Um`pDGStf~89^l$k^|L$Q?g42a{FTk9T@2tvB2-Q{M^m;ykh$HBrkDqW1zRAL zdR(k=S7}yYShqNDi#(l_BqNEgCX(>tB>4eKlECqDleKEtR6zE7$VoIsP$4od-E7xo 
z&xG}K2{T_5v9Il5k&vA8$!yW>ruVjMc8ZLLM{{>T7E8eIenc+cYs)aQAPEVW@594x zzG4ND*Hjk(?H4Acx@TT^NU{I80k6TO+YuO(PWrb$>#tJLmz}rph!cR2l|0zxjUR zaa@zarB|_}jrs>HdYhxkY@;6}yiWE1fGzPq(##J{)w%ES1hzvl*yot=X8zOsM-BwQ5B%NGFruq-?wDG=5hHM8N+1K7i%rH0DAD)4!mzD2Ern@EFbu< z|7G_G8T|a&(pM4|gXWl3Pc(XnK zv1*6eNXw=J)--L~+QKxocSz^Yi0H&p0OV-XWq;GhS{)~5&X(&c0AXhy5MkN}`@o%! z>pW|no=k7*$5M^{U{KbW*xvrWujOI%`akvoi3V))raV9t8T*CKOAAb{op1Q8Msjy& z%huK&P;VTAsTF1Uza=8k;)T_oKta!w+0cToG6l5bwFcZnbvk7oRT^b+WUHm6?-W#3 z>6J!6${;+QSk6vHr5TRbk(jS!C-xuG*Ax%4_365|=kpMp68I!D$fX>%P_oH7+ z__f$AHg@S&X~kv|SCRhin-oJHusA9PG0j^(@g-%6TknIrHuA) zB8jZ3=qp!UM}tTKr?m14#Xq_FqEk8Oa{&1E{pDNyG6#ogL28-LVM;! zAuZiFN~?3OCx;CqPa?s6#nf*^>pL1r_H& z|A3zCDZ2EMNX%UfOfD<|!H0gbV72$m*np1dta{eA9aN%TBBQ?D6X^Rx#w3%~c&}F? z5@mJJ+yWB-d}{jo`e~`D{b#`k`?Y$BB8q97n;Gb-E`=QfDy?1uBBS(N`@(`UP7VR)mUa3~bjb&R+~wINL$8us zh(oKO)*lA%{{8Xwe%sd|S=VwJu4{iy7+Tgm)kH}Ntr8MZ;-ceC0BQ3>jk0-c8BeP0 zkoyhng_uDk5fEfn)1FC+9y78e?ymoRRt^~sS`#Zp*SWlpIU5P4|Bd!NI8xGL!6}K0 zrETJ6(c+3zJ|KoNZ+4Z~{-fyR zwBkK=?Uv4$dT$IIaQ61WJj0K?7r*c{6*=JT#j;ieic{vJ;2rUi82?;ut4?IH9bf)2 z|MV-q?{&QEb72h{ZhJq0k+Fa|&j!A+pZwK6f`rbw_CNg!eHnWdn`IKt!c%sI&<%`L5iy8Jc z@MXV#QTOS5K%E<#lRD1y{a7=cTnpLU70bxxHp6<@?Bqa2Ry|1J3ET%+j_vw#2a2?MQl%caMn z+ZYfxt)|o?y`=i#gC_;*P1(K9^~&ewr(v{S#7rTUm|O$c`9a|;=(nC9m_D2GKeLs6 zc&`mk`^m{@;0M0wR?t(VD!X@Hsi$(EwD2a4*6^Vr;c+MgY>}gwDsX#Wd^DM#bkSFs z&~x&I2hRL6>5VdDer6qUc1a8YhH|>sePgy_E*%<_HgW8J%gIgbw0865Ic{41uQ4VZ~`jS;MKW z;f>kVFb9Padoq}URfV+U^9s6&Dt?k050)Q%bsXvVR&2x^>=_+~db!A?^`(zu&ZkLs z2Qx)#^)z7_tMi9zvLWM|= zWZDC&f-CA@z%j#C3HN-LKWpTRv!ovM*Np&jPw){tUj@u0@EO6ht{>_K78n{c#Hu3SHd-7{O44Mujy15v3;Ma|x-;1sotNURK|WmgR?N%+5-{NJ*Zt84(bo zp$Sja2W$^}-jL8@8f7qyj3x^_UV@QK0%5-b##7bpt;ayy8NjXr%D+`B*xS}>b-YT) zg5=_u)V*|BnEskj71~sKZIqY6gu+YBY4GnA4k97L*O~T;D1YXPeKP>Skm*pM%h+kK!Pdi8UiG(;!Z&{5DyP75UbIONfhAs zz4v{GN04D*5Lu~JiDPbu*$Yetjy59zQ(m|-0c5sUmT>f}uGh(X5a}ZRgrNa23lA0D zN@?5r5@4G!f1oDt*5}AtM^*OF)5Q7v|`t2TWPft#hjy_pt71c zzt9zGi(qf~@^JQ}a&XJ%J3M@BeAArv`ik{VvL}BG8I{2}*n>C)faSjhvM40ZDaKJ7 
zQDd;bgEBa(oi&YY9q;qBCDS&E1onEG@Dyy4^J|CV6ETB?)e?O5INz?0mn5iF+fYe=2G5GJ}@i)WT7c2^)Uwoy|%`=o~ zP`gqiNo3ks6}pc4E%%Hc8R_%y{>r53S*aVv##Y-=bv#9c{8=Q~YD-?ZreDzTWa0?# z99}IymkF~CxJ#4~ae5mP(%*J=waGp&53Uu6;)R1?^_=2=M9yMe>C#c*1yL zesWWR>#pdaOYtjI3yzd!zZDPe;`s1<=;=Rt$HVSAkk~cJ`#$l$H2qzTA(D@z;-Ss} zLTG)?FD0YAhcNH+q#>IH6;TbrxU)%P^$(ToPf0Qz&pCSvGG382Mu8Gyf{oP?)R7qq zi^~+tVj93qdM=54hdzRY#FB2|p!5eD!b{{brhuxw(5RMxoQCe=51wpEEY;)^AvZ(XuMRv`EoDD@TV- zEAbyo0;p-W{cqu}n0urH{}H+^*~}~*X{fk7KW5<(ik~pVl|N%a44qO!AXX<%g2d#K zl`NZrYGgO$S*8A3QcJXM|9h)-5BBeUKT&y(*p(y6x_Fp-UJ)4J6AUugOsBWwnR2|H zQQ>cN50du_Ws}L3X=IZu4Sm#q;$p(BGh%0H@Y)iG&L5XR8e~z`)`2lX4zp5*CSV|y$ z2sgpQzJSM|Aw@BCW-a{+$GLoPUEq!RvFt1-=tQB zrJ2zN!)5HU5@qbR^3e=D4@g5v=cfs*tDSigf4sqzfvP_SRg5%0=~L7ge^vklL%-)& zqg7h~?vqKV*n-@nfdmaBz3Kv2JkU9}Y$6jGFV4AEF_T~Oa=r#2mecS*CS|a^hqbli zGcn`xj~)Kpz>S;Na5T9I=lx>BL)YIN44E?H8q}$c$BQE*DtXG>)x>7yR)?@5xL-1U z3n;S1jU$^5Gt6xbDo>=uDf4|5MWVi9L_0MFSS4nJ+Ub|->TjAF8d*>Bkt3iyBo%d6 zsn>{`T`?yj3{9UOx@lx!sdgVp8i>>?950RctK0?p22dkeqB?o)H4pGCIi1-c*`7j9 z)Kjd9krVq4xuR5*cXygG-0n$xF^$I|lH_tWr2~It0r8|&nF)jo=SEH0b;W;`LN89~ z@WvF@%hThKVfe9uh@b1Dwn;r7K6YDp@O^O8_P;#hq4s*Q4`B4;Wb>OcmL2 zzmU1p=*?N+yjMWlX`9r_cICxPa?L-~#;4}ANpx&VNsI7|ynXNRja~xBH%|r$-+{R_ zMEZ$aQzJ9Rsg;?yG*V@04lfJJ&xfB$)NvPZi7T_Y99Gzm%3z6TeGbjIUhV4 z<;6~6U+a&37Cy)sZ4Z&40sZCD7-qTbt_FAAufVgE&&pxb($cE?`3YPADJ|<%q_(9m zuduj~d){KYQhF{8sZ_DIc0Sguz9xP_K1EK{GXE?ZED{Q81+4P~zELJY&!192q6cmL zqTSd#Wg8qtODdW!+RtOIRFN`2F?fKOSM-*G0bqnnzVSRVz`wds(>s>2DOJ$~qMSRo zUi+NfVO3!=1`r@%2op{uXwnVmwuAo#8hrENKqq7mNp?Yz>5A~n2hiG=w%LnoqCghl zxx9%n{|l8_H-y%yyyFEVs}7EajQCkO{p#*8Jp=VWlwoZ8s4!O4DZzknZ*@fPiwqMc zmh#5i#9>StL2#2YVqW`GG8>qWd0igqzZeOn>1RxNlrFZ~^(4Jr6CB_yE!*^-CFeNn zw(9jN%i4=VLK4o4W>K6npyPtB#yy{*W1M7~5ZAx?Hocy_BOTMK)S&Hr{?nocuq zYd>2U>UY3|rY0t>U7(h-8%A$Pr131f0J;&^V|jgI~zG7(1&d;Ou}d4}~0;WImM*mSZ^iu6=UlK;~2~qd+l$x?En(DUu=bd_b4s4S0W^`*wP- zn~b|J+}*zFYzg6M^Tsyr#dX#YEo~uUkQM?htq<+Q zNHaN&a0Zbd#9N0hJ^#;IBSf(ZfVwwx;zc>!_A;0v50r@dfY*ey4a95v56zme*~L8MFi>%)VG)3+eMiLucDoTrXU 
zfW3G<@SfNK6>RS=pv&u`eZT(;#`op;ruV0Vdqg>LSj-eC_{>=r`XOylSPDs%Ijp1-P~5D@Y( zvbDASV+myRnD=6ikN`f!QWZoYoo-e`+~wa?BW<%Nclr^_6DI9SS{vXGoge?CC|@KK z^eP2p$Fy`b6tw;$lBFw!!h{n_4M$%y0`i3|AUDL+_ojS#t;OE=A`zWGMgdmvK1)1A z0(iqZ{5<%Nk z`en;ixbFB{*3==eNGW)oNXlz2vKF~vH zm(kZD@xU*h*T~*E)h}H+JH1;xGfggy_kR5BxB5DPHQ}7I^Q>{NZ@9|snR)8kqmI(I zi5dIBA$8eZm7f(VdE*VvyDFqiQEwBmdUw%K2MR!yg+DcDzLfr+e(~8f zaH40^adwgfsY6SzvdnK3_u<=q!D6-g1#gB#pc6+2B!O(vP?CCVSFq_^_Iq8@7!t9A zLP+jixmi7>@Gh?n%w=58J zff3oyVps0g;mSv-5fRVUU){KDP=7qR8;1WfyxBu`9&-8&WF)N^;c6+iM}N5gxfo9b zDY^yH|14E_99ICAdG!V3h**t(6EHH!mlY|X53oilWEh40ZUI`G+lT`HCL?=;v!n|p zLxf+jNj$k;<``n(Frf9D5HZ$C8Dv+_2NYLj?r4r{)gnK5pL{C1?NYTqLOQEM{9S8) z>#*G;j4M1qs>McG)Q5!j*E>aWfM>%L23JE(t_?`gAhH%$gZ9cnB=-rCXa8YEEr5_U zZZ}5%s*NCji~#@@DoJz#y_0niD3bV( zA88feU*`Jp>{pcq!1yyEvG#K#LhrtFIHf9Vb+f#jhCV1T1>X8SYQi)wgDy{g)YpF$ zQ|q6^tCMm|Hi@#+tVnj#55@p5j7lL`mOt`4Xu+Kp2x7$_zQ=Lofj0VVP{{?c^lAZ* zp-+|`M-R?&id-*%-?L_Mk=VGv`2Q^`(!wb+`}}B#B>;IIv!i&UhTQRF+17igT?0Y< z>PnT;7Y-J!{HCxV%4{k06-G_aBD*LL{>y{g9UIL1#%BW6%u4QxBpT9Yc zUTOq#*7a;2@-j!Kk0Y}3+`W3u&nQ$v+_{QZGzK!32Q#T4a z0uqS6WtZPW(c*e_UqNzlE`&E+u)=U025es&Xp-{WN|QCU`XG6E0UQb)%57a-DZ6mGjn1^ts+q0z8g&6$pUVB~ zk$K@F^#XFVxbHjT?CzTyH5S!4+wZtZs1z*UJ+Vo+c1h=bSWSdGC1w!Nvm;s!gNXrr zT&Vu^Qut6c31E#$R&tjzhlxSG(G$w{kIj@=3P__)-^A^a64Ri;dzQWcyj|utXP2Y* zkWP^mr=$dS9(yxx%{k$ssOxt%WLB?F;M?e0n>M%lP93-*lj1A}%Ec0p_O5%OGfHu3O3rE{SY4-m( mE43OZt+s~>gD<}G4|+t~vQOt;OZe?OF!ItWQneChVgCz^{q$P^ literal 0 HcmV?d00001 diff --git a/benchmarks_directory_structure.png b/benchmarks_directory_structure.png new file mode 100644 index 0000000000000000000000000000000000000000..1bf56d91260b51aefa7196075252023d66c4b66f GIT binary patch literal 11588 zcma*N1yGz#(=LoI!QF!<5Zv88xVzgD+!Ng0A;BfMySqCC4-UcI-QnDM-uM5`^Vg|U zb+(FS@0sc8o@;t~y06_ZMfuOj2m}Za5D>^xl48oh|0CdU`W*;(FZJQ9f`C96w-6Oo zloAysRdlp7v#2;kreK) zK#E@1R7CG)$@xHz5Dgn!O`5X+f+U=S#8-`Xs4^4g($)KOg`*C3o(QK5U6xIs;( z@#Jd0>pJ*?n87}Yr(A1D2)~{UNmvNQbVWhd7+Rq#MPsD~%qubDctpk9T?B5vcpasm 
zaTyt`1EN$qc`p<(>vr5T@!Z_+{l0&4$HyfT`uM}t0p~5Va?6L7SB6=u7l}PwkpP3g zThYZ#KKXmUg%dfYR$p3VNJ~(7Y?IXP$n%d+DTDCuR4T?&F$IPXa!4CkV$z9thds3n ze|?m3%1Nl7&VMbF+LbIkq+!{L_a@6M|CPxYDVMy>r}UgIl?WaPl~?1XYub{NYx4+KbRq;Rc6|v|rpq`l&EGjJSraC}_!i$1aZC z`vOb*QqzXFqs{G3S~w>`vt|VP#OC;2m<|mJ77dzs9hzDanwu1H2oe(p0zDQI{xJf2 za_i$Bx4XbmTCOB_+)DIA3ACUUcs)bk3d_F=+Nlc?t*Sx0|k&ctj!?6%qicOP5 zWeN4{S!ZHH!ZZoE=nFL5WYU#oFyl7kFL>{SrV?Y(ztVSY9L%&fh zIu@}eYryh=@WAju_aI!3@rvo)z~gdCr)7Xi3LNdC?=Bxa>6zU~b;xKESYaZLnIF*E zYClJJMbVFD>^tis8OZE)*?77jfj9Ms?Dj(wlcWrVFNMz!6b-BmWawe-d8ZU*E`3cZ zf#%lN-xt;Q(nq<8Y%J&ym?zKj$t})U3O`RRk42gOfc${$fIdZ*gS_=e?hyH*MVpNe?!EWV%l1_zs_BZL1ilr(ZxmA6B z5q=%7_D8ZC+QSL?+>w}*&%`*`NBx&9T zxO4T>QFD4ZtqON7r&T{Y^s9b~*@gUpZ5Q?m?x^$*epP>|eS`+FLEAycb*l$Bff$oh zWpP*|v<9rfDENm2!FV)CcLWWD(Re9@{rKe^C9LkIvMk?NuBS z=UxLfrr;`+TK$K`2dNkNN3Zj{bA#Qr_r5SNa9FT)aF#Fwu!(R2C@csVFtaeou)J`L z@U|%3!GS^6D9xjDCQgg9$JvVWli=)fqxh=is>`ZR8|oX==LL)B$CHaXW`gM^JfHar z$)LolJ~W8&g|o%{ih8C%z{SQ_L%(5L5^%S0)Wr`1QjslBPT zrLtMNCHN8MWBkYKkK?ksiLUfgpQPe{Cr2toD+t8$@p0OpHZ;Fbk5dOxyX>Y+=&+rQ zcxF6kT}{v_>UC+^^I@l@#=2A8Ab#>!8OE14DLj!S+?@=G#|FcjMVK!=SI{w%(eGmlS(@4+HqDq*S)jy9t(odsjosMIQivG=8H&b@z0 z7w68vu#(Eo`!xT-Mdvh@P1aoqG`u@Zu;c8k*-}xZ=vlZwb~Ro)^wh-Uy>TIa7||C| z!PubPW_wZlob{ZUXqaf9R7c0I+hS>P#xhsaKAOE_F{-1ES4pKg)yd+z)2)`LmQw1i zXVchXdeh1dZ9j2x$5~_kb(HB{A$M0H}8f1<=OgW7nc*_cK&`3 zfj6qhY7e{fKabM2Bhy|@UT#l6Ph-~BZhtH*%)&XNV3FKwQ&b#s<9SupFV(j^#-3(J zvD-)Gn62B7Y(#&6{7hhFV|h2nd>9_%pBtXb=q>;9FkW3{LO-|~J^z!5zxwLqiK!C5 zYG;#ok?_LgO!DsqH=1iznIB(fh7GKJig|2YTQ(ziQ=+~|eL){ebaI_@GBsl!a9;%O;cMdytiJh}IP1orM$ouWDA9sfX1Z>wv*T9`V2aK-LUfPep z*0pY&LiP&b{6ReWzr3d2lrJ4ktIknx1iy9ExPIMKSQD&7r9ipqOza@`s(mfLT`qBv zKhqBW5JJ*9;VpQXy`OtK7?8OlQ12yw|7>l#f%}|PQ<-Rx))D08!IjF-L$e{ty_E~$ zPmul-r7MjFqDX50coGuA;Ut!(k;oBIvY5nw#epY&a&u>Admctc7Z(=>7gh#4$1jY`+}zxZOe~BnEcCz# zdM7tqXCqg7Tc;2IndJYQM-1#_;%H&-Y++|h`gdLZL~4N?sv733nWu&kIURwy&VJ6p2veaPJ0rb@Zom!Fm1eHIbwhm9tn@C%i| z>ZP<4$;m=T%Q5!-Knk5h_Z90O&C#55#NUFXFV)}ktJPl0 
z_lOotO-9<{c>L~lY}N~ksq9vXa}JA?Z?GDzY>C-^uOV>(x+#o~Hy*u4E+X8W%5tvx;| z=~MJ-l~KTo+g`?WsRrXiKchF8M+Sr*eu|Ycrk&>_S@7I@23-YVme#~T}_HiK)voL zkJ+!}K4vrUVhz}FjD3EHwz4g!wW{r;VVSLx_q0hgkZ^E@;%DaBSM`p|}t3 zA3Cq6WgK5_=11i?S3f%K{*qc**WAsspY|}8o@6BnybkuZ*To|Iyh9NzdRf^S?uRn@a3Z)}( z=v5ILiR0J>D+WJ9&f79EZ1Ju=M7KR2mKwisH)z*ewg_e>!J9PQE?L#T`+78ojRzBH zcr;h;Px#op_w$E}JhwQo{12t-<=+!%HKa!q>Dk+xr>Y@81Vd)?zK!#q4SlDOxn6J@ z=l(onjDd&>jcH$Y@aS{i4R?9w^QIGwgm0X4vtj@Gus6E+C9`}W00*a>spojHcIV}8 zEs1r4PetGJLfocW@L?PCV70YTKyj{I=M$4IOx^R#)7|{ntIE1MRh`F;-jLBQKj^P_ z@SoDNL=ytwP!9uccGK)QhMVb_VELl3>CJyo%9-wsZzo>@cz?5Y=$$U~db$l$$#59J zZ;3uV^Mi(SCR#PboIKQm)+WIu;IvI|;=n^`n37JJZ1-@fEg7}*+#(D+k%%Io82|E3 zgfQ^?cZ_1SQg!T%!`ae8RDS2usWI^gJ#}@;u{6$kxSk{fZ;P7hj00jp@3w+|=pmT+ zFUJejEG7d;!q2{t(ARzrQ{uQOA9Zm;p(<7loubUa(Y&YT%5^1cueXhIs(%vAh2^@q zFw04_Dez!=NM6t3wCc>`UbFMXH5O=pR~i_6JNrX#z9B2biTMDBKj|g$z|`S+HO5&_ zd;IBbkk~_(q3vAcWH%Sr2uk}bh4sr}Q=`dXw12kW&4%T)1HK)sCqAv2mrlK9zQ$66 zjZXKs4(NgCZmA!;)=A?})ju8zDsQlNls;a8y^F%-7TuIq& zmQ>$v&(;|4O{nPp8o=<7w)R}mpgqT6ERf$tcSiDd|G683lg~^SeKM-QZ@fS*&G{Fn z^#aogk8xy#zOZGNKrIQQcI10*q;n~eut?Ivh#18B6^SHJaaO3`>Q&>&sFMjja#0;_mt=Atyf1xf}{A~9dHNagT!BWC?xW)uX~ zU=XAIn0bxVkSIY1%IK^MUyv!wiIfMBp?{1$2#+_ty*^=+K;w#VK{1JG_$jt_$C8C7 z`Fd4@IP;L&!qb_Un7GapkEkW19et`Q^EO_yDtFYG_Ww^TWYafXpFr0mn+%bgzSatH?m+hxOzfUp=DTWLIGA}DI%c(_L)pR(>V)hk;*+tVX z@-#6f_PSOQ&sp&}YY(RvAB76B<#N}k^j0O0g_HBGEH$DxBB`Re?@h)u5i(Tk3S#jt z5o+=^-Nz}D#1C*4mx)owTvgfjb6~0Keb~UV6XDo{QwMDsLG1cQx*Zl5#uHdv?y~S8 z1oD`%ClRsB%7&m2VZA`nQ4a+*IZP43(89xn01@$tXEAU%6<6O?_!YQ5Q)L|bOY#De zF9PDLd%Lopjsw45@Cdq&b`@;;UH4%sn3sl!#Ap5#7lEhw?@t~UZiFRjz&J>nf6NKpi0dzwLnJ)t|yl?`}I%AwY?j#5mt9nf>I=tF z37OD;58nk37(fzOJs-ttD1HRaQbb;kTgruo?*PFY&o(wz7!=zad9k^^*)#UgXlu7p z!q_QAgs!3S`7YY~i}^CG4;p%36TDryOHxf$DyDKjA2k|ZT+js{V;hbhW*aduG26A@ zBzi-lRBHEVWhwTyxn03{w;**>p&cc%S0pFjt@#+Vo1HgHgJ{+@{4@z^nV8M<)BPRT zox)MA1eur-ap=%ImvKnUgFQ%xF!S^CSuB53`9VJ*bFO5O1WL33M@MfUny9qG%Dy>( zkWhO=!0S=%HCxHx_CfhLUCQ; zdwae*JVTI0(mgXQBIPw~L)dfRN@6tV(AAq+kDDowBV?w+y`SrWMUg;ZP>mtx$0)+b 
zL(pB}uw5zQB5;L9267Z28=+`i^KMnsHiZC8B@dg!e5gK(BJ$P0R&N5#AJGz+|* z9auTkJA{s;461N*-S0w<`zfeC8?6^pU%lzL85|~@A|ow*EfEsH0~Lt~S+4i?`tp_N zEnSh}WT}3JEdYe9?7~lKTp*uOf(mn=OaAL8zs}_9K)^3}`T$$5^uzfIy;Av8`~CnO z5>L|&^7}s?XYjg@Oz6RUOdxhH0z4*952G^5yhM_xDe~T`5LA*135#hAYLoc$&dBy^ z90W>`nwgE(iXuaSJ90VH0IH9sxe*i77>+-+(l^I{E`!SLUHCc_OQxJFflNVcd~7-J4C02iM%LC21QK%icZA4<9J1F) z$I_PzCNUPcBzg|cTSm{6Xn(D4qzlco-3$!7(4Zey{YI5!T6UL;y98TKL4}va)nk7V za=~UEQb5rksBw3?3f}#dBHh}Rhf6uk&~a}(0cEJ`HA>%TKKeE(iYg^89z-mf7g^{$ z_>t&Qb^!pT{W(bTgiUm}qUsvOa_7h7hJG4K6g-FDX zK*Xwt)s$X`(|MwI*sPVl@g{tc!ymIOnH+*30>rFBw6uOvZf*E#X_qDrL1T(#=|SkZ z4-&Z)jVB{ZRMB=yc)pJrG9>2xEIr5qr?Wsif_^9#c$~yXKLz}aB5eh~;t;mQ{QM6oP?#z^d60Hxy3NX+sc>)Fl|qpbNg|8=wcs)N&bpX!SW3zh zkvs>8Hai>!-Tk9?#=4%v@ebq-WjF_$;bGOfP#TDukOZaqj$5ICm(K{dHEn2-!s<8p zJYNjLt{OBA<$DXx!=szTT_mdoc=^>nKR;Z}!r>6}l7jIvbv?cKlim?x(VSA@6O``E zg@2$NAXR%N)fe)&@2wR=z@xeB!^2+l$l$Qi79k@wNJnE1CXJJ{)?x++{@|-(1g{HP_WPe9;EjRGt|eL> zFlfuH%-w!RuxTD2Gy{}GrZjR{oQ0`N^@eVer_6evr9v9$SH-*+#rjv&a;k<-Ocse@0&xOL ze)@E^6gOPY@mF>2xaTW4lsicExQtRFEj5H-2{T*m&(DP>I^Y$kwm=ZTquyYOH`@S_T zBC!}m$^pb;|Iu1<{grb%NyK-YXnA-BuLlY~({{6lg8sPVyXGY(Z_5c&@$kjHA`5*) z6r##~)pzQK9>;$SFMqf(Ff;Ey>$~Fon$(sG+~ul8fQ!;A{U@&SJ8{;toQ1`QkV zY^7rO!~g_W;F`Tb3!PFWr9n9V+FSHHFvfAA-Bf8P8IR;UmKsJw8FS^S$Ut9Tn;dMMU!SfZBl9JNvAN8+iC_dCu z;pc@aAC(u|ySEDs`@_R7a^=#eCpEWJb?B^=XuE9fl{5qy1>^yA{@Vm(8Ha~^{ew~f zbm^86V0udc8*#mtwtFsWA(JQ-o9Z(0av+;8D$T?t`2>64Vw7%4yI!-@Zi>z+8>#mF zF98D7;MWn~T%U%i@8$NKJv(lJ1Dsghveb)$B8YSQphz`NVUce+j} z>xLz`ssa}kR&AyA5>FjDkNXmzyDiQTM+2y;$*G?D!Z5)-f$+-O+B8{0>mHRPKF`$y zK92{5j}GSVJ-T6W_vhzHT-PC+k1ATX^ON+yi-w>Kr+oQYT(|_#FMRjw0scC7<&A$Z zJZUgU!UrBIT2Fbp;R+PEcd!5&M+3&d;%)WJyKpUCTC@~Xo4Lp@b z1NkgxOA@F^w2z)nTNQvhjxy8pGHjjpf`WoVn|S?;qQG_(s>mKq6=6Cp*eSNMyKe!E z{6XAPpm#c729}edW3t0(Po5pmlgR7ga_2F~8INq!1*mvg&3;Yh5OX_dG9F_6`3TX*1L7R_n#MEez3d7`}jX7KyB~$=KLXl?evs{w|uBU)#3*W-ibeU?LJ< zU!IO`Yrv!1H}eKb05c|!OkBQDqE>3UGm_wVzAmg)V;VM){mE~#OpDcK(Ru60$9%&u zSuzH0ag1{y6$MDP)@N&jPjhZdh7zOsQ 
zWZ7ur3sBy07}rG7QWgL^3LnJla!LmVgZ)qGTGHMzabZr5iQxfc++sfcdydI=@jHC_ z0{GQdm-*RtkGoX8B?|FI9QynFi!E@MFXWPlf%BAj7@KBybL<-am-M)WHGnHPDM@oA zg`%&8myG!tz#izAX~elu;wn2gmGjAJpXxP1KX zM0UCEoLIi-WIWjwHj8Po);y=Z@mU5tAa?}=Dc+#uQoal>q!YfXSfmm2X$A{8Jl*Aw zo8v`4E+VmdXs`Ekx8!7h2S7?QQC>wVt5UrM!V&B%!;3np2% zAf4{dji?zJ=Pww?w>SfXGPA26%i}`xp(#PE#dD{I9TD#Wns!sIOxL^oYFsajiPBP8 z^jtTBEcpmIZR7RZ-G^-&R|}N(@$*2=tjy>)Lmy&_lyVb#FMt}Xd7LH&;5sH#bFl-e zjp)L-@G9UZ;V`w6{Ww^EZ4E^I0NAi2>nf=brziq;nm7{eC4j_Ku4up2mNV86IIcbY zJYlv_Wt7@D%!w-lL`Xz6lkZ>fI%{)w`udQy=PLACEoGM4+?>`BY}PvT2-qz@0#par zZgPykqhxFUeONy|z**&vBrj0YUzX~%HYD3NskC^W6jtxbJdkr&&T3B8gNJm1Sh>4& zAxVPpp37dyK(~tv%S1i+7j{;)Sp`u{9#l3>SEj8 zS>eOS2IK+Dn!oW)i~6~6+9p;IEe-kSkJ_l*9%(=&9Dlt*-nmwNe{Sl28C7q){smID?>0yG@t9{MBmm_44oms9= z)Q(CWmO>_wPBTN!&ue(%ZTDta4mbDX#L&G?N28mo>l*Lser~_}^8*$UXWwJ=TxyK6 z>+1xv(G|dPm->`D7!NkDJGENOP;wuY)$%EfEgC9>C6nsFTB>yf`V{H2;y1Xqs=gP< zSxiW`Z;RY_%W>v@pcVr+{M8cYWGegY~!@AV;H_4lBtdB%;c2v_@zXP9WMm!dv4d|G>v{14xLjHrJ- z`WY}?;yoRMCDCfDAwKZXLS&9ccWxJCMTACI3>kgq+tfV@ihtMj+aCdB1TBkf&tBQ_ zSkl7U6PIn!=^ZxAgvX)#FwTFcYfxBF{rEjBj72HSR?5a=F}t>mTT^D_j%1EbTL($~ zww=1hBxmSP66^4vi)2+8MC}Qt0L&f`xT_Z$i5g~-;_>#R^vL^lO7kBBsV8gPSn(T= z1zB3_ba8A}KOBwFn#zC?twib(@Qyyl)vTrJ!B&gi%>}9k%1!*_$9@Ivd9IrYb)FM) zncb9P&E%Iu43wBfrn=kzION?)U$T3@>a{OeJe6%7fB>mXMwud{_$%-s;=&9l|Khcc z5KaQCg!#0+B9lnG?~VE=HFb=f+Gf7fm;PW`At{2|i*F-yTDnPSK8bmiv*p*+o?VH* z;VyH<^1C+3Y;aK9lU@lNL&QgK2WzM2BZ~q znI%BQk1-g?G?aCi8t2GjaVc`ETPr`oEUIzq+o)J1b2R1WF8*8)N$M)`&B-Kyco{HN zv@FtZZy9gd9Q%k_cC-LT)d-&~Rx@BCfm5Wsjhc9VeJ~?8-8X%-^5c_&d@SXdu*AuY~~+Q`;QGjDWP(~=lKv(vq`eDVz8H$5MKhW4B+S>vH#D; zIBG~{y0~J5W&bL0eM0{Ix7}WgwtLm_({PKj#RCnr&mk5?2U%*ZpDQ z{5-Ov?AqE>G-jDI)2SS`csPYtBhG?8EyJqKPy*wp{UBZ2j;U;i-YOWc2(%-Nx*X=w z7`#NX$3M&+TRcWRFx2$)35oPNho!!Y#+v>e*j$9~>vCXYNto$aKfXhfnQVj_EEA?B zxOxsXf=2<8(sCgnDV@UG`-~JJxi_AXUsfi|VZD&Wo`M`qv9IGdKmceMkHN!P0ZfO?(K<%eJjt+*6p&F|bWwF8Ol zctnWgxW<~_7N?0=`T?_5CN%@p*#;iad#9h1mqtOojUH6Zdv$dM^xu#oen_O8?kRAl(BNio9qU^QU4$BiDN?{HVj1f}p)voW 
z10GQUfS&woyM<{}LjW|xDE|S>WG)LEnOo3UGbh3b{!u-v8o(OU$x^Z2{%of4cv48zs7?PB*vPap?x2t5vssn<0%(H0qEd7V1LoC0dJ5il9kA-M# zSmC*%@4z9MV#AaXK?!v!cDeB_H+{K9n301rHNn%vj2!V{H{h#S0eFdgsuGd%Qe`sH zv=MlTn|~d^%7KH+T-Gm?yhp{C~wf(6A!GCO^9je;za%!Q$5O{Y%Du8>K2$!Gb- zP$Oyv060#}zrg7Mu9e@!M90u5BBAuY*6gE>860}AvMyfYuTzY@+=Ut*s~DsBy;?fIUnYjOSlH@1xSHh@XQGiZGW`2KMm)cPBn znT}zTiN+ZgutM@DYN10&jy)%~B5>bxaEc@cr=n<6WYlpa_3JSMd`+sdAAlQo{l9T5 zX1cp7BqX`9yfcPI`4HS6;wp=g4As|?Cn<%5$V^AwC}zsRO?aG36KX^j+&^gZ{p#cU zT6am=zn#R5lIBxv>bS*WfBSZB-|>eJ9FlW0)Ady~tq^B_cKG%0r)uyGHP9!O4XoD^ zeV0H-9k>4fB6xvZfHnM|BT0<`h|u@w=T<+erQ_jrN;B(41C{u$q^6k+tiSqSzoH#^ h2K?$@jq7ia#0ofg`a+$Xe}5t(B`z;kA!6wNzW^h|%dY?c literal 0 HcmV?d00001 diff --git a/docs/general/tensorflow/LaunchBenchmark.md b/docs/general/tensorflow/LaunchBenchmark.md index 082bcbc8f..d787ff7a8 100644 --- a/docs/general/tensorflow/LaunchBenchmark.md +++ b/docs/general/tensorflow/LaunchBenchmark.md @@ -106,6 +106,101 @@ optional arguments: --debug Launches debug mode which doesn't execute start.sh ``` +## Debugging + +The `--debug` flag in the `launch_benchmarks.py` script gives you a +shell into the docker container with the volumes mounted for any +dataset, pretrained model, model source code, etc that has been +provided by the other flags. It does not execute the `start.sh` script, +and is intended as a way to setup an environment for quicker iteration +when debugging and doing development. From the shell, you can manually +execute the `start.sh` script and select to not re-install dependencies +each time that you re-run, so that the script takes less time to run. + +Below is an example showing how to use the `--debug` flag: + +1. Run the model using your model's `launch_benchmark.py` command, but + add on the `--debug` flag, which will take you to a shell. 
If you + list the files in the directory at that prompt, you will see the + `start.sh` file: + + ``` + $ python launch_benchmark.py \ + --in-graph /home/<user>/resnet50_fp32_pretrained_model.pb \ + --model-name resnet50 \ + --framework tensorflow \ + --precision fp32 \ + --mode inference \ + --batch-size=1 \ + --socket-id 0 \ + --data-location /home/<user>/Imagenet_Validation \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --debug + + # ls + __init__.py logs run_tf_benchmark.py start.sh + ``` + +2. Flags that were passed to the launch script are set as environment + variables in the container: + + ``` + # env + EXTERNAL_MODELS_SOURCE_DIRECTORY=None + IN_GRAPH=/in_graph/resnet50_fp32_pretrained_model.pb + WORKSPACE=/workspace/benchmarks/common/tensorflow + MODEL_NAME=resnet50 + PRECISION=fp32 + BATCH_SIZE=1 + MOUNT_EXTERNAL_MODELS_SOURCE=/workspace/models + DATASET_LOCATION=/dataset + BENCHMARK_ONLY=True + ACCURACY_ONLY=False + ... + ``` +3. Run the `start.sh` script, which will set up the `PYTHONPATH`, install + dependencies, and then run the model: + ``` + # bash start.sh + ... + Iteration 48: 0.011513 sec + Iteration 49: 0.011664 sec + Iteration 50: 0.011802 sec + Average time: 0.011650 sec + Batch size = 1 + Latency: 11.650 ms + Throughput: 85.833 images/sec + Ran inference with batch size 1 + Log location outside container: /benchmark_resnet50_inference_fp32_20190403_212048.log + ``` + +4. Code changes that are made locally will also be made in the container + (and vice versa), since the directories are mounted in the docker + container. Once code changes are made, you can rerun the start + script, except set the `NOINSTALL` variable, since dependencies were + already installed in the previous run. You can also change the + environment variable values for other settings, like the batch size. + + ``` + # NOINSTALL=True + # BATCH_SIZE=128 + # bash start.sh + ...
+ Iteration 48: 0.631819 sec + Iteration 49: 0.625606 sec + Iteration 50: 0.618813 sec + Average time: 0.625285 sec + Batch size = 128 + Throughput: 204.707 images/sec + Ran inference with batch size 128 + Log location outside container: /benchmark_resnet50_inference_fp32_20190403_212310.log + ``` + +5. Once you are done with the session, exit out of the docker container: + ``` + # exit + ``` + ## Alpha feature: Running on bare metal We recommend using [Docker](https://www.docker.com) to run the diff --git a/models_directory_structure.png b/models_directory_structure.png new file mode 100644 index 0000000000000000000000000000000000000000..906cfdf02a8ef0ac25d9250986c5f6e73373371d GIT binary patch literal 11038 zcmbVxWmsIzmNnM61&0tC2*F*0dvLel)>v=}?m>e~aCg_>!JDALT>`<~AvoXRy?18r zGxP7`2TygK+Pmb`UVH6z!j%=JFi=TQVPIe|WTYijVPIgXfOaS{BJjTxk%|Zg26fa* zTwGa3T%26l$==+`#ta5VI{aHQlDe7|L4W681JB%d5+bs@3b6)-L1ZF(8tP&=@ycS- zqJ359(Q6--Uv;zRy`@Br2F2Er=glL+zMh38R!?-SF%mZe*94`m)VEnY?<`;VwzRCa zulnrVkG5dJlo3c#4hGJ^3=j+#PIUW@Knoz$9athFAZ%Dfa?&dKp0APUNF~(SeslNy zmYikj+@>v0kI&EapUDh=f?&Q%@J7*WnCE-LHQ4Kgt7F0hva=e{CPzB*47t#?iY6lU ze`Fm^sTH{Vj#3oU5B+eTZvzVx(DO$c1f!gxEX)zZAabE>qSAqKvIlFqpkAYu~O}iJJ>#H&e zj$pU4o4I02e4mvwCABs*Jueg-M@-7q|=N`7~G z_I9jURbL@DJ6Wjfyksz8m>UYNr5O6nt9HK=p{>Si7CzK|b^{xr#^yNW9=@!kEfq%Zb`+`ivaT~78d0$ z0)BjhYKJ90hHEGLtu`Y*N{|yZTKi{jYTc2B%x;&OdN(AaLrf*oxTeUnHWaoh#apLz zSO*rd%s@JDKaszh5^PR;_BnBh=OAf|m09y?VcNc6k zjM5q!oG5LW2)_*61uWqa&Fdow#Ya@l05N5B?f}~y(nHw$E>Q;rQh1Ct&i7c%0oFP2 z%ZOMaSi49p-PrC}AIOn=#MQ$Pt;7*y=^z-qB6hJnc*w!x6UsP=s3_!Y1^!85);T66 z2=t-XVl(4o<8-?ME};FOW6}0;m`lVv^tY^_qF&D$VgX#&FWL@7q-bMbKn~0qScF}# z*VvnJgwW@Dcm7zP+BPHkqF}v+p?Hf9#jX`y#`dFvlld_Dimr~7PE0mVwt&D&Y%w-n z8jC&5yJwA+6CKy|%NaDtXq{D0p4pt&T(F4L8CxyJs&5&3X%fP^TBBaUJWFyAU}lP2 z3wJ1HL(z!m1>=R|^~&q@QjAYb?;oP~&KV5M2+2XiT}<7T{YO1Bf6^Q?n}wEH$ztYy z>Ta~3zH-Mjh-QJFbb)_n_qzSLKLevc0%5xYuqC9a!%)gm3WLOh>VuejIC_v(qAX=E z$vMIk&_^isI);g`V^D!2JB>%2i41XpMghAj(;np>#U4|tJU3-qLjC~dxvUfM 
zE$uCPOY%c_o@i~}jhZSg>JXwVeqpLp%z2V*VVa6h>7Y9AXZVIR&w#s>JC)!flky(T zKcyAQrOKsRd#WCVvg%wKW~HNAzqFV%1gkv7KPyL-m=|qV|5VYf(#&<0J*rx);Zs;K z5EK*C^=ZGOxMJ8J<2ofVCkY@Ch=PmqAtB_vvqb)tvaPf+IXiwm#XNbp_r8@kQ(-KJ za4KeeX-_eQfpg6AniV^Q_{*;W6B`Jr>9VnRRuNAWSCp{r%J)Sp{V5?! zg}L*Yb!&X98LO4qyNX}anA4-vzbjU99;K1;O$%`g>y<5>XlTow!d2LF6TaRn9It*&EB+j$ZIpOW$rCF~cO@CkY{LG_&S;>^>rU{C3hKzCq$mzl|zx|_ornO0f>na`x2y7 zCTh?}Ew41ablj!QDt!-$!?_-zzQG*BeC$`l$invRHrF;5Qwf0(K?}h*f+j}CB)g>4 zq&7zW3L^#!hFp!{84TqHD-K`dQ=CSRSq^T-baul zsN3Sv^pU)C*3pmImcHBai1LW(!dbJf87SAx=i&o>o>tw0CC{ee#CTzg)*bF{0q`&}NNv0eVTQNI&7DG1D zH8#}N%Qu9n5U3KVE~!T4^^)9~WN2g(r&A)8qLqYV1q65;jvHGZ=|<^->D;zc$8x(_%eouh3`$)dqbNW2bnR$ty8O43g?2RuFsM_ zW=Qg8;#kY%7SPPSb<;hL<&^gnK^)v3B-wIt`Pf=jqwHP0`|Dz~eBi#B)%VYt8Tv{Q8!*bg5vUFoLkMl^OT zH5x5_Q!SnpT#{MBU-BC2876T{Ih_B#A=gs#;2LZwxuUp|dEn|h-?uc=u;k`;MAk0Y z=Oy$^dspjafBO4Qwti^Rr`gBjKI=GUb@e)7NofYj1rrZ^qw}t6pO?s|reU$6^)B`} zH;T(4D$jh)VdzivTUaR)YuitMXW90@#subn&1dmde7qg4tubZlUx}W}Vil~tpgMx6 z2B`<95#k58pc&wJ2cs>>xnyQ^=>PDh+no&N)?8{CHb_=7JZpB^FI$=kv1XpzujV%tzAe9~ zrdiToJg{&6ZF%T$V!59&E<6p-u`=rj9tLDp$J&fJeYUetRtLvOYR@ zBWv1M&Y?TSNP&oa1|vQb&#LE6keXAhD`D4;I`_}(N~^-vSnn{eI+HrceCnSnub0Z) z6i;+Q-iCrZ$9#p)b9eKv`@du_3pMyC-aObq{t!MS*HtGOrgsGUc)d>(>t*_10sCQNM-5Zo=_*aQUzhl}0y& zcgf35-FF8!KcQmvoXEH`VN6K~%H*yEOG&Nn!}{C9t(CjDxIpH1cjq2=%}-#*Gx*t- zVo*YT&k|D(j0RQo-_WfYUSS2=5kbv^Ar zQVOAxlamWLLCpD7B|iMy9QY+jY3btPz{kSk=H|xi#=&gwWWmD5%gf8c%Fe>h&IF8L za`v!uF?MINbAJ0TC;#O~!pzy!$;!dS%HEFr#jml6{bv_JO3Igs{`>D=>ojw>`k$HX zod3NoV1q0#PgvNPSy}$uH()C8(#xl8A?OvZBU=t-RUjF`7;{OR)DwLbBN7BupQ3`WiHsOu z!ieZVD5~5Iil!tcYnDcY{YEwT>%_8a|Vl|FyV z)vRuzYN0Ik<3V+2(nP)#nmAY0&UysP=*Hbwd4;YQLX@a9+QHVvh)nf4?@9c<=njJNr~eh-cd->nh^ z?soGhn;lK9Ch{cTuijl9n2qO%PBz&a-cDsdQPrFOV&7tIQZG|4br4!n22OSjbrVfG zV+iYh_4%pkJy?Zavm%9;?8olS4un0GT*AoxYDQ~ozq~qWn5A=BozrTPO1s`X3Z)?1 z=Q@>j&J=z?N%#@ssfj=ZH`ovMv+>ji+lpNOsApDX6br~bj+$a z)7;k`R9$vzu@0Yk;M z9#+49c)V}-eZ2c)dv5f2{}&6K&x3Dek>ydeXWV|fozCvEtIPqRGBWr)qY&a 
zKRvc2?3h1v<-1P9r@_|G2SH?r_o+q-X`FFr%*`Uy@+st@SYU2vX@uG6H~b`jUvt}7 zQn$zmJ8g!gic_xWOcf~vhxd?)1da5E>YPQl}i74E1$Bny)9d;m|0-h>8qOm0i$I*&Dw_GBAk1~ zP!M>xMLf5A#o0r>Si&~cGZ&EQH^7|86(Y3~oq7k?1+Lg9>u02sNldy0HVbvsC9N|p zjvF70nFofEKM8PSA5{^%?U!j)A8SPZ6Yc(}R08CnOY!y-JH)wXq<|Ns9sr@w zfu&3RWUUW$ua)~FQW<)1ssK;oOZ^cGAZ3-GUks|@NMgOWCM19rNEZR^_|nGNLf~q3 zpjwKYf0M%10Prmp)xCo+UY^FJbofYg*hPH(r;}is7U0QOEl2t?b`m4}KK)1Qi&ZUc z>?v71B@j@UzsgY|O>ihD6ldE%vJ2tD54OkZ%!VO1r+*OTk)S|C+HHe$)026gq#}uo z-7q??JMNDYkL8LWl^s&q@C!$SNwFD|g4L`U9*>MXi!eNW>(Z&&^ z9w5|=Iu52{1Z%AV9@SK?XWIq{7|A@U({W^?7C!oKC5d_j4nr3 z>>`CP9RP;{u5(n_3xj3~`LzCce!Ab5!hrgnZw|ttq3E``R^CQF^qVL@oHhWF;|EXS zkZIPdnZ{ue-My_-!F=g29*Rj!0_!`e8)&{zr0^PdZFl08Ix;|5RxEvM$l)Sz{1tBJ zI-lNE|2Y4OGGwGVDs*pC?A2O{N~I7`fKcEfu`cxdaDiC#;RN}RQItAO`)4z_gPRs^ zPMdJ2%h<`O*nh+rfs)`|P7^c`z9tn}Jyrni07=@2++|#3;!74Os}P3t=hQ3zY#UIDlh8U_j<)qk@3X4z+WCMc6 zC>GG_rS-0-V8hW&)4z!kmDQfU6PDeM`#4;wiM-;~hp&}_Vn z!i>V;{Ji7Dd6)b1vvrHPNv%xE{3T~fhPnG-O_+wl(?#}k3ce%}S?vRkbsJk~?`PH# z6E@jdhu)7S;kj5rvT)3DOtnI!upxbjDFWB)@oI!SBUysFWL}4C+O007C}Bo!+7{8l ztf}+VCAH-5#Cbwer?r%YKYAy$IU|@B>bLrGT^ga{4ZJrgSV~wFbzz{z;UUSIWmyUD zGArq0x|7ZiUDp%v_eTGNtg5)w?$v}6<|@4_Z^~Z+j@#MmppR8Vh3JXM(wv!`1&fGR z(@i2v+YP%&ZBO}VkiW(o_fN#Ate7i7g@nP$q)(4x3r;J%ph7Ny3aYdnnw3C&d*B13 zw6Ipx0C>TBftTQQ>lpxEVzpo~q&=?21^=&s^kSQhN4Y|_U=tD(ZUA&^(9-C$ToPkk zx_yVX&Uec=Axh{(LI6Ax$>xy9P9|Z$YD0;QiinpqGgBrq)$zma1;$3$oVoko%96$D zQZ)?RvJ!)A11%H$fo3w*B1*@(3T@NhlcD6LMZofoU-kzVE@$66%-GS(AgJB%G3EC& zk~qW>x^#aUNv_Ncuo^^kBBQlWK6jUKm~Z$}yk8Otg?I=NDYB*ZIQeDKVuLl!(NYs( zi1+5nVncZ@mVegKM1JV06Kf)jLDvyLkYEgYbMPYH@N1Hrh#et_^)t|ueu+e3Cj1g0 zq5TCBgZa_wQ(WD8&hp|{ECqd!E{MJCWucAN(qR(24HSloY;gu0rm$gH4 zw~q*1iCVEDkrrBdIH4U1#GymC(T4cvJ`Q?UNQFWBVvbub_T}-4ZlnFGo%_-_F9nha zg$M&{>p1M15SQKZJVKA-#!s0}-^a~?xVIq$C}K_as~sVK5k$|U`H!(<+s9GeC)`fg zy6a5)F?(-A++h~FOnft+_?O^0(E852&)K^xDoTy$)N@B1?+3+93pFE5~xwc#RIuuVSnPn z#u+sMTt^$Y585s;r=%IwFb-=9B1K*9aGiCZX6V1j8J}mer{LD%o+8lAD{)J={JP;*zYWjF*s3D^o1jt 
zOY}a~bS%Qiba*Fa%I~^1S3H!Y2rqzj5-vTG2S|z1QXafYsi8|HRZV>llid^gC^->}5(#=6Q z+wq5_8GZY80=WLXHoYMja8~tBTSI@yvSh+;b_$aX*n3q%qR@F!9A@#-t-p;c@qpN` z8~vy$99M$=@^6QfUk!s(2{7{Is*y;j%geDdZcKe6W~6Ll!em?tNK#mh19J`7Cq|*( z0Q-7dKHM}WFlYFDr#1D(>w3Y$Y_-FOI>yBLLZ%Rj5^rGMANu)JJdbBhuk!8sU3|wd z#|L?7j?nY9FIXY94UD;vWR)JeoyN~N0!QHI8!3)gBcI4GRoA?vvaBzpd>6gZ8=CXK zrKekO7pHU~4i7_kKaA(j^{g--Pria_x!;uLVGIc$T{i1oe&9i&c@jk; z7>3w=<@X{Q(BE#Ljc8O;15x4+c#5P0@D)edazuz1exRwjS!>z6IPl(PD(DF--YEm>C2#zJT?$?A!tu$MvTFvQD6*f?EFWJuqNWJE z>POUnNV+-$08BGXIalR?$wvT~jOkp&0k)!A3Gg2ECjZ%L04j_@oaDiU<1cJax&e;( zo>{jn_@ZnT;5QQFj_vy~{f;Ft0CnW>F0$OBtK&`+OCyVbTNM8)SqZnTfPmBFUxmOI z)%0XBnn6rkO=%53Sda=wAA2QOATPO&Xk+-R2o;hDrwxu}e-$1vR=Z~NsQleq*8|oS zsJ9T;5PcC`s-}K8G>;0zEfKV!Dcjv$?uFzc(7~7#neqKfLq&r!-aFzOWgT=65*Tp zAo(hMxKL7jqNn|Li*vCy_n_DmKmpru%cN5RfF)p9L^{6JZZ=p}wXutgO6WNxiB$}< zk83`dt>j?!x>mEQDZe#EdFN0cOYp)VlQ^8Njq+}HB_<{rjCtSEd~G!D4KbGnl_i#GN3=uOJ6WU4^!{a*%% zRD>#*v4Bia@Bz5)r4#zr8X$F8Lcq8sEHCsWR=7}i%ioqx2s%{SVd)T=8wuiu_kqD!UBO%zB+H;LXv_PI8?9GrW9Y;k_RPN5%V> zF4q6ug<|>AMfx%d!JC4kL>faH#gg&AMj?39aTL6CeWI0IuR@+8HkDkbcj6W`973nn zgjWqDk%?;tiT|r>0G&=VTU^#ioDPS_m8y!;0{13(#6RyFQR;To=1XyuT#q0ATH^Ju zgqIRUcC}GuOg*4NO(fJtdv4RSyG-Qng_rm&eGpUw)hPlS`2X#$jxMlYqJnY&yoCcJ z`~c4!fN*y28;6#yB;BCmAMK2mLe&>n?oN2)WU5XsiU&IE7~!w!w&UJPZO$3`MG0is z3tM)0)9eQn(B&w7qEXln)WEZ+s?x=x3$qw>$A1?FPAj%#P=3gb#Xpi8IV@+;s>-lg zZl1WH?8@jeMEwMCO+rrVEbpj;*`a!HOTEnky9dBsApo+!)a)%lq5-XD``%f39M<-~ z#2y^R`LvqSH?Z$bcfs@Jl+yWbehli4f$!~Bok<_6$N3=5i@I?>Aq5@;vfJje^B9*# zrCy6_mghd*p94O>C+`J-GXkOobb2md3aiwxvm?XzZV#}_9(XC#pC5LF&45DpXEMi_ zfLh6LDwZFu{Xi_m`P2Q$>ulP5`BZk}=iSeOPd6Lv<|9N44c5s3W0P`na*7nraXpKv z185&HUFb2PGVGf=eC1|Hrg>bm-PNlwX{EwnnfzKej;;r@3XQf)=7Bx6ru~raK+sFD z;#x|t0s!#?&>?=#|8O(6(uXnrFo&;c%r*D+J@U`6LP)&u{m96h$;bX7u8t zg?cGOEHcJ91Fy;#rY7`oD$b#v&m3ZUFjvjfUEy)DeGbSpLoc;o;2=xqLp%+*dk;{O z*t#RnTis5Wr;3%5n^t}KWBd>#AKETP`OSc;?WaX~gXB^C6Hwy~&Q|K$#`_Y8!Ddb~ z2e}XOajC^MBVhU-5_y*ZWOhjK?=o&urNvm*WT_fWy|Ddnf!o9SvCT}k-)y2$zX#U) 
zBFq5E$q5j20JSe#CSNK{quOBQQh8)F+;6uO+j~H%<^>FU2TAK2A%i$-bHGU!N=6WD zvx@Gw|NYI;-M`)Tbazmw-{xwb+;s!svorwpBOQ6Jm9l4#05%&6;;QNZ{P;DeCH8DS z!1d~^rl^f~`^J&qmV7jo4!Sy7T_wZuR3#E;3-P@4uF!Yr2AN*~qSW`$k<)MrBEc61 zB)c=3si1UFR$hg=WBjF+_9Vl36a`!jh)2YlI>OR9p=MDJmoS+{CHloAXA84HYIc!# zr@KT~tFr)-Qod*AI9Z^w|~I?HDC$Huf!TceL5+r_y8y4q_)^U(}CjMvmpY^{6AtH7h@b zD5(|6v-Q>k^3ve-(b5IV(swI0V+H&%>O!-#49CIA- z&c~=GdLuYPB;d4>7u)!^FCXp3q$3xpbFE7-) z9h3Rq7?YeaC6v!MeFFYpC_OH|WXYT%GCNm;R2*jb>Y z@oDtf>Hrvvew*}uNo)?F%SOMRceAK3$*BaWFazleZ(rJfUfeZ9bpn8FhM*ymEsk22 zxQ$lOH92!op#6o9-Sil4?%~bicNf7bBn?;qd*gkv){9`eEvFjaI5H@q4G95R&&)8L zX7lS56VaI+2_&GpMHF-Glm1N_s6%+4Gc%NQmU#y^` zM;huzf`tt_9UWLls7f7e(_FAsR4wLU-oT}{s= z67L7JK2E*JEt-I3GqPbT#!jdnnltAmCaZT$vE)>wB+dhH--N@meQiaRDc#`@H%2KJ zL0tdBhX6dIjtD0fTEAz?oj_)aK;6p!v87+Of~n{x?4MtmUJFs@+xJHjzW zB=+w4K$n~6^DTv2>W+yjnFMnTHfty}Ug7M!Wm-4x-wTSgXGO!*XX9Pe%BeI%-J17s z7oKB+e=|hU`MuPxy{nacFbCy`1r01g%pYWv{l*D7tr)AX6?)p`+$!FYqo zPcMRh@0*Ck2AYpl%#5~Af1BhO<9^qf$z5#hNJ)=D!_z<8E!9y951KyPkZg6q?Hq%h zMXzJpqzPKmf!aB?ZOtb25V1UhO^@QelVJXbDE9K~d8c+E(=(1Rb6+8ei`BgiLPKxk z;|$)I+Xm3>?%+AOc{;akMzQ3ICYh>wIHx!#D5L4ogUo8aXQfV%({H+c$bCVue25Pv(N|i}|8ZHc4tL@fRcRbnc$2vt(|K-;WUha|HBK{60>v4L?c6=q zhv!$uj^y2%yy?=I-(-u|;8&gsFT(FkDKF-;xAU&ZcS!gHjg;ZQ{9b$U+S^nv|b3)RgQBbvSITHVuxOQf{6Dgh8-zuBD zuEJ^Nc0%*cx0e^$E@4qm&^%m&O{e9Du@qmTYfKxK_hC!-zs9TdZJdV_!!g!A2m4MV!Nq|n1uZB4|| zxc{nM@(_W$_xqx0G*Z#L?Bud`9#Hc$T3NQsYmnpOjknpPj?3=n)5pF8(#%kN$n3B> sh+Q Date: Fri, 5 Apr 2019 16:07:36 -0700 Subject: [PATCH 04/62] Add note about user set env vars on bare metal (#268) --- docs/general/tensorflow/LaunchBenchmark.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/general/tensorflow/LaunchBenchmark.md b/docs/general/tensorflow/LaunchBenchmark.md index d787ff7a8..f9e79c87b 100644 --- a/docs/general/tensorflow/LaunchBenchmark.md +++ b/docs/general/tensorflow/LaunchBenchmark.md @@ -268,3 +268,11 @@ the following command can be used: --batch-size=1 \ --socket-id 0 ``` + +> When running on bare metal, be aware 
of environment variables that you +have set on your system. The model zoo scripts intentionally do not +overwrite environment variables that have already been set, such as +`OMP_NUM_THREADS`. The same is true when running in a docker container, +but since a new docker container instance is started with each run, you +won't have previously set environment variables, like you may have on +bare metal. From a7dc810a980e37ef25e72043686a7b0e52a06ef0 Mon Sep 17 00:00:00 2001 From: mjkyung Date: Tue, 9 Apr 2019 10:51:49 -0700 Subject: [PATCH 05/62] ssd-mobilenet int8 inference data-location for accuracy to take full file path including the file name as an input (#271) * Change usage for data location to use the full file path * Update the unit test commands --- .../object_detection/tensorflow/ssd-mobilenet/README.md | 6 +++--- .../tensorflow/ssd-mobilenet/inference/int8/model_init.py | 2 +- tests/unit/common/tensorflow/tf_model_args.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md index c6688f159..d2c96dd9a 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md @@ -123,8 +123,8 @@ python launch_benchmark.py \ --batch-size 1 ``` -Or for accuracy where the `--data-location` is the path the directory -where your `coco_val.record` file is located: +Or for accuracy where the `--data-location` is the path to +the tf record file that you generated in step 2: ``` python launch_benchmark.py \ --model-name ssd-mobilenet \ @@ -134,7 +134,7 @@ python launch_benchmark.py \ --socket-id 0 \ --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-avx2-devel-mkl \ --model-source-dir /home//tensorflow/models \ - --data-location /home//coco/output \ + --data-location /home//coco/output/coco_val.record \ --in-graph 
/home//ssdmobilenet_int8_pretrained_model.pb \ --accuracy-only \ --batch-size 1 diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py index 4fdfb3a06..5959abaf2 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py @@ -64,7 +64,7 @@ def __init__(self, args, custom_args=[], platform_util=None): accuracy_script = os.path.join( self.args.intelai_models, self.args.mode, self.args.precision, "coco_int8.sh") - self.command_prefix = "sh {} {} {}/coco_val.record".format( + self.command_prefix = "sh {} {} {}".format( accuracy_script, self.args.input_graph, self.args.data_location) diff --git a/tests/unit/common/tensorflow/tf_model_args.txt b/tests/unit/common/tensorflow/tf_model_args.txt index 79d76806f..4dabf304f 100755 --- a/tests/unit/common/tensorflow/tf_model_args.txt +++ b/tests/unit/common/tensorflow/tf_model_args.txt @@ -32,7 +32,7 @@ run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precis run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --config_file=pipeline.config,numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 1 --num_intra_threads 28 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . 
--socket-id 0 --verbose --config_file=pipeline.config --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 4 --num_intra_threads 16 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset --in-graph=/in_graph/frozen_inference_graph.pb,sh /workspace/intelai_models/inference/fp32/coco_accuracy.sh /in_graph/frozen_inference_graph.pb /dataset/coco_val.record /workspace/models -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --data-location=/dataset --verbose --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --accuracy-only --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb,sh /workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/ssdmobilenet_int8_pretrained_model.pb /dataset/coco_val.record +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --data-location=/dataset, sh /workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/ssdmobilenet_int8_pretrained_model.pb /dataset 
run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size=1 --socket-id 0 --data-location=/dataset --verbose --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --benchmark-only --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/run_frozen_graph_ssdmob.py -g /in_graph/ssdmobilenet_int8_pretrained_model.pb -n 5000 -d /dataset -x --num-inter-threads 2 --num-intra-threads 28 run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py --input_tfrecord_paths=/dataset --output_tfrecord_path=/SSD-mobilenet-out.tfrecord --inference_graph=/in_graph/frozen_inference_graph.pb --discard_image_pixels=True --num_inter_threads=2 --num_intra_threads=28 run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --accuracy-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --benchmark-dir=/workspace/benchmarks --data-location=/dataset,sh /workspace/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/ssdmobilenet_accuracy.sh /in_graph/frozen_inference_graph.pb /dataset From e922a5383d765a31f6de163765f0492335402afd Mon Sep 17 00:00:00 2001 
From: Dina Suehiro Jones Date: Wed, 10 Apr 2019 10:02:35 -0700 Subject: [PATCH 06/62] Fix links to inference and preprocessing files for ResNet50 and ResNet101 (#273) * Fix links that were moved to be shared between int8 and fp32 --- docs/general/tensorflow/LaunchBenchmark.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/general/tensorflow/LaunchBenchmark.md b/docs/general/tensorflow/LaunchBenchmark.md index f9e79c87b..e52482ade 100644 --- a/docs/general/tensorflow/LaunchBenchmark.md +++ b/docs/general/tensorflow/LaunchBenchmark.md @@ -23,11 +23,11 @@ Below the general description is an [index of links](#model-scripts-for-tensorfl * Image Recognition * ResNet50: [init](/benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/model_init.py) | - [inference](/models/image_recognition/tensorflow/resnet50/fp32/eval_image_classifier_inference.py) | - [preprocessing](/models/image_recognition/tensorflow/resnet50/fp32/preprocessing.py) + [inference](/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py) | + [preprocessing](/models/image_recognition/tensorflow/resnet50/inference/preprocessing.py) * ResNet101: [init](/benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/model_init.py) | - [inference](/models/image_recognition/tensorflow/resnet101/fp32/benchmark.py) | - [preprocessing](/models/image_recognition/tensorflow/resnet101/fp32/preprocessing.py) + [inference](/models/image_recognition/tensorflow/resnet101/inference/eval_image_classifier_inference.py) | + [preprocessing](/models/image_recognition/tensorflow/resnet101/inference/preprocessing.py) * InceptionV3: [init](/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/model_init.py) | [inference](/models/image_recognition/tensorflow/inceptionv3/fp32/eval_image_classifier_inference.py) | [preprocessing](/models/image_recognition/tensorflow/inceptionv3/fp32/preprocessing.py) From 
690e26123715d39eb91a602eb878acc98fc9f513 Mon Sep 17 00:00:00 2001 From: mjkyung Date: Wed, 10 Apr 2019 15:17:55 -0700 Subject: [PATCH 07/62] fix a typo (#277) --- .../image_recognition/tensorflow/inception_resnet_v2/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md index 83577516f..26d2d2508 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md @@ -252,7 +252,7 @@ python launch_benchmark.py \ --accuracy-only \ --batch-size 100 \ --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ - --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb \ + --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords ``` From 5e8d35eeb61ef0e9f9c63a739b6e21a412cbf725 Mon Sep 17 00:00:00 2001 From: mjkyung Date: Thu, 11 Apr 2019 11:03:51 -0700 Subject: [PATCH 08/62] Mobilenet V1 Int8 Inference (#264) * add mobinetv1 benchmark * added README.md * added unit tests and update readme * minor fix * code review updated * updated readme * minor fix * fixed unit tests * unit tests fix * updated readme * Arg update * fix command examples * Fix minor typos * Add unit test commands, fix minor typos * Fix hanging indent * Fix hanging indent * code review items fixed * Add custom args * Update unit test command and README.md * change custom arg to use underscore not dash * add unit test commands * update the default values on custom args * update the default values on custom args * remove Inappropriate Intel licensing header --- benchmarks/README.md | 2 +- benchmarks/common/tensorflow/start.sh | 9 +- .../inceptionv3/inference/int8/__init__.py | 2 +- .../tensorflow/mobilenet_v1/README.md | 146 ++++ .../tensorflow/mobilenet_v1/__init__.py | 2 +- 
.../mobilenet_v1/inference/__init__.py | 2 +- .../mobilenet_v1/inference/int8/__init__.py | 19 + .../mobilenet_v1/inference/int8/model_init.py | 99 +++ .../mobilenet_v1/inference/int8/__init__.py | 20 + .../mobilenet_v1/inference/int8/accuracy.py | 130 ++++ .../mobilenet_v1/inference/int8/benchmark.py | 146 ++++ .../mobilenet_v1/inference/int8/cnn_util.py | 50 ++ .../mobilenet_v1/inference/int8/datasets.py | 195 ++++++ .../inference/int8/preprocessing.py | 637 ++++++++++++++++++ .../unit/common/tensorflow/tf_model_args.txt | 3 + 15 files changed, 1456 insertions(+), 6 deletions(-) create mode 100644 benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/__init__.py create mode 100644 benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py create mode 100644 models/image_recognition/tensorflow/mobilenet_v1/inference/int8/__init__.py create mode 100644 models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py create mode 100644 models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py create mode 100644 models/image_recognition/tensorflow/mobilenet_v1/inference/int8/cnn_util.py create mode 100644 models/image_recognition/tensorflow/mobilenet_v1/inference/int8/datasets.py create mode 100644 models/image_recognition/tensorflow/mobilenet_v1/inference/int8/preprocessing.py mode change 100755 => 100644 tests/unit/common/tensorflow/tf_model_args.txt diff --git a/benchmarks/README.md b/benchmarks/README.md index e3fda63ef..ad37797fc 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -22,7 +22,7 @@ dependencies to be installed: | Image Recognition | TensorFlow | [Inception ResNet V2](https://arxiv.org/pdf/1602.07261.pdf) | Inference | [Int8](image_recognition/tensorflow/inception_resnet_v2/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/inception_resnet_v2/README.md#fp32-inference-instructions) | | Image Recognition | TensorFlow | [Inception 
V3](https://arxiv.org/pdf/1512.00567.pdf) | Inference | [Int8](image_recognition/tensorflow/inceptionv3/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/inceptionv3/README.md#fp32-inference-instructions) | | Image Recognition | TensorFlow | [Inception V4](https://arxiv.org/pdf/1602.07261.pdf) | Inference | [Int8](image_recognition/tensorflow/inceptionv4/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/inceptionv4/README.md#fp32-inference-instructions) | -| Image Recognition | TensorFlow | [MobileNet V1](https://arxiv.org/pdf/1704.04861.pdf) | Inference | [FP32](image_recognition/tensorflow/mobilenet_v1/README.md#fp32-inference-instructions) | +| Image Recognition | TensorFlow | [MobileNet V1](https://arxiv.org/pdf/1704.04861.pdf) | Inference | [Int8](image_recognition/tensorflow/mobilenet_v1/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/mobilenet_v1/README.md#fp32-inference-instructions) | | Image Recognition | TensorFlow | [ResNet 101](https://arxiv.org/pdf/1512.03385.pdf) | Inference | [Int8](image_recognition/tensorflow/resnet101/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet101/README.md#fp32-inference-instructions) | | Image Recognition | TensorFlow | [ResNet 50](https://arxiv.org/pdf/1512.03385.pdf) | Inference | [Int8](image_recognition/tensorflow/resnet50/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet50/README.md#fp32-inference-instructions) | | Image Recognition | TensorFlow | [SqueezeNet](https://arxiv.org/pdf/1602.07360.pdf) | Inference | [FP32](image_recognition/tensorflow/squeezenet/README.md#fp32-inference-instructions) | diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index 60500ba3e..88492f8c5 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -397,11 +397,11 @@ function inception_resnet_v2() { fi } -# language 
modeling lm-1b +# language modeling lm-1b function lm-1b() { if [ ${PRECISION} == "fp32" ]; then CMD="${CMD} $(add_steps_args)" - + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model else echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" @@ -435,6 +435,11 @@ function mobilenet_v1() { if [ ${PRECISION} == "fp32" ]; then export PYTHONPATH=${PYTHONPATH}:${MOUNT_EXTERNAL_MODELS_SOURCE}:${MOUNT_EXTERNAL_MODELS_SOURCE}/research:${MOUNT_EXTERNAL_MODELS_SOURCE}/research/slim PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model + elif [ ${PRECISION} == "int8" ]; then + CMD="${CMD} $(add_arg "--input_height" ${input_height}) $(add_arg "--input_width" ${input_width}) \ + $(add_arg "--warmup_steps" ${warmup_steps}) $(add_arg "--steps" ${steps}) $(add_arg "--input_layer" ${input_layer}) \ + $(add_arg "--output_layer" ${output_layer})" + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model else echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" exit 1 diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/__init__.py b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/__init__.py index 87301fd64..139d705c0 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/__init__.py +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/__init__.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright (c) 2018 Intel Corporation +# Copyright (c) 2019 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index 5b2d8e64d..ddbd8858a 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -2,11 +2,157 @@ This document has instructions for how to run MobileNet V1 for the following modes/precisions: +* [Int8 inference](#int8-inference-instructions) * [FP32 inference](#fp32-inference-instructions) Benchmarking instructions and scripts for model training is coming later. + +## Int8 Inference Instructions + +1. Download ImageNet dataset. + + This step is required only for running accuracy, for running benchmark we do not need to provide dataset. + + Register and download the ImageNet dataset. Once you have the raw ImageNet dataset downloaded, we need to convert + it to the TFRecord format. The TensorFlow models repo provides + [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) + to download, process and convert the ImageNet dataset to the TF records format. After converting data, you should have a directory + with the sharded dataset something like below, we only need `validation-*` files, discard `train-*` files: + ``` + $ ll /home/myuser/datasets/ImageNet_TFRecords + -rw-r--r--. 1 user 143009929 Jun 20 14:53 train-00000-of-01024 + -rw-r--r--. 1 user 144699468 Jun 20 14:53 train-00001-of-01024 + -rw-r--r--. 1 user 138428833 Jun 20 14:53 train-00002-of-01024 + ... + -rw-r--r--. 1 user 143137777 Jun 20 15:08 train-01022-of-01024 + -rw-r--r--. 1 user 143315487 Jun 20 15:08 train-01023-of-01024 + -rw-r--r--. 1 user 52223858 Jun 20 15:08 validation-00000-of-00128 + -rw-r--r--. 1 user 51019711 Jun 20 15:08 validation-00001-of-00128 + -rw-r--r--. 1 user 51520046 Jun 20 15:08 validation-00002-of-00128 + ... + -rw-r--r--. 
1 user 52508270 Jun 20 15:09 validation-00126-of-00128 + -rw-r--r--. 1 user 55292089 Jun 20 15:09 validation-00127-of-00128 + ``` +2. Download the pretrained model: + + ``` + $ wget https://storage.cloud.google.com/intel-optimized-tensorflow/models/mobilenetv1_int8_pretrained_model.pb + ``` + +3. Clone the [intelai/models](https://github.com/intelai/models) repo + and then run the benchmarking scripts for either benchmarking throughput, + latency or accuracy. For `--data-location` in the accuracy run, please use the ImageNet validation data path from step 1. + Each benchmark run has user configurable arguments separated from regular arguments by '--' at the end of the command. + Unless configured, these arguments will run with default values. Below are example commands for each benchmark case: + + ``` + $ git clone https://github.com/IntelAI/models.git + + $ cd benchmarks + ``` + + For throughput (using `--benchmark-only`, `--socket-id 0` and `--batch-size 240`): + ``` + python launch_benchmark.py \ + --model-name mobilenet_v1 \ + --precision int8 \ + --mode inference \ + --framework tensorflow \ + --benchmark-only \ + --batch-size 240 \ + --socket-id 0 \ + --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-avx2-devel-mkl \ + -- input_height=224 input_width=224 warmup_steps=10 steps=50 \ + input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" + ``` + + For latency (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`) + ``` + python launch_benchmark.py \ + --model-name mobilenet_v1 \ + --precision int8 \ + --mode inference \ + --framework tensorflow \ + --benchmark-only \ + --batch-size 1 \ + --socket-id 0 \ + --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-avx2-devel-mkl \ + -- input_height=224 input_width=224 warmup_steps=10 steps=50 \ + input_layer="input" 
output_layer="MobilenetV1/Predictions/Reshape_1" + ``` + + For accuracy (using your `--data-location`, `--accuracy-only` and + `--batch-size 100`): + ``` + python launch_benchmark.py \ + --model-name mobilenet_v1 \ + --precision int8 \ + --mode inference \ + --framework tensorflow \ + --accuracy-only \ + --batch-size 100 \ + --socket-id 0 \ + --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-avx2-devel-mkl \ + --data-location /home//imagenet_validation_dataset \ + -- input_height=224 input_width=224 \ + input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" + ``` + + Note that the `--verbose` or `--output-dir` flags can be added to any of the above commands + to get additional debug output or change the default output location. + +4. The log file is saved to the `models/benchmarks/common/tensorflow/logs` directory, + or the directory specified by the `--output-dir` arg. Below are examples of + what the tail of your log file should look like for the different configs. + + Example log tail when benchmarking for throughput: + ``` + OMP: Info #250: KMP_AFFINITY: pid 682 tid 885 thread 55 bound to OS proc set 83 + OMP: Info #250: KMP_AFFINITY: pid 682 tid 886 thread 56 bound to OS proc set 0 + OMP: Info #250: KMP_AFFINITY: pid 682 tid 884 thread 54 bound to OS proc set 82 + [Running warmup steps...] + steps = 10, 1830.24507317 images/sec + [Running benchmark steps...] 
+ steps = 10, 1841.47811007 images/sec + steps = 20, 1848.84108679 images/sec + steps = 30, 1847.84668478 images/sec + steps = 40, 1849.15354305 images/sec + steps = 50, 1840.95611001 images/sec + Ran inference with batch size 240 + Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_int8_20190409_222536.log + ``` + + Example log tail when benchmarking for latency: + ``` + OMP: Info #250: KMP_AFFINITY: pid 681 tid 882 thread 53 bound to OS proc set 81 + OMP: Info #250: KMP_AFFINITY: pid 681 tid 884 thread 55 bound to OS proc set 83 + OMP: Info #250: KMP_AFFINITY: pid 681 tid 885 thread 56 bound to OS proc set 0 + [Running warmup steps...] + steps = 10, 139.81945463 images/sec + [Running benchmark steps...] + steps = 10, 140.212074614 images/sec + steps = 20, 135.230332731 images/sec + steps = 30, 133.508530685 images/sec + steps = 40, 135.724816361 images/sec + steps = 50, 132.714339957 images/sec + Ran inference with batch size 1 + Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_int8_20190409_223122.log + ``` + + Example log tail when running for accuracy: + ``` + Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7009, 0.8933) + Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7011, 0.8933) + Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7013, 0.8933) + Ran inference with batch size 100 + Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_int8_20190409_223621.log + ``` + ## FP32 Inference Instructions 1. 
Download the ImageNet dataset and convert it to the TF records format diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/__init__.py b/benchmarks/image_recognition/tensorflow/mobilenet_v1/__init__.py index cf793ec6a..d9c4123de 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/__init__.py +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/__init__.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2018 Intel Corporation +# Copyright (c) 2019 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/__init__.py b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/__init__.py index cf793ec6a..d9c4123de 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/__init__.py +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/__init__.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2018 Intel Corporation +# Copyright (c) 2019 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/__init__.py b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/__init__.py new file mode 100644 index 000000000..cf793ec6a --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py new file mode 100644 index 000000000..0823604c0 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py @@ -0,0 +1,99 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class ModelInitializer(BaseModelInitializer):
    """Model initializer for MobileNet V1 INT8 inference.

    Assembles the command line that launches either ``benchmark.py`` or
    ``accuracy.py`` for this model; ``run()`` then executes that command.
    """

    def __init__(self, args, custom_args=None, platform_util=None):
        """Build the launch command from the launcher arguments.

        Args:
            args: argument namespace from the launcher. This method reads
                ``socket_id``, ``num_intra_threads``, ``benchmark_only``,
                ``accuracy_only``, ``intelai_models``, ``mode`` and
                ``precision`` from it.
            custom_args: optional list of model-specific argument strings,
                parsed by ``parse_args``. ``None`` means "no custom args".
            platform_util: forwarded unchanged to the base initializer.

        Raises:
            ValueError: if neither ``benchmark_only`` nor ``accuracy_only``
                is set, because there is then no script to run.
        """
        # Use a fresh list per call instead of one shared mutable default.
        custom_args = [] if custom_args is None else custom_args
        super(ModelInitializer, self).__init__(args, custom_args, platform_util)
        self.cmd = self.get_numactl_command(self.args.socket_id) + "python "

        # Set KMP env vars, if they haven't already been set
        self.set_kmp_vars()

        # Set the num_inter_threads and num_intra_threads
        self.set_num_inter_intra_threads()
        # Set env vars, if they haven't already been set
        set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads)

        self.parse_args()

        # accuracy_only is checked first so that it still wins when both
        # flags are set, exactly as the original pair of `if`s behaved.
        if self.args.accuracy_only:
            script_name = "accuracy.py"
            script_args_list = [
                "input_graph", "data_location", "input_height", "input_width",
                "batch_size", "input_layer", "output_layer",
                "num_inter_threads", "num_intra_threads"]
        elif self.args.benchmark_only:
            script_name = "benchmark.py"
            script_args_list = [
                "input_graph", "input_height", "input_width", "batch_size",
                "input_layer", "output_layer", "num_inter_threads",
                "num_intra_threads", "warmup_steps", "steps"]
        else:
            # Previously this fell through and crashed on an unbound local.
            raise ValueError(
                "Either benchmark_only or accuracy_only must be set to "
                "select the script to run.")

        run_script = os.path.join(
            self.args.intelai_models, self.args.mode,
            self.args.precision, script_name)

        self.cmd = self.add_args_to_command(self.cmd + run_script, script_args_list)

    def parse_args(self):
        """Merge the model-specific custom arguments into ``self.args``.

        Nothing is parsed (and no defaults are applied) when
        ``self.custom_args`` is empty.
        """
        if self.custom_args:
            parser = argparse.ArgumentParser()
            parser.add_argument(
                "--input_height", default=224,
                dest='input_height', type=int, help="input height")
            parser.add_argument(
                "--input_width", default=224,
                dest='input_width', type=int, help="input width")
            parser.add_argument(
                '--warmup_steps', dest='warmup_steps',
                help='number of warmup steps',
                type=int, default=10)
            parser.add_argument(
                '--steps', dest='steps',
                help='number of steps',
                type=int, default=50)
            parser.add_argument(
                '--input_layer', dest='input_layer',
                help='name of input layer',
                type=str, default="input")
            parser.add_argument(
                '--output_layer', dest='output_layer',
                help='name of output layer',
                type=str, default="MobilenetV1/Predictions/Reshape_1")

            # Parsing into the existing namespace keeps the launcher's values
            # and adds the model-specific ones next to them.
            self.args = parser.parse_args(self.custom_args,
                                          namespace=self.args)

    def run(self):
        """Execute the assembled command, if there is one."""
        if self.cmd:
            self.run_command(self.cmd)
NUM_TEST_IMAGES = 50000


def load_graph(model_file):
    """Load a serialized GraphDef from `model_file` into a new tf.Graph.

    Files ending in ``.pbtxt`` are parsed as text protos; anything else is
    parsed as a binary proto.

    Args:
        model_file: path to the graph file.

    Returns:
        A new ``tf.Graph`` with the graph definition imported under an
        empty name scope.
    """
    graph = tf.Graph()
    graph_def = tf.GraphDef()

    import os
    file_ext = os.path.splitext(model_file)[1]

    with open(model_file, "rb") as f:
        if file_ext == '.pbtxt':
            text_format.Merge(f.read(), graph_def)
        else:
            graph_def.ParseFromString(f.read())
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')

    return graph


def parse_args(argv=None):
    """Parse the accuracy script's command-line arguments.

    Args:
        argv: list of argument strings; ``None`` means ``sys.argv[1:]``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_graph", default=None,
                        help="graph/model to be executed")
    parser.add_argument("--data_location", default=None,
                        help="full path to the validation data")
    parser.add_argument("--input_height", default=224,
                        type=int, help="input height")
    parser.add_argument("--input_width", default=224,
                        type=int, help="input width")
    parser.add_argument("--batch_size", default=32,
                        type=int, help="batch size")
    parser.add_argument("--input_layer", default="input",
                        help="name of input layer")
    parser.add_argument("--output_layer",
                        default="MobilenetV1/Predictions/Reshape_1",
                        help="name of output layer")
    parser.add_argument(
        '--num_inter_threads',
        help='number threads across operators',
        type=int, default=1)
    parser.add_argument(
        '--num_intra_threads',
        help='number threads for an operator',
        type=int, default=1)
    return parser.parse_args(argv)


def main(argv=None):
    """Run the model over the validation set and print running accuracy.

    Args:
        argv: list of argument strings; ``None`` means ``sys.argv[1:]``.
    """
    args = parse_args(argv)
    if not args.input_graph:
        sys.exit("Please provide a graph file.")

    batch_size = args.batch_size

    dataset = datasets.ImagenetData(args.data_location)
    # input_height/input_width were previously used here without ever being
    # read from the parsed arguments, which raised NameError.
    preprocessor = dataset.get_image_preprocessor()(
        args.input_height, args.input_width, batch_size,
        1,  # device count
        tf.float32,  # data_type for input fed to the graph
        train=False,  # doing inference
        resize_method='bilinear')

    images, labels = preprocessor.minibatch(dataset, subset='validation',
                                            use_datasets=True,
                                            cache_data=False)
    graph = load_graph(args.input_graph)
    input_tensor = graph.get_tensor_by_name(args.input_layer + ":0")
    output_tensor = graph.get_tensor_by_name(args.output_layer + ":0")

    config = tf.ConfigProto()
    config.inter_op_parallelism_threads = args.num_inter_threads
    config.intra_op_parallelism_threads = args.num_intra_threads

    # Build the top-k ops once and feed them through placeholders. Creating
    # new constants and ops on every iteration grew the default graph with
    # each processed batch.
    predictions_ph = tf.placeholder(output_tensor.dtype, shape=[None, None],
                                    name='predictions')
    labels_ph = tf.placeholder(labels[0].dtype, shape=[None], name='labels')
    top1_correct = tf.reduce_sum(
        tf.cast(tf.nn.in_top_k(predictions_ph, labels_ph, 1), tf.float32))
    top5_correct = tf.reduce_sum(
        tf.cast(tf.nn.in_top_k(predictions_ph, labels_ph, 5), tf.float32))

    total_accuracy1, total_accuracy5 = (0.0, 0.0)
    num_processed_images = 0
    num_remaining_images = dataset.num_examples_per_epoch(subset='validation')

    # `sess` runs the input pipeline and the top-k ops (default graph);
    # `sess_graph` runs the imported model graph.
    with tf.Session() as sess:
        sess_graph = tf.Session(graph=graph, config=config)
        try:
            # A trailing partial batch is skipped.
            while num_remaining_images >= batch_size:
                # Reads and preprocess data
                np_images, np_labels = sess.run([images[0], labels[0]])
                num_processed_images += batch_size
                num_remaining_images -= batch_size
                # Compute inference on the preprocessed data
                predictions = sess_graph.run(output_tensor,
                                             {input_tensor: np_images})
                np_accuracy1, np_accuracy5 = sess.run(
                    [top1_correct, top5_correct],
                    {predictions_ph: predictions, labels_ph: np_labels})
                total_accuracy1 += np_accuracy1
                total_accuracy5 += np_accuracy5
                print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)"
                      % (num_processed_images,
                         total_accuracy1 / num_processed_images,
                         total_accuracy5 / num_processed_images))
        finally:
            # The second session was previously never closed.
            sess_graph.close()


if __name__ == "__main__":
    main()
def load_graph(model_file):
    """Load a serialized GraphDef from `model_file` into a new tf.Graph.

    Files ending in ``.pbtxt`` are parsed as text protos; anything else is
    parsed as a binary proto.

    Args:
        model_file: path to the graph file.

    Returns:
        A new ``tf.Graph`` with the graph definition imported under an
        empty name scope.
    """
    graph = tf.Graph()
    graph_def = tf.GraphDef()

    import os
    file_ext = os.path.splitext(model_file)[1]

    with open(model_file, "rb") as f:
        if file_ext == '.pbtxt':
            text_format.Merge(f.read(), graph_def)
        else:
            graph_def.ParseFromString(f.read())
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')

    return graph


def parse_args(argv=None):
    """Parse the benchmark script's command-line arguments.

    Args:
        argv: list of argument strings; ``None`` means ``sys.argv[1:]``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_graph", default=None,
                        help="graph/model to be executed")
    parser.add_argument("--input_height", default=224,
                        type=int, help="input height")
    parser.add_argument("--input_width", default=224,
                        type=int, help="input width")
    parser.add_argument("--batch_size", default=32,
                        type=int, help="batch size")
    parser.add_argument("--input_layer", default="input",
                        help="name of input layer")
    parser.add_argument("--output_layer",
                        default="MobilenetV1/Predictions/Reshape_1",
                        help="name of output layer")
    parser.add_argument(
        '--num_inter_threads',
        help='number threads across operators',
        type=int, default=1)
    parser.add_argument(
        '--num_intra_threads',
        help='number threads for an operator',
        type=int, default=1)
    parser.add_argument("--warmup_steps", type=int, default=10,
                        help="number of warmup steps")
    parser.add_argument("--steps", type=int, default=50,
                        help="number of steps")
    return parser.parse_args(argv)


def main(argv=None):
    """Time inference on one synthetic batch and print images/sec.

    Args:
        argv: list of argument strings; ``None`` means ``sys.argv[1:]``.
    """
    args = parse_args(argv)
    if not args.input_graph:
        sys.exit("Please provide a graph file.")
    # The old check was `assert steps > 10`, which rejected exactly 10 steps
    # despite its message and vanished entirely under `python -O`.
    if args.steps < 10:
        sys.exit("Benchmark steps should be at least 10.")

    batch_size = args.batch_size

    # input_height/input_width were previously used here without ever being
    # read from the parsed arguments, which raised NameError.
    input_shape = [batch_size, args.input_height, args.input_width, 3]
    images = tf.truncated_normal(
        input_shape,
        dtype=tf.float32,
        stddev=10,
        name='synthetic_images')

    # Materialize the synthetic batch once; the same array is fed to the
    # model on every step.
    with tf.Session() as sess:
        image_data = sess.run(images)

    graph = load_graph(args.input_graph)

    input_tensor = graph.get_tensor_by_name(args.input_layer + ":0")
    output_tensor = graph.get_tensor_by_name(args.output_layer + ":0")

    config = tf.ConfigProto()
    config.inter_op_parallelism_threads = args.num_inter_threads
    config.intra_op_parallelism_threads = args.num_intra_threads

    with tf.Session(graph=graph, config=config) as sess:
        sys.stdout.flush()
        print("[Running warmup steps...]")
        for t in range(args.warmup_steps):
            start_time = time.time()
            sess.run(output_tensor, {input_tensor: image_data})
            elapsed_time = time.time() - start_time
            # Report the throughput of every tenth step only.
            if (t + 1) % 10 == 0:
                print("steps = {0}, {1} images/sec"
                      "".format(t + 1, batch_size / elapsed_time))

        print("[Running benchmark steps...]")
        for t in range(args.steps):
            start_time = time.time()
            sess.run(output_tensor, {input_tensor: image_data})
            elapsed_time = time.time() - start_time
            if (t + 1) % 10 == 0:
                print("steps = {0}, {1} images/sec"
                      "".format(t + 1, batch_size / elapsed_time))


if __name__ == "__main__":
    main()
b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/cnn_util.py new file mode 100644 index 000000000..32902d149 --- /dev/null +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/cnn_util.py @@ -0,0 +1,50 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def tensorflow_version_tuple(version=None):
    """Split a TensorFlow version string into ``(major, minor, patch)``.

    Args:
        version: version string to parse; defaults to ``tf.__version__``.

    Returns:
        A tuple ``(int major, int minor, str patch)``. Everything after the
        second dot is kept verbatim in ``patch`` (for example ``'0-rc1'`` or
        ``'0.post1'``); ``patch`` is ``''`` when there is no third component.
    """
    v = tf.__version__ if version is None else version
    # Limit the split to two dots so that extra dotted suffixes stay in
    # `patch` instead of breaking the unpacking.
    parts = v.split('.', 2)
    major, minor = parts[0], parts[1]
    patch = parts[2] if len(parts) > 2 else ''
    return (int(major), int(minor), patch)


def tensorflow_version(version=None):
    """Return the version as a single integer, ``major * 1000 + minor``.

    Args:
        version: version string to parse; defaults to ``tf.__version__``.
    """
    vt = tensorflow_version_tuple(version)
    return vt[0] * 1000 + vt[1]
IMAGENET_NUM_TRAIN_IMAGES = 1281167
IMAGENET_NUM_VAL_IMAGES = 50000


def create_dataset(data_dir, data_name):
    """Create a Dataset instance based on data_dir and data_name.

    Args:
        data_dir: directory holding the data; a falsy value means synthetic
            data.
        data_name: 'imagenet', 'cifar10', or None. When None, the name is
            inferred from ``data_dir`` by substring match.

    Returns:
        An ``ImagenetData`` or ``Cifar10Data`` instance for ``data_dir``.

    Raises:
        ValueError: if the name cannot be inferred or is not supported.
    """
    supported_datasets = {
        'imagenet': ImagenetData,
        'cifar10': Cifar10Data,
    }
    if not data_dir and not data_name:
        # When using synthetic data, use synthetic imagenet images by default.
        data_name = 'imagenet'

    if data_name is None:
        for supported_name in supported_datasets:
            if supported_name in data_dir:
                data_name = supported_name
                break

    if data_name is None:
        raise ValueError('Could not identify name of dataset. '
                         'Please specify with --data_name option.')

    if data_name not in supported_datasets:
        # The names were previously passed as a second exception argument
        # (logging style), so '%s' was never substituted.
        raise ValueError('Unknown dataset. Must be one of %s'
                         % ', '.join(sorted(supported_datasets)))

    return supported_datasets[data_name](data_dir)


class Dataset(object):
    """Abstract class for cnn benchmarks dataset."""

    def __init__(self, name, height=None, width=None, depth=None, data_dir=None,
                 queue_runner_required=False, num_classes=1000):
        self.name = name
        self.height = height
        self.width = width
        # A missing (or zero) depth falls back to 3.
        self.depth = depth or 3

        self.data_dir = data_dir
        self._queue_runner_required = queue_runner_required
        self._num_classes = num_classes

    def tf_record_pattern(self, subset):
        """Return the glob pattern for the TFRecord shards of `subset`."""
        return os.path.join(self.data_dir, '%s-*-of-*' % subset)

    def reader(self):
        """Return a new ``tf.TFRecordReader``."""
        return tf.TFRecordReader()

    @property
    def num_classes(self):
        return self._num_classes

    @num_classes.setter
    def num_classes(self, val):
        self._num_classes = val

    @abstractmethod
    def num_examples_per_epoch(self, subset):
        """Return the number of examples in `subset`; subclasses override."""
        pass

    def __str__(self):
        return self.name

    def get_image_preprocessor(self):
        """Return the preprocessor class; the base class has none."""
        return None

    def queue_runner_required(self):
        return self._queue_runner_required

    def use_synthetic_gpu_images(self):
        """Return True when no data directory was given."""
        return not self.data_dir


class ImagenetData(Dataset):
    """Configuration for Imagenet dataset."""

    def __init__(self, data_dir=None):
        super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir)

    def num_examples_per_epoch(self, subset='train'):
        """Return the image count of the 'train' or 'validation' subset.

        Raises:
            ValueError: for any other subset name.
        """
        if subset == 'train':
            return IMAGENET_NUM_TRAIN_IMAGES
        elif subset == 'validation':
            return IMAGENET_NUM_VAL_IMAGES
        else:
            raise ValueError('Invalid data subset "%s"' % subset)

    def get_image_preprocessor(self):
        """Return the synthetic or record-input preprocessor class."""
        if self.use_synthetic_gpu_images():
            return preprocessing.SyntheticImagePreprocessor
        else:
            return preprocessing.RecordInputImagePreprocessor


class Cifar10Data(Dataset):
    """Configuration for cifar 10 dataset.

    It will mount all the input images to memory.
    """

    def __init__(self, data_dir=None):
        super(Cifar10Data, self).__init__('cifar10', 32, 32, data_dir=data_dir,
                                          queue_runner_required=True,
                                          num_classes=10)

    def read_data_files(self, subset='train'):
        """Reads from data file and returns images and labels in a numpy array.

        Args:
            subset: 'train' (files data_batch_1 .. data_batch_5) or
                'validation' (file test_batch).

        Returns:
            A tuple ``(all_images, all_labels)`` of concatenated numpy
            arrays; the images are cast to float32.

        Raises:
            ValueError: for any other subset name.
        """
        assert self.data_dir, ('Cannot call `read_data_files` when using synthetic '
                               'data')
        if subset == 'train':
            filenames = [os.path.join(self.data_dir, 'data_batch_%d' % i)
                         for i in range(1, 6)]
        elif subset == 'validation':
            filenames = [os.path.join(self.data_dir, 'test_batch')]
        else:
            raise ValueError('Invalid data subset "%s"' % subset)

        import sys  # local import: only needed for the pickle switch below

        # Python 3's unpickler needs an explicit encoding to read pickles
        # written by Python 2; Python 2's cPickle has no such parameter.
        # NOTE(review): assumes the batch files are the Python 2 pickles
        # described at the URL below -- confirm.
        pickle_kwargs = {} if sys.version_info[0] == 2 else {'encoding': 'latin1'}

        inputs = []
        for filename in filenames:
            # Pickle data is binary; text mode fails on Python 3.
            with gfile.Open(filename, 'rb') as f:
                inputs.append(cPickle.load(f, **pickle_kwargs))
        # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
        # input format.
        all_images = np.concatenate(
            [each_input['data'] for each_input in inputs]).astype(np.float32)
        all_labels = np.concatenate(
            [each_input['labels'] for each_input in inputs])
        return all_images, all_labels

    def num_examples_per_epoch(self, subset='train'):
        """Return the image count of the 'train' or 'validation' subset.

        Raises:
            ValueError: for any other subset name.
        """
        if subset == 'train':
            return 50000
        elif subset == 'validation':
            return 10000
        else:
            raise ValueError('Invalid data subset "%s"' % subset)

    def get_image_preprocessor(self):
        """Return the synthetic or CIFAR-10 preprocessor class."""
        if self.use_synthetic_gpu_images():
            return preprocessing.SyntheticImagePreprocessor
        else:
            return preprocessing.Cifar10ImagePreprocessor
License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Image pre-processing utilities. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import math +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf + +from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.data.python.ops import interleave_ops +from tensorflow.contrib.image.python.ops import distort_image_ops +from tensorflow.python.layers import utils +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.platform import gfile +import cnn_util + +from tensorflow.python.ops import control_flow_ops + +def parse_example_proto(example_serialized): + """Parses an Example proto containing a training example of an image. 
+ + The output of the build_image_data.py image preprocessing script is a dataset + containing serialized Example protocol buffers. Each Example proto contains + the following fields: + + image/height: 462 + image/width: 581 + image/colorspace: 'RGB' + image/channels: 3 + image/class/label: 615 + image/class/synset: 'n03623198' + image/class/text: 'knee pad' + image/object/bbox/xmin: 0.1 + image/object/bbox/xmax: 0.9 + image/object/bbox/ymin: 0.2 + image/object/bbox/ymax: 0.6 + image/object/bbox/label: 615 + image/format: 'JPEG' + image/filename: 'ILSVRC2012_val_00041207.JPEG' + image/encoded: + + Args: + example_serialized: scalar Tensor tf.string containing a serialized + Example protocol buffer. + + Returns: + image_buffer: Tensor tf.string containing the contents of a JPEG file. + label: Tensor tf.int32 containing the label. + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + text: Tensor tf.string containing the human-readable label. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, + default_value=''), + } + sparse_float32 = tf.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. 
+ feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.parse_single_example(example_serialized, feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) + ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) + xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) + ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + + # Note that we impose an ordering of (y, x) just to make life difficult. + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(bbox, [0, 2, 1]) + + return features['image/encoded'], label, bbox, features['image/class/text'] + + +def get_image_resize_method(resize_method, batch_position=0): + """Get tensorflow resize method. + + If resize_method is 'round_robin', return different methods based on batch + position in a round-robin fashion. NOTE: If the batch size is not a multiple + of the number of methods, then the distribution of methods will not be + uniform. + + Args: + resize_method: (string) nearest, bilinear, bicubic, area, or round_robin. + batch_position: position of the image in a batch. NOTE: this argument can + be an integer or a tensor + Returns: + one of resize type defined in tf.image.ResizeMethod. + """ + resize_methods_map = { + 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, + 'bilinear': tf.image.ResizeMethod.BILINEAR, + 'bicubic': tf.image.ResizeMethod.BICUBIC, + 'area': tf.image.ResizeMethod.AREA + } + + if resize_method != 'round_robin': + return resize_methods_map[resize_method] + + # return a resize method based on batch position in a round-robin fashion. 
+ resize_methods = resize_methods_map.values() + def lookup(index): + return resize_methods[index] + + def resize_method_0(): + return utils.smart_cond(batch_position % len(resize_methods) == 0, + lambda: lookup(0), resize_method_1) + + def resize_method_1(): + return utils.smart_cond(batch_position % len(resize_methods) == 1, + lambda: lookup(1), resize_method_2) + + def resize_method_2(): + return utils.smart_cond(batch_position % len(resize_methods) == 2, + lambda: lookup(2), lambda: lookup(3)) + + # NOTE(jsimsa): Unfortunately, we cannot use a single recursive function here + # because TF would not be able to construct a finite graph. + + return resize_method_0() + + +def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): + """Decode a JPEG string into one 3-D float image Tensor. + + Args: + image_buffer: scalar string Tensor. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor with values ranging from [0, 1). + """ + # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): + # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): + with tf.name_scope(scope or 'decode_jpeg'): + # Decode the string as an RGB JPEG. + # Note that the resulting image contains an unknown height and width + # that is set dynamically by decode_jpeg. In other words, the height + # and width of image is unknown at compile-time. + image = tf.image.decode_jpeg(image_buffer, channels=3) #, + # fancy_upscaling=False, + # dct_method='INTEGER_FAST') + + # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + + return image + + + +def preprocess_for_eval(image, height, width, + central_fraction=0.875, scope=None): + """Prepare one image for evaluation. + + If height and width are specified it would output an image with that size by + applying resize_bilinear. + + If central_fraction is specified it would crop the central fraction of the + input image. + + Args: + image: 3-D Tensor of image. 
If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). + height: integer + width: integer + central_fraction: Optional Float, fraction of the image to crop. + scope: Optional scope for name_scope. + Returns: + 3-D float Tensor of prepared image. + """ + with tf.name_scope(scope, 'eval_image', [image, height, width]): + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize_bilinear(image, [height, width], + align_corners=False) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image + + + +def apply_with_random_selector(x, func, num_cases): + """Computes func(x, sel), with sel sampled from [0...num_cases-1]. + + Args: + x: input Tensor. + func: Python function to apply. + num_cases: Python int32, number of cases to sample sel from. + + Returns: + The result of func(x, sel), where func receives the value of the + selector as a python integer, but sel is sampled dynamically. + """ + sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) + # Pass the real x only to one of the func calls. + return control_flow_ops.merge([ + func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) + for case in range(num_cases)])[0] + + +def distort_color(image, color_ordering=0, fast_mode=True, scope=None): + """Distort the color of a Tensor image. 
+ + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops for each preprocessing thread. + + Args: + image: 3-D Tensor containing single image in [0, 1]. + color_ordering: Python int, a type of distortion (valid values: 0-3). + fast_mode: Avoids slower ops (random_hue and random_contrast) + scope: Optional scope for name_scope. + Returns: + 3-D Tensor color-distorted image on range [0, 1] + Raises: + ValueError: if color_ordering not in [0, 3] + """ + with tf.name_scope(scope, 'distort_color', [image]): + if fast_mode: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + else: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + elif color_ordering == 2: + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_brightness(image, max_delta=32. / 255.) 
+ image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + elif color_ordering == 3: + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_brightness(image, max_delta=32. / 255.) + else: + raise ValueError('color_ordering must be in [0, 3]') + + # The random_* ops do not necessarily clamp. + return tf.clip_by_value(image, 0.0, 1.0) + + +def distorted_bounding_box_crop(image, + bbox, + min_object_covered=0.1, + aspect_ratio_range=(0.75, 1.33), + area_range=(0.05, 1.0), + max_attempts=100, + scope=None): + """Generates cropped_image using a one of the bboxes randomly distorted. + + See `tf.image.sample_distorted_bounding_box` for more documentation. + + Args: + image: 3-D Tensor of image (it will be converted to floats in [0, 1]). + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole + image. + min_object_covered: An optional `float`. Defaults to `0.1`. The cropped + area of the image must contain at least this fraction of any bounding box + supplied. + aspect_ratio_range: An optional list of `floats`. The cropped area of the + image must have an aspect ratio = width / height within this range. + area_range: An optional list of `floats`. The cropped area of the image + must contain a fraction of the supplied image within in this range. + max_attempts: An optional `int`. Number of attempts at generating a cropped + region of the image of the specified constraints. After `max_attempts` + failures, return the entire image. + scope: Optional scope for name_scope. 
+ Returns: + A tuple, a 3-D Tensor cropped_image and the distorted bbox + """ + with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + + # A large fraction of image datasets contain a human-annotated bounding + # box delineating the region of the image containing the object of interest. + # We choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an + # allowed range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + tf.shape(image), + bounding_boxes=bbox, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box + + # Crop the image to the specified bounding box. + cropped_image = tf.slice(image, bbox_begin, bbox_size) + return cropped_image, distort_bbox + + + +def preprocess_for_train(image, height,width, bbox, + batch_position, + fast_mode=True, + scope=None, + add_image_summaries=True): + """Distort one image for training a network. + + Distorting images provides a useful technique for augmenting the data + set during training in order to make the network invariant to aspects + of the image that do not effect the label. + + Args: + image: 3-D Tensor of image. If dtype is tf.float32 then the range should be + [0, 1], otherwise it would converted to tf.float32 assuming that the range + is [0, MAX], where MAX is largest positive representable number for + int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). 
+ height: integer + width: integer + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. + batch_position: position of the image in a batch, which affects how images + are distorted and resized. NOTE: this argument can be an integer or a + tensor + scope: Optional scope for op_scope. + add_image_summaries: Enable image summaries. + Returns: + 3-D float Tensor of distorted image used for training with range [-1, 1]. + """ + + with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + if bbox is None: + bbox = tf.constant([0.0, 0.0, 1.0, 1.0], + dtype=tf.float32, + shape=[1, 1, 4]) + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox) + if add_image_summaries: + tf.summary.image('image_with_bounding_boxes', image_with_box) + + distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. + distorted_image.set_shape([None, None, 3]) + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distorted_bbox) + if add_image_summaries: + tf.summary.image('images_with_distorted_bounding_box', + image_with_distorted_box) + + + # This resizing operation may distort the images because the aspect + # ratio is not respected. We select a resize method in a round robin + # fashion based on the thread number. + # Note that ResizeMethod contains 4 enumerated resizing methods. + + # We select only 1 case for fast_mode bilinear. 
+ num_resize_cases = 1 if fast_mode else 4 + distorted_image = apply_with_random_selector( + distorted_image, + lambda x, method: tf.image.resize_images(x, [height, width], method), + num_cases=num_resize_cases) + + if add_image_summaries: + tf.summary.image('cropped_resized_image', + tf.expand_dims(distorted_image, 0)) + + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + # Randomly distort the colors. There are 1 or 4 ways to do it. + num_distort_cases = 1 if fast_mode else 4 + distorted_image = apply_with_random_selector( + distorted_image, + lambda x, ordering: distort_color(x, ordering, fast_mode), + num_cases=num_distort_cases) + + if add_image_summaries: + tf.summary.image('final_distorted_image', + tf.expand_dims(distorted_image, 0)) + distorted_image = tf.subtract(distorted_image, 0.5) + distorted_image = tf.multiply(distorted_image, 2.0) + return distorted_image + + +def distort_color(image, batch_position=0, distort_color_in_yiq=False, + scope=None): + """Distort the color of the image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops based on the position of the image in a batch. + + Args: + image: float32 Tensor containing single image. Tensor values should be in + range [0, 1]. + batch_position: the position of the image in a batch. NOTE: this argument + can be an integer or a tensor + distort_color_in_yiq: distort color of input images in YIQ space. + scope: Optional scope for op_scope. + Returns: + color-distorted image + """ + with tf.name_scope(scope or 'distort_color'): + + def distort_fn_0(image=image): + """Variant 0 of distort function.""" + image = tf.image.random_brightness(image, max_delta=32. / 255.) 
+ #if distort_color_in_yiq: + # image = distort_image_ops.random_hsv_in_yiq( + # image, lower_saturation=0.5, upper_saturation=1.5, + # max_delta_hue=0.2 * math.pi) + #else: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + return image + + def distort_fn_1(image=image): + """Variant 1 of distort function.""" + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + #if distort_color_in_yiq: + # image = distort_image_ops.random_hsv_in_yiq( + # image, lower_saturation=0.5, upper_saturation=1.5, + # max_delta_hue=0.2 * math.pi) + #else: + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + return image + + image = utils.smart_cond(batch_position % 2 == 0, distort_fn_0, + distort_fn_1) + # The random_* ops do not necessarily clamp. 
+ image = tf.clip_by_value(image, 0.0, 1.0) + return image + + +class RecordInputImagePreprocessor(object): + """Preprocessor for images with RecordInput format.""" + + def __init__(self, + height, + width, + batch_size, + num_splits, + dtype, + train, + distortions=False, + resize_method="bilinear", + shift_ratio=0, + summary_verbosity=1, + distort_color_in_yiq=False, + fuse_decode_and_crop=False): + self.height = height + self.width = width + self.batch_size = batch_size + self.num_splits = num_splits + self.dtype = dtype + self.train = train + self.resize_method = resize_method + self.shift_ratio = shift_ratio + self.distortions = distortions + self.distort_color_in_yiq = distort_color_in_yiq + self.fuse_decode_and_crop = fuse_decode_and_crop + if self.batch_size % self.num_splits != 0: + raise ValueError( + ('batch_size must be a multiple of num_splits: ' + 'batch_size %d, num_splits: %d') % + (self.batch_size, self.num_splits)) + self.batch_size_per_split = self.batch_size // self.num_splits + self.summary_verbosity = summary_verbosity + + def image_preprocess(self, image_buffer, bbox, batch_position): + """Preprocessing image_buffer as a function of its batch position.""" + if self.train: + image_buffer = tf.image.decode_jpeg( + image_buffer, channels=3, dct_method='INTEGER_FAST') + image = preprocess_for_train(image_buffer, self.height, self.width, bbox, + batch_position) + else: + image = tf.image.decode_jpeg( + image_buffer, channels=3, dct_method='INTEGER_FAST') + image = preprocess_for_eval(image, self.height, self.width) + return image + + def parse_and_preprocess(self, value, batch_position): + image_buffer, label_index, bbox, _ = parse_example_proto(value) + image = self.image_preprocess(image_buffer, bbox, batch_position) + return (label_index, image) + + def minibatch(self, dataset, subset, use_datasets, cache_data, + shift_ratio=-1): + if shift_ratio < 0: + shift_ratio = self.shift_ratio + with tf.name_scope('batch_processing'): + # Build final 
results per split. + images = [[] for _ in range(self.num_splits)] + labels = [[] for _ in range(self.num_splits)] + if use_datasets: + glob_pattern = dataset.tf_record_pattern(subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --data_dir matching: {}' + .format(glob_pattern)) + ds = tf.data.TFRecordDataset.list_files(file_names) + ds = ds.apply( + interleave_ops.parallel_interleave( + tf.data.TFRecordDataset, cycle_length=10)) + if cache_data: + ds = ds.take(1).cache().repeat() + counter = tf.data.Dataset.range(self.batch_size) + counter = counter.repeat() + ds = tf.data.Dataset.zip((ds, counter)) + ds = ds.prefetch(buffer_size=self.batch_size) + ds = ds.shuffle(buffer_size=10000) + ds = ds.repeat() + ds = ds.apply( + batching.map_and_batch( + map_func=self.parse_and_preprocess, + batch_size=self.batch_size_per_split, + num_parallel_batches=self.num_splits)) + ds = ds.prefetch(buffer_size=self.num_splits) + ds_iterator = ds.make_one_shot_iterator() + for d in xrange(self.num_splits): + labels[d], images[d] = ds_iterator.get_next() + + else: + record_input = data_flow_ops.RecordInput( + file_pattern=dataset.tf_record_pattern(subset), + seed=301, + parallelism=64, + buffer_size=10000, + batch_size=self.batch_size, + shift_ratio=shift_ratio, + name='record_input') + records = record_input.get_yield_op() + records = tf.split(records, self.batch_size, 0) + records = [tf.reshape(record, []) for record in records] + for idx in xrange(self.batch_size): + value = records[idx] + (label, image) = self.parse_and_preprocess(value, idx) + split_index = idx % self.num_splits + labels[split_index].append(label) + images[split_index].append(image) + + for split_index in xrange(self.num_splits): + if not use_datasets: + images[split_index] = tf.parallel_stack(images[split_index]) + labels[split_index] = tf.concat(labels[split_index], 0) + images[split_index] = tf.cast(images[split_index], self.dtype) + depth = 3 + 
images[split_index] = tf.reshape( + images[split_index], + shape=[self.batch_size_per_split, self.height, self.width, depth]) + labels[split_index] = tf.reshape(labels[split_index], + [self.batch_size_per_split]) + return images, labels + diff --git a/tests/unit/common/tensorflow/tf_model_args.txt b/tests/unit/common/tensorflow/tf_model_args.txt old mode 100755 new mode 100644 index 4dabf304f..be46aea9a --- a/tests/unit/common/tensorflow/tf_model_args.txt +++ b/tests/unit/common/tensorflow/tf_model_args.txt @@ -52,6 +52,9 @@ run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model- "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de","numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=32 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de 
--vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=1 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=64 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=64 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data-location=/dataset --input_height=224 --input_width=224, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/accuracy.py --input_height=224 --input_width=224 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data_location=/dataset --input_layer=input 
+run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=240 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 
--socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 1 python common/tensorflow/run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=lm-1b --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models,numactl --cpunodebind=0 --membind=0 python /workspace/models/benchmark.py -b=1 -I=100 --inter=1 --intra=28 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 100 From 63c1a9c3a664cb8da45fcbe47c31362a0b3af916 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Thu, 11 Apr 2019 11:40:51 -0700 Subject: [PATCH 09/62] Add deprecation warning for checkpoint argument (#278) --- benchmarks/common/base_benchmark_util.py | 4 +++- benchmarks/launch_benchmark.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git 
a/benchmarks/common/base_benchmark_util.py b/benchmarks/common/base_benchmark_util.py index adb102c3c..3fbc4dc94 100644 --- a/benchmarks/common/base_benchmark_util.py +++ b/benchmarks/common/base_benchmark_util.py @@ -128,7 +128,9 @@ def _define_args(self): help="Specify the location of trained model checkpoint directory. " "If mode=training model/weights will be written to this " "location. If mode=inference assumes that the location points" - " to a model that has already been trained.", + " to a model that has already been trained. Note that using " + "checkpoint files for inference is being deprecated, in favor " + "of using frozen graphs.", dest="checkpoint", default=None, type=check_valid_folder) self._common_arg_parser.add_argument( diff --git a/benchmarks/launch_benchmark.py b/benchmarks/launch_benchmark.py index e3e982e70..993dc8785 100644 --- a/benchmarks/launch_benchmark.py +++ b/benchmarks/launch_benchmark.py @@ -86,6 +86,9 @@ def validate_args(self): if not self.args.benchmark_only and not self.args.accuracy_only: self.args.benchmark_only = True + if self.args.mode == "inference" and self.args.checkpoint: + print("Warning: The --checkpoint argument is being deprecated in favor of using frozen graphs.") + def get_model_use_case(self, benchmark_scripts): """ Infers the use case based on the directory structure for the specified model. 
From 66256b56d05e6daf728d14f0d27eee258e021706 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Thu, 11 Apr 2019 14:25:55 -0700 Subject: [PATCH 10/62] Change Inception ResNet V2 FP32 to use the frozen graph for benchmarking (#276) * Change inception resnet v2 FP32 to use the frozen graph for benchmarking * fix test * Fix file path --- .../tensorflow/inception_resnet_v2/README.md | 60 ++-- .../inference/fp32/model_init.py | 20 +- .../eval_image_classifier.py | 277 ------------------ .../unit/common/tensorflow/tf_model_args.txt | 4 +- 4 files changed, 31 insertions(+), 330 deletions(-) delete mode 100644 models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier.py diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md index 26d2d2508..e547377ca 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md @@ -178,21 +178,12 @@ $ git clone git@github.com:IntelAI/models.git This repository includes launch scripts for running benchmarks and the an optimized version of the Inception ResNet V2 model code. -2. Download the pre-trained Inception ResNet V2 model files: - -For accuracy: +2. Download the pre-trained Inception ResNet V2 model: ``` $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/inception_resnet_v2_fp32_pretrained_model.pb ``` -For throughput and latency: - -``` -$ wget http://download.tensorflow.org/models/inception_resnet_v2_2016_08_30.tar.gz -$ mkdir -p checkpoints && tar -C ./checkpoints/ -zxf inception_resnet_v2_2016_08_30.tar.gz -``` - 3. If you would like to run Inception ResNet V2 inference and test for accuracy, you will need the full ImageNet dataset. Benchmarking for latency and throughput do not require the ImageNet dataset. 
@@ -234,7 +225,7 @@ precision, and docker image to use, along with your path to the ImageNet TF Records that you generated in step 3. Substitute in your own `--data-location` (from step 3, for accuracy -only), `--checkpoint` pre-trained model checkpoint file path (from step 2). +only), `--in-graph` frozen graph file path (from step 2). Inception ResNet V2 can be run for accuracy, latency benchmarking, or throughput benchmarking. Use one of the following examples below, depending on @@ -267,9 +258,8 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --checkpoint /home//checkpoints \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ - --data-location /home//datasets/ImageNet_TFRecords + --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl ``` For throughput (using `--benchmark-only`, `--socket-id 0` and `--batch-size 128`): @@ -283,9 +273,8 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --checkpoint /home//checkpoints \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ - --data-location /home//datasets/ImageNet_TFRecords + --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl ``` Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands @@ -302,36 +291,31 @@ Example log tail when running for accuracy: Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.8036, 0.9526) Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.8036, 0.9525) Processed 50000 images. 
(Top1 accuracy, Top5 accuracy) = (0.8037, 0.9525) -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_inception_resnet_v2_inference_fp32_20190109_081637.log ``` Example log tail when benchmarking for latency: ``` -eval/Accuracy[0] -eval/Recall_5[0.01] -INFO:tensorflow:Finished evaluation at 2019-01-08-01:51:28 -self._total_images_per_sec = 69.7 -self._displayed_steps = 10 -Total images/sec = 7.0 -Latency ms/step = 143.4 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu +Iteration 38: 0.052 sec +Iteration 39: 0.051 sec +Iteration 40: 0.051 sec +Average time: 0.050 sec +Batch size = 1 +Latency: 50.094 ms +Throughput: 19.963 images/sec Ran inference with batch size 1 -Log location outside container: {--output-dir value}/benchmark_inception_resnet_v2_inference_fp32_20190108_015057.log +Log location outside container: {--output-dir value}/benchmark_inception_resnet_v2_inference_fp32_20190410_205213.log ``` Example log tail when benchmarking for throughput: ``` -eval/Accuracy[0.00078125] -eval/Recall_5[0.00375] -INFO:tensorflow:Finished evaluation at 2019-01-08-01:59:37 -self._total_images_per_sec = 457.0 -self._displayed_steps = 10 -Total images/sec = 45.7 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu +Iteration 38: 1.848 sec +Iteration 39: 1.799 sec +Iteration 40: 1.850 sec +Average time: 1.818 sec +Batch size = 128 +Throughput: 70.402 images/sec Ran inference with batch size 128 -Log location outside container: {--output-dir value}/benchmark_inception_resnet_v2_inference_fp32_20190108_015440.log +Log location outside container: {--output-dir value}/benchmark_inception_resnet_v2_inference_fp32_20190410_205628.log +``` diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/model_init.py 
index 045921acd..064bf7848 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/model_init.py @@ -45,20 +45,14 @@ def __init__(self, args, custom_args=[], platform_util=None): if self.args.benchmark_only: run_script = os.path.join(self.args.intelai_models, - "eval_image_classifier.py") + "eval_image_classifier_benchmark.py") - cmd_args = " --dataset_name=imagenet" + \ - " --checkpoint_path=" + self.args.checkpoint + \ - " --eval_dir=" + self.args.checkpoint + \ - " --dataset_dir=" + self.args.data_location + \ - " --dataset_split_name=validation" + \ - " --clone_on_cpu=True" + \ - " --model_name=" + str(self.args.model_name) + \ - " --inter_op_parallelism_threads=" + \ - str(self.args.num_inter_threads) + \ - " --intra_op_parallelism_threads=" + \ - str(self.args.num_intra_threads) + \ - " --batch_size=" + str(self.args.batch_size) + cmd_args = " --input-graph=" + self.args.input_graph + \ + " --inter-op-parallelism-threads=" + \ + str(self.args.num_inter_threads) + \ + " --intra-op-parallelism-threads=" + \ + str(self.args.num_intra_threads) + \ + " --batch-size=" + str(self.args.batch_size) elif self.args.accuracy_only: run_script = os.path.join(self.args.intelai_models, "eval_image_classifier_accuracy.py") diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier.py b/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier.py deleted file mode 100644 index 361836891..000000000 --- a/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier.py +++ /dev/null @@ -1,277 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: EPL-2.0 -# - - -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Generic evaluation script that evaluates a model using a given dataset.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import tensorflow as tf -import os -import time -from datetime import datetime - -import dataset_factory -import nets_factory -import preprocessing_factory - -slim = tf.contrib.slim - -tf.app.flags.DEFINE_integer( - 'batch_size', 100, 'The number of samples in each batch.') - -tf.app.flags.DEFINE_integer( - 'max_num_batches', 1, - 'Max number of batches to evaluate by default use all.') - -tf.app.flags.DEFINE_string( - 'master', '', 'The address of the TensorFlow master to use.') - -tf.app.flags.DEFINE_string( - 'checkpoint_path', '/tmp/tfmodel/', - 'The directory where the model was written to or an absolute path to a ' - 'checkpoint file.') - -tf.app.flags.DEFINE_string( - 'eval_dir', '/tmp/tfmodel/', 'Directory where the results are saved to.') - -tf.app.flags.DEFINE_integer( - 'num_preprocessing_threads', 4, - 'The number of threads used to create the batches.') - -tf.app.flags.DEFINE_string( - 'dataset_name', 'imagenet', 'The name of the dataset to load.') - -tf.app.flags.DEFINE_string( - 'dataset_split_name', 'test', 'The name of the train/test split.') - -tf.app.flags.DEFINE_string( - 'dataset_dir', None, 'The directory where the dataset files are stored.') - -tf.app.flags.DEFINE_integer( - 'labels_offset', 0, - 'An offset for the labels in the dataset. This flag is primarily used to ' - 'evaluate the VGG and ResNet architectures which do not use a background ' - 'class for the ImageNet dataset.') - -tf.app.flags.DEFINE_string( - 'model_name', 'inception_resnet_v2', - 'The name of the architecture to evaluate.') - -tf.app.flags.DEFINE_string( - 'preprocessing_name', None, - 'The name of the preprocessing to use. 
If left ' - 'as `None`, then the model_name flag is used.') - -tf.app.flags.DEFINE_float( - 'moving_average_decay', None, - 'The decay to use for the moving average.' - 'If left as None, then moving averages are not used.') - -tf.app.flags.DEFINE_integer( - 'eval_image_size', None, 'Eval image size') - -tf.app.flags.DEFINE_integer( - 'eval_log_frequency', 10, - 'Number of eval steps to run between displaying ' - 'eval metrics.') - -tf.app.flags.DEFINE_integer( - 'inter_op_parallelism_threads', 1, 'The number of inter-thread.') - -tf.app.flags.DEFINE_integer( - 'intra_op_parallelism_threads', 28, 'The number of intra-thread.') - - -FLAGS = tf.app.flags.FLAGS - -class _LoggerHook(tf.train.SessionRunHook): - """ Logs loss and runtime.""" - - def begin(self): - self._step = -1 - self._displayed_steps = 0 - self._total_images_per_sec = 0 - - def before_run(self, run_context): - self._step += 1 - self._start_time = time.time() - - def after_run(self, run_context, run_values): - duration = time.time() - self._start_time - if (self._step + 1) % FLAGS.eval_log_frequency == 0: - images_per_sec = FLAGS.batch_size / duration - self._displayed_steps += 1 - self._total_images_per_sec += images_per_sec - - format_str = ('%s: step %d, %.1f images/sec') - print ( - format_str % (datetime.now(), (self._step+1), images_per_sec)) - - def end(self, run_context): - print( - 'self._total_images_per_sec = %.1f' % self._total_images_per_sec) - print('self._displayed_steps = %d' % self._displayed_steps) - images_per_sec = self._total_images_per_sec / self._displayed_steps - print('Total images/sec = %.1f' %(images_per_sec)) - if FLAGS.batch_size == 1: - latency = 1000 / images_per_sec - print('Latency ms/step = %.1f' % (latency)) - -def main(_): - if not FLAGS.dataset_dir: - raise ValueError( - 'You must supply the dataset directory with --dataset_dir') - - tf.logging.set_verbosity(tf.logging.INFO) - #os.environ["OMP_NUM_THREADS"] = "54" - with tf.Graph().as_default(): - tf_global_step = 
slim.get_or_create_global_step() - - ###################### - # Select the dataset # - ###################### - dataset = dataset_factory.get_dataset( - FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir) - - #################### - # Select the model # - #################### - network_fn = nets_factory.get_network_fn( - FLAGS.model_name, - num_classes=(dataset.num_classes - FLAGS.labels_offset), - is_training=False) - - ############################################################## - # Create a dataset provider that loads data from the dataset # - ############################################################## - provider = slim.dataset_data_provider.DatasetDataProvider( - dataset, - shuffle=False, - common_queue_capacity=2 * FLAGS.batch_size, - common_queue_min=FLAGS.batch_size) - [image, label] = provider.get(['image', 'label']) - label -= FLAGS.labels_offset - - ##################################### - # Select the preprocessing function # - ##################################### - preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name - image_preprocessing_fn = preprocessing_factory.get_preprocessing( - preprocessing_name, - is_training=False) - - eval_image_size = \ - FLAGS.eval_image_size or network_fn.default_image_size - - image = image_preprocessing_fn( - image, eval_image_size, eval_image_size) - - images, labels = tf.train.batch( - [image, label], - batch_size=FLAGS.batch_size, - num_threads=FLAGS.num_preprocessing_threads, - capacity=5 * FLAGS.batch_size) - - #################### - # Define the model # - #################### - logits, _ = network_fn(images) - - if FLAGS.moving_average_decay: - variable_averages = tf.train.ExponentialMovingAverage( - FLAGS.moving_average_decay, tf_global_step) - variables_to_restore = variable_averages.variables_to_restore( - slim.get_model_variables()) - variables_to_restore[tf_global_step.op.name] = tf_global_step - else: - variables_to_restore = slim.get_variables_to_restore() - - predictions = 
tf.argmax(logits, 1) - #labels = tf.squeeze(labels) - - # Define the metrics: - names_to_values, names_to_updates = \ - slim.metrics.aggregate_metric_map({ - 'Accuracy': slim.metrics.streaming_accuracy( - predictions, labels), - 'Recall_5': slim.metrics.streaming_recall_at_k( - logits, labels, 5), - }) - - # Print the summaries to screen. - for name, value in names_to_values.items(): - summary_name = 'eval/%s' % name - op = tf.summary.scalar(summary_name, value, collections=[]) - op = tf.Print(op, [value], summary_name) - tf.add_to_collection(tf.GraphKeys.SUMMARIES, op) - - # TODO(sguada) use num_epochs=1 - if FLAGS.max_num_batches: - num_batches = FLAGS.max_num_batches - else: - # This ensures that we make a single pass over all of the data. - num_batches = math.ceil( - dataset.num_samples / float(FLAGS.batch_size)) - - num_batches = 100 - - config = tf.ConfigProto( - inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads, - intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads) - - if tf.gfile.IsDirectory(FLAGS.checkpoint_path): - checkpoint_path = tf.train.latest_checkpoint( - FLAGS.checkpoint_path) - else: - checkpoint_path = FLAGS.checkpoint_path - - tf.logging.info('Evaluating %s' % checkpoint_path) - - slim.evaluation.evaluate_once( - master=FLAGS.master, - checkpoint_path=checkpoint_path, - logdir=FLAGS.eval_dir, - num_evals=num_batches, - eval_op=list(names_to_updates.values()), - variables_to_restore=variables_to_restore, - hooks=[_LoggerHook()], - session_config=config) - - -if __name__ == '__main__': - tf.app.run() diff --git a/tests/unit/common/tensorflow/tf_model_args.txt b/tests/unit/common/tensorflow/tf_model_args.txt index be46aea9a..376f8b602 100644 --- a/tests/unit/common/tensorflow/tf_model_args.txt +++ b/tests/unit/common/tensorflow/tf_model_args.txt @@ -13,8 +13,8 @@ run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precis run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition 
--precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=1 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data-location=/dataset,python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data_location=/dataset --input_height=299 --input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --model-source-dir=/workspace/models 
--intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier.py --dataset_name=imagenet --checkpoint_path=/checkpoints --eval_dir=/checkpoints --dataset_dir=/dataset --dataset_split_name=validation --clone_on_cpu=True --model_name=inception_resnet_v2 --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=28 --batch_size=1 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier.py --dataset_name=imagenet --checkpoint_path=/checkpoints --eval_dir=/checkpoints --dataset_dir=/dataset --dataset_split_name=validation --clone_on_cpu=True --model_name=inception_resnet_v2 --inter_op_parallelism_threads=2 --intra_op_parallelism_threads=28 --batch_size=128 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=1 +run_tf_benchmark.py --framework=tensorflow 
--use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=128 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data_location=/dataset --input_height=299 --input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --inter-op-parallelism-threads=1 --intra-op-parallelism-threads=28 --batch-size=1 
run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --inter-op-parallelism-threads=1 --intra-op-parallelism-threads=28 --batch-size=128 From c62be36960391710709a2e03811a9425d331e503 Mon Sep 17 00:00:00 2001 From: mjkyung Date: Fri, 12 Apr 2019 09:47:46 -0700 Subject: [PATCH 11/62] Fix input_height/width arg setup for MobileNet V1 Int8 inference (#280) * Fix arg parse * fix small typo --- benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md | 2 +- .../tensorflow/mobilenet_v1/inference/int8/accuracy.py | 2 ++ .../tensorflow/mobilenet_v1/inference/int8/benchmark.py | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index ddbd8858a..e4570abbe 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -104,7 +104,7 @@ later. ``` Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands - to get additional debug output or change the default output location.. + to get additional debug output or change the default output location. 4. The log file is saved to the `models/benchmarks/common/tensorflow/logs` directory, or the directory specified by the `--output-dir` arg. 
Below are examples of diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py index 69c3c003f..347c39989 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py @@ -77,6 +77,8 @@ def load_graph(model_file): model_file = args.input_graph else: sys.exit("Please provide a graph file.") + input_height = args.input_height + input_width = args.input_width batch_size = args.batch_size input_layer = args.input_layer output_layer = args.output_layer diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py index 5ba410415..7cccb9f23 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py @@ -94,6 +94,8 @@ def load_graph(model_file): model_file = args.input_graph else: sys.exit("Please provide a graph file.") + input_height = args.input_height + input_width = args.input_width batch_size = args.batch_size input_layer = args.input_layer output_layer = args.output_layer From 12c35fa7128711448ffad6ef671440c499f5cb1e Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Fri, 12 Apr 2019 13:47:03 -0700 Subject: [PATCH 12/62] Add support for custom volumes (#279) * Add support for custom volumes * Launch script documentation update --- benchmarks/common/utils/validators.py | 20 +++++++++ benchmarks/launch_benchmark.py | 17 +++++++- docs/general/tensorflow/LaunchBenchmark.md | 51 +++++++++++++++++++++- tests/unit/common/utils/test_validators.py | 27 +++++++++++- tests/unit/test_launch_benchmark.py | 23 +++++++++- 5 files changed, 133 insertions(+), 5 deletions(-) diff --git a/benchmarks/common/utils/validators.py 
b/benchmarks/common/utils/validators.py index 54f280dfd..16ec18aba 100644 --- a/benchmarks/common/utils/validators.py +++ b/benchmarks/common/utils/validators.py @@ -88,3 +88,23 @@ def check_valid_file_or_dir(value): raise ArgumentTypeError("{} does not exist.".format(value)) check_for_link(value) return value + + +def check_volume_mount(value): + """ + Verifies that the value is a valid docker volume mount, where there should be + at least two fields separated by a : (for the local directory to mount and the + path to where the directory will be mounted in the container). The third + optional field is for extra options like read only. + """ + if value: + # Check that we have at least 2 fields and at most 3 fields + if not 3 > value.count(":") > 0: + raise ArgumentTypeError( + "{} is not a valid volume mount string where ':' is used to separate the fields. " + "See https://docs.docker.com/storage/volumes for information on formatting the volume " + "mount string".format(value)) + + # Check that the local directory specified is a valid folder and not a link + check_valid_folder(value.split(':')[0]) + return value diff --git a/benchmarks/launch_benchmark.py b/benchmarks/launch_benchmark.py index 993dc8785..5dd7fbca1 100644 --- a/benchmarks/launch_benchmark.py +++ b/benchmarks/launch_benchmark.py @@ -29,7 +29,7 @@ import sys from argparse import ArgumentParser from common import base_benchmark_util -from common.utils.validators import check_no_spaces +from common.utils.validators import check_no_spaces, check_volume_mount class LaunchBenchmark(base_benchmark_util.BaseBenchmarkUtil): @@ -67,6 +67,13 @@ def parse_args(self): "If no docker image is specified, then no docker container will be used.", dest="docker_image", default=None, type=check_no_spaces) + arg_parser.add_argument( + "--volume", + help="Specify a custom volume to mount in the container, which follows the same format as the " + "docker --volume flag (https://docs.docker.com/storage/volumes/). 
" + "This argument can only be used in conjunction with a --docker-image.", + action="append", dest="custom_volumes", type=check_volume_mount) + arg_parser.add_argument( "--debug", help="Launches debug mode which doesn't execute " "start.sh when running in a docker container.", action="store_true") @@ -86,6 +93,10 @@ def validate_args(self): if not self.args.benchmark_only and not self.args.accuracy_only: self.args.benchmark_only = True + if self.args.custom_volumes and not self.args.docker_image: + raise ValueError("Volume mounts can only be used when running in a docker container " + "(a --docker-image must be specified when using --volume).") + if self.args.mode == "inference" and self.args.checkpoint: print("Warning: The --checkpoint argument is being deprecated in favor of using frozen graphs.") @@ -310,6 +321,10 @@ def run_docker_container(self, benchmark_scripts, intelai_models, env_var_dict): volume_mounts.extend([ "--volume", "{}:{}".format(in_graph_dir, "/in_graph")]) + if args.custom_volumes: + for custom_volume in args.custom_volumes: + volume_mounts.extend(["--volume", custom_volume]) + docker_run_cmd = ["docker", "run"] # only use -it when debugging, otherwise we might get TTY error diff --git a/docs/general/tensorflow/LaunchBenchmark.md b/docs/general/tensorflow/LaunchBenchmark.md index e52482ade..8544c7320 100644 --- a/docs/general/tensorflow/LaunchBenchmark.md +++ b/docs/general/tensorflow/LaunchBenchmark.md @@ -103,14 +103,61 @@ optional arguments: Folder to dump output into. -g INPUT_GRAPH, --in-graph INPUT_GRAPH Full path to the input graph + --volume CUSTOM_VOLUMES + Specify a custom volume to mount in the container, + which follows the same format as the docker --volume + flag (https://docs.docker.com/storage/volumes/). This + argument can only be used in conjunction with a + --docker-image. 
--debug Launches debug mode which doesn't execute start.sh ``` +## Volume mounts + +When running the launch script using a docker image, volumes will +automatically get mounted in the container for the following +directories: + +| Directory | Mount location in the container | +|-----------|---------------------------------| +| Model zoo `/benchmarks` code | `/workspace/benchmarks` | +| Model zoo `/models` code | `/workspace/intelai_models` | +| `--model-source-dir` code | `/workspace/models` | +| `--checkpoint` directory | `/checkpoints` | +| `--in-graph` file | `/in_graph` | +| `--data-location` | `/dataset` | + +If you would like additional directories mounted in the docker +container, you can specify them by using the `--volume` flag with the +same `:` separated field format [as docker](https://docs.docker.com/storage/volumes/). +For example, the following command will mount `/home//custom_folder_1` +in the container at `/custom_folder_1` and `/home//custom_folder_2` +in the container at `/custom_folder_2`: + +``` +$ python launch_benchmark.py \ + --in-graph /home//resnet50_fp32_pretrained_model.pb \ + --model-name resnet50 \ + --framework tensorflow \ + --precision fp32 \ + --mode inference \ + --batch-size 1 \ + --socket-id 0 \ + --data-location /home//Imagenet_Validation \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --volume /home//custom_folder_1:/custom_folder_1 \ + --volume /home//custom_folder_2:/custom_folder_2 +``` + +Note that volume mounting only applies when running in a docker +container. When running on [bare metal](#alpha-feature-running-on-bare-metal), +files are accessed in their original location. 
+ ## Debugging The `--debug` flag in the `launch_benchmarks.py` script gives you a -shell into the docker container with the volumes mounted for any -dataset, pretrained model, model source code, etc that has been +shell into the docker container with the [volumes mounted](#volume-mounts) +for any dataset, pretrained model, model source code, etc that has been provided by the other flags. It does not execute the `start.sh` script, and is intended as a way to setup an environment for quicker iteration when debugging and doing development. From the shell, you can manually diff --git a/tests/unit/common/utils/test_validators.py b/tests/unit/common/utils/test_validators.py index 369ddfd76..2f590a23e 100644 --- a/tests/unit/common/utils/test_validators.py +++ b/tests/unit/common/utils/test_validators.py @@ -26,7 +26,7 @@ from common.utils.validators import (check_for_link, check_no_spaces, check_positive_number, check_positive_number_or_equal_to_negative_one, check_valid_filename, - check_valid_folder, check_valid_file_or_dir) + check_valid_folder, check_valid_file_or_dir, check_volume_mount) @pytest.fixture() @@ -152,3 +152,28 @@ def test_check_valid_file_or_dir(mock_link, mock_exists): def test_check_valid_file_or_dir_bad(): with pytest.raises(ArgumentTypeError): check_valid_file_or_dir('3245jlnsdfnsfd234ofds') + + +@pytest.mark.parametrize("volume_mount_str", + ["foo", + "foo:foo:foo:foo", + "foo,foo"]) +def test_bad_volume_mount_strings(volume_mount_str): + with pytest.raises(ArgumentTypeError): + check_volume_mount(volume_mount_str) + + +def test_valid_volume_mount(): + # create temp directory + temp_dir = tempfile.mkdtemp() + + try: + # test string that mounts local directory with mount path + volume_mount = temp_dir + ":/mount_path" + check_volume_mount(volume_mount) + + # test string that mounts local directory with mount path and specifies read only + volume_mount = temp_dir + ":/mount_path:ro" + check_volume_mount(volume_mount) + finally: + os.rmdir(temp_dir) 
diff --git a/tests/unit/test_launch_benchmark.py b/tests/unit/test_launch_benchmark.py index 608adc464..35bb4b70f 100644 --- a/tests/unit/test_launch_benchmark.py +++ b/tests/unit/test_launch_benchmark.py @@ -167,7 +167,13 @@ def test_launch_benchmark_parse_unknown_args(launch_benchmark): "--accuracy-only", "--output-results"], "--output-results can only be used when running " - "inference with a dataset"] + "inference with a dataset"], + ['catch_error', SystemExit, ["--model-name", test_model_name, + "--framework", test_framework, + "--mode", test_mode, + "--precision", test_precision, + "--volume", "~:test"], + "Volume mounts can only be used when running in a docker container"], ], indirect=True) def test_launch_benchmark_parse_bad_args(launch_benchmark): """ @@ -216,3 +222,18 @@ def test_bare_metal(launch_benchmark, mock_popen): # ensure env vars are set assert os.environ["TEST_ENV_VAR_1"] == test_env_vars["TEST_ENV_VAR_1"] assert os.environ["TEST_ENV_VAR_2"] == test_env_vars["TEST_ENV_VAR_2"] + + +def test_launch_benchmark_custom_volume(launch_benchmark, mock_popen): + """ + Verifies the docker run command includes custom volumes + """ + custom_volumes = ["~:/foo1", "~:/foo2"] + launch_benchmark.args.custom_volumes = custom_volumes + launch_benchmark.main() + assert mock_popen.called + args, _ = mock_popen.call_args + # convert the run command args to a string and then check for the custom volume mounts + docker_run_cmd = " ".join(args[0]) + for custom_volume in custom_volumes: + assert "--volume {}".format(custom_volume) in docker_run_cmd From 66e48623615eed98c73c7240a6e9c841aa44a66c Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Fri, 12 Apr 2019 15:55:08 -0700 Subject: [PATCH 13/62] Fix launch_benchmark.py --help output so that it doesn't require other args (#282) * Fix help so that it doesn't require args * Add test for --help * Fixing file name in comment --- benchmarks/common/base_benchmark_util.py | 12 ++++++++---- 
tests/unit/test_launch_benchmark.py | 21 +++++++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/benchmarks/common/base_benchmark_util.py b/benchmarks/common/base_benchmark_util.py index 3fbc4dc94..89df56cde 100644 --- a/benchmarks/common/base_benchmark_util.py +++ b/benchmarks/common/base_benchmark_util.py @@ -23,6 +23,7 @@ from __future__ import print_function import os +import sys from argparse import ArgumentParser from common import platform_util @@ -47,6 +48,9 @@ def _define_args(self): """define args for the benchmark interface shared by FP32 and int8 models""" + # only require the arg, if we aren't just printing out --help + required_arg = "--help" not in sys.argv + self._common_arg_parser = ArgumentParser( add_help=False, description="Parse args for base benchmark " "interface") @@ -54,7 +58,7 @@ def _define_args(self): self._common_arg_parser.add_argument( "-f", "--framework", help="Specify the name of the deep learning framework to use.", - dest="framework", default=None, required=True) + dest="framework", default=None, required=required_arg) self._common_arg_parser.add_argument( "-r", "--model-source-dir", @@ -64,15 +68,15 @@ def _define_args(self): self._common_arg_parser.add_argument( "-p", "--precision", help="Specify the model precision to use: fp32, int8, or bfloat16", - required=True, choices=["fp32", "int8", "bfloat16"], + required=required_arg, choices=["fp32", "int8", "bfloat16"], dest="precision") self._common_arg_parser.add_argument( "-mo", "--mode", help="Specify the type training or inference ", - required=True, choices=["training", "inference"], dest="mode") + required=required_arg, choices=["training", "inference"], dest="mode") self._common_arg_parser.add_argument( - "-m", "--model-name", required=True, + "-m", "--model-name", required=required_arg, help="model name to run benchmarks for", dest="model_name") self._common_arg_parser.add_argument( diff --git a/tests/unit/test_launch_benchmark.py 
b/tests/unit/test_launch_benchmark.py index 35bb4b70f..03b96f697 100644 --- a/tests/unit/test_launch_benchmark.py +++ b/tests/unit/test_launch_benchmark.py @@ -224,6 +224,27 @@ def test_bare_metal(launch_benchmark, mock_popen): assert os.environ["TEST_ENV_VAR_2"] == test_env_vars["TEST_ENV_VAR_2"] +def test_help(mock_platform_util, capsys): + """ Tests `launch_benchmark.py --help` output and ensures there is no error """ + with mock_patch.object(sys, 'argv', ["launch_benchmark.py", "--help"]): + with pytest.raises(SystemExit) as e: + LaunchBenchmark(mock_platform_util) + assert e.value.code == 0 + + # get the stdout and check the output + captured = capsys.readouterr() + assert "usage: launch_benchmark.py [-h] " in captured.out + + # check for an arg that is only in launch_benchmark.py + assert "--docker-image DOCKER_IMAGE" in captured.out + + # check for an arg that's in base_benchmark_util.py + assert "-f FRAMEWORK, --framework FRAMEWORK" in captured.out + + # make sure there were no errors printed + assert "error" not in captured.out.lower() + + def test_launch_benchmark_custom_volume(launch_benchmark, mock_popen): """ Verifies the docker run command includes custom volumes From 76fdf16bdaac69dd45851768e8c1aecd99bf8840 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Fri, 12 Apr 2019 16:27:11 -0700 Subject: [PATCH 14/62] Clean up log snippets in docs (#283) * Clean up log snippets in docs * Indentation --- .../adversarial_networks/tensorflow/dcgan/README.md | 2 -- .../content_creation/tensorflow/draw/README.md | 4 ---- .../tensorflow/facenet/README.md | 6 ------ .../tensorflow/inceptionv3/README.md | 12 ------------ .../tensorflow/inceptionv4/README.md | 6 ------ .../tensorflow/mobilenet_v1/README.md | 6 ------ .../image_recognition/tensorflow/resnet101/README.md | 10 ---------- .../image_recognition/tensorflow/resnet50/README.md | 10 ---------- .../tensorflow/squeezenet/README.md | 4 ---- .../image_segmentation/tensorflow/maskrcnn/README.md | 2 -- 
.../image_segmentation/tensorflow/unet/README.md | 2 -- .../language_translation/tensorflow/gnmt/README.md | 4 ---- .../tensorflow/transformer_language/README.md | 4 ---- .../tensorflow/faster_rcnn/README.md | 8 -------- .../object_detection/tensorflow/rfcn/README.md | 11 ++--------- .../tensorflow/ssd-mobilenet/README.md | 8 -------- .../tensorflow/ssd-resnet34/README.md | 2 -- .../recommendation/tensorflow/wide_deep/README.md | 2 -- .../text_to_speech/tensorflow/wavenet/README.md | 2 -- docs/image_recognition/tensorflow/Tutorial.md | 8 +------- docs/recommendation/tensorflow/Tutorial.md | 6 +----- 21 files changed, 4 insertions(+), 115 deletions(-) diff --git a/benchmarks/adversarial_networks/tensorflow/dcgan/README.md b/benchmarks/adversarial_networks/tensorflow/dcgan/README.md index 7852bcae3..d552ac46d 100644 --- a/benchmarks/adversarial_networks/tensorflow/dcgan/README.md +++ b/benchmarks/adversarial_networks/tensorflow/dcgan/README.md @@ -72,8 +72,6 @@ Batch size: 100 Batches number: 500 Time spent per BATCH: 35.8268 ms Total samples/sec: 2791.2030 samples/s -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_dcgan_inference_fp32_20190117_220342.log ``` \ No newline at end of file diff --git a/benchmarks/content_creation/tensorflow/draw/README.md b/benchmarks/content_creation/tensorflow/draw/README.md index c56c08712..159f8de7b 100644 --- a/benchmarks/content_creation/tensorflow/draw/README.md +++ b/benchmarks/content_creation/tensorflow/draw/README.md @@ -82,8 +82,6 @@ modes/precisions: Time spent per BATCH: 6.6667 ms Total samples/sec: 149.9996 samples/s Outputs saved in file: /home//mnist/draw_data.npy - lscpu_path_cmd = command -v lscpu - lscpu located here: b'/usr/bin/lscpu' Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_draw_inference_fp32_20190123_012947.log ``` @@ -97,8 +95,6 @@ 
modes/precisions: Time spent per BATCH: 28.1952 ms Total samples/sec: 3546.7006 samples/s Outputs saved in file: /home//mnist/draw_data.npy - lscpu_path_cmd = command -v lscpu - lscpu located here: b'/usr/bin/lscpu' Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_draw_inference_fp32_20190123_013432.log ``` \ No newline at end of file diff --git a/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md b/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md index 7d30e25f2..0e7e0d307 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md +++ b/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md @@ -79,8 +79,6 @@ Total samples/sec: 33.1608 samples/s 2019-03-28 21:00:02.725722: W tensorflow/core/kernels/queue_base.cc:277] _1_batch_join/fifo_queue: Skipping cancelled enqueue attempt with queue not closed 2019-03-28 21:00:02.725746: W tensorflow/core/kernels/queue_base.cc:277] _1_batch_join/fifo_queue: Skipping cancelled enqueue attempt with queue not closed 2019-03-28 21:00:02.725776: W tensorflow/core/kernels/queue_base.cc:277] _1_batch_join/fifo_queue: Skipping cancelled enqueue attempt with queue not closed -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_facenet_inference_fp32_20190328_205911.log ``` @@ -112,8 +110,6 @@ Accuracy: 0.98833+-0.00489 Validation rate: 0.96200+-0.01968 @ FAR=0.00100 Area Under Curve (AUC): 0.999 Equal Error Rate (EER): 0.011 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_facenet_inference_fp32_20190329_002623.log ``` @@ -146,8 +142,6 @@ Accuracy: 0.98833+-0.00489 Validation rate: 0.96200+-0.01968 @ FAR=0.00100 Area Under Curve (AUC): 0.999 Equal Error Rate (EER): 0.011 -lscpu_path_cmd 
= command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_facenet_inference_fp32_20190328_214145.log ``` diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md index 512d4fd1e..7eb091edc 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md @@ -189,8 +189,6 @@ Example log tail when running for accuracy: ``` Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7666, 0.9333) -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Executing command: python /workspace/intelai_models/int8/accuracy.py --input_height=299 --input_width=299 --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/in_graph/inceptionv3_int8_pretrained_model.pb --data_location=/dataset Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_int8_20190104_013246.log @@ -203,8 +201,6 @@ steps = 470, 53.7256017113 images/sec steps = 480, 52.5430812016 images/sec steps = 490, 52.9076139058 images/sec steps = 500, 53.5021876395 images/sec -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_int8_20190223_194002.log ``` @@ -216,8 +212,6 @@ steps = 470, 370.435654276 images/sec steps = 480, 369.710160177 images/sec steps = 490, 369.083388904 images/sec steps = 500, 370.287978128 images/sec -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 128 Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_int8_20190223_194314.log ``` @@ -283,8 +277,6 @@ Average time: 0.014 sec Batch size = 1 Latency: 14.442 ms Throughput: 69.243 
images/sec -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_fp32_20190104_025220.log ``` @@ -315,8 +307,6 @@ Iteration 40: 0.757 sec Average time: 0.760 sec Batch size = 128 Throughput: 168.431 images/sec -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 128 Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_fp32_20190104_024842.log ``` @@ -341,8 +331,6 @@ Example log tail when benchmarking for accuracy: Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7673, 0.9341) Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7674, 0.9341) Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7675, 0.9342) -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_fp32_20190104_023816.log ``` diff --git a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md index 14f1bed98..1f472509b 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md @@ -106,8 +106,6 @@ other precisions are coming later. Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7983, 0.9504) Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7984, 0.9504) Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7984, 0.9504) - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu Ran inference with batch size 100 Log location outside container: /benchmark_inceptionv4_inference_int8_20190306_221608.log ``` @@ -122,8 +120,6 @@ other precisions are coming later. 
steps = 30, 184.620504126 images/sec steps = 40, 183.900309054 images/sec steps = 50, 184.110358713 images/sec - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu Ran inference with batch size 240 Log location outside container: /benchmark_inceptionv4_inference_int8_20190306_215858.log ``` @@ -139,8 +135,6 @@ other precisions are coming later. steps = 40, 31.9682931663 images/sec steps = 50, 31.6665962009 images/sec Latency: 31.936 ms - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: /benchmark_inceptionv4_inference_int8_20190306_215702.log ``` diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index e4570abbe..d4e10910b 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -261,8 +261,6 @@ later. self._displayed_steps = 10 Total images/sec = 81.0 Latency ms/step = 12.4 - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_fp32_20190104_200218.log ``` @@ -278,8 +276,6 @@ later. self._total_images_per_sec = 1810.2 self._displayed_steps = 10 Total images/sec = 181.0 - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_fp32_20190104_200512.log ``` @@ -288,8 +284,6 @@ later. Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7104, 0.8999) Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7103, 0.8999) Processed 50000 images. 
(Top1 accuracy, Top5 accuracy) = (0.7102, 0.8999) - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_fp32_20190110_211648.log ``` \ No newline at end of file diff --git a/benchmarks/image_recognition/tensorflow/resnet101/README.md b/benchmarks/image_recognition/tensorflow/resnet101/README.md index 4e25a41f1..7343f472a 100644 --- a/benchmarks/image_recognition/tensorflow/resnet101/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet101/README.md @@ -176,8 +176,6 @@ Example log tail when running for accuracy: Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7690, 0.9304) Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7691, 0.9305) Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7691, 0.9305) -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_resnet101_inference_int8_20190104_205838.log ``` @@ -189,8 +187,6 @@ steps = 470, 48.3195530058 images/sec steps = 480, 47.2792312364 images/sec steps = 490, 46.3175214744 images/sec steps = 500, 45.4044245083 images/sec -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_resnet101_inference_int8_20190223_191406.log ``` @@ -202,8 +198,6 @@ steps = 470, 328.906266308 images/sec steps = 480, 322.0451309 images/sec steps = 490, 315.455582114 images/sec steps = 500, 309.142758646 images/sec -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 128 Log location outside container: {--output-dir value}/benchmark_resnet101_inference_int8_20190223_192438.log ``` @@ -272,8 +266,6 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/resnet10 steps = 80, 
169.258177508 images/sec steps = 90, 150.457869027 images/sec steps = 100, 135.433960175 images/sec - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu Ran inference with batch size 128 Log location outside container: {--output-dir value}/benchmark_resnet101_inference_fp32_20190104_204615.log ``` @@ -304,8 +296,6 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/resnet10 Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7639, 0.9289) Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7641, 0.9289) Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7640, 0.9289) - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_resnet101_inference_fp32_20190104_201506.log ``` diff --git a/benchmarks/image_recognition/tensorflow/resnet50/README.md b/benchmarks/image_recognition/tensorflow/resnet50/README.md index 8389c041f..0b73a4e56 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50/README.md @@ -71,8 +71,6 @@ Processed 49700 images. (Top1 accuracy, Top5 accuracy) = (0.7361, 0.9155) Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7360, 0.9154) Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7361, 0.9155) Processed 50000 images. 
(Top1 accuracy, Top5 accuracy) = (0.7360, 0.9154) -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_resnet50_inference_int8_20190104_212224.log ``` @@ -108,8 +106,6 @@ steps = 470, 460.113806562 images/sec steps = 480, 460.073982602 images/sec steps = 490, 463.289831148 images/sec steps = 500, 463.521427264 images/sec -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 128 Log location outside container: {--output-dir value}/benchmark_resnet50_inference_int8_20190223_180546.log ``` @@ -176,8 +172,6 @@ Average time: 0.011 sec Batch size = 1 Latency: 10.924 ms Throughput: 91.541 images/sec -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_resnet50_inference_fp32_20190104_215326.log ``` @@ -213,8 +207,6 @@ Iteration 40: 0.652 sec Average time: 0.653 sec Batch size = 128 Throughput: 196.065 images/sec -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 128 Log location outside container: {--output-dir value}/benchmark_resnet50_inference_fp32_20190104_215655.log ``` @@ -243,8 +235,6 @@ something like this: ``` ... Processed 50000 images. 
(Top1 accuracy, Top5 accuracy) = (0.7430, 0.9188) -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_resnet50_inference_fp32_20190104_213452.log ``` diff --git a/benchmarks/image_recognition/tensorflow/squeezenet/README.md b/benchmarks/image_recognition/tensorflow/squeezenet/README.md index 21bcf3fb0..355efca72 100644 --- a/benchmarks/image_recognition/tensorflow/squeezenet/README.md +++ b/benchmarks/image_recognition/tensorflow/squeezenet/README.md @@ -114,8 +114,6 @@ SqueezeNet Inference Summary: throughput[med] = 837.1 image/sec latency[median] = 1.195 ms -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 64 Log location outside container: {--output-dir value}/benchmark_squeezenet_inference_fp32_20190104_220051.log ``` @@ -129,8 +127,6 @@ SqueezeNet Inference Summary: throughput[med] = 115.3 image/sec latency[median] = 8.67 ms -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_squeezenet_inference_fp32_20190104_220712.log ``` diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md b/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md index 3c377ea30..c862032f7 100644 --- a/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md +++ b/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md @@ -91,7 +91,5 @@ Batch size: 1 Time spent per BATCH: 609.6943 ms Total samples/sec: 1.6402 samples/s Total time: 35.407243490219116 -lscpu_path_cmd = command -v lscpu -lscpu located here: b'/usr/bin/lscpu' Log location outside container: {--output-dir value}/benchmark_maskrcnn_inference_fp32_20190111_205935.log ``` \ No newline at end of file diff --git a/benchmarks/image_segmentation/tensorflow/unet/README.md b/benchmarks/image_segmentation/tensorflow/unet/README.md 
index 7660771f9..6f6671e66 100644 --- a/benchmarks/image_segmentation/tensorflow/unet/README.md +++ b/benchmarks/image_segmentation/tensorflow/unet/README.md @@ -73,8 +73,6 @@ modes/precisions: ``` Time spent per BATCH: 1.1043 ms Total samples/sec: 905.5344 samples/s - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_unet_inference_fp32_20190201_205601.log ``` \ No newline at end of file diff --git a/benchmarks/language_translation/tensorflow/gnmt/README.md b/benchmarks/language_translation/tensorflow/gnmt/README.md index 285df8ee5..f52bcdfc6 100644 --- a/benchmarks/language_translation/tensorflow/gnmt/README.md +++ b/benchmarks/language_translation/tensorflow/gnmt/README.md @@ -118,8 +118,6 @@ Example log tail when benchmarking for latency: done, num sentences 2169, num translations per input 1, time 1108s, Wed Feb 6 01:36:13 2019. The latency of the model is 511.2466 ms/sentences bleu: 29.2 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_gnmt_inference_fp32_20190206_011740.log ``` @@ -134,8 +132,6 @@ Example log tail when benchmarking for throughput: done, num sentences 2169, num translations per input 1, time 302s, Wed Feb 6 01:48:30 2019. 
The throughput of the model is 7.1780 sentences/s bleu: 29.2 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 32 Log location outside container: {--output-dir value}/benchmark_gnmt_inference_fp32_20190206_014324.log ``` diff --git a/benchmarks/language_translation/tensorflow/transformer_language/README.md b/benchmarks/language_translation/tensorflow/transformer_language/README.md index d548bb0aa..abc931d51 100644 --- a/benchmarks/language_translation/tensorflow/transformer_language/README.md +++ b/benchmarks/language_translation/tensorflow/transformer_language/README.md @@ -125,8 +125,6 @@ INFO:tensorflow:Writing decodes into /workspace/models/out_dir/output_infer Inference time 6094.9205, Latency = 2810.0141 ms/setences BLEU_uncased = 22.63 BLEU_cased = 22.20 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_transformer_language_inference_fp32_20190210_050451.log ``` @@ -141,8 +139,6 @@ INFO:tensorflow:Writing decodes into /workspace/models/out_dir/output_infer Inference time 1174.0522, Throughput = 1.8474 sentences/second BLEU_uncased = 22.63 BLEU_cased = 22.20 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 32 Log location outside container: {--output-dir value}/benchmark_transformer_language_inference_fp32_20190210_072635.log ``` \ No newline at end of file diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md index 89dc463f9..162acdf07 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md @@ -179,8 +179,6 @@ and latency: ``` Time spent : 167.353 seconds. Time spent per BATCH: 0.167 seconds. 
-lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Received these standard args: Namespace(accuracy_only=False, batch_size=1, benchmark_only=False, checkpoint='/checkpoints', data_location='/dataset', framework='tensorflow', input_graph=None, intelai_models='/workspace/intelai_models', mode='inference', model_args=[], model_name='faster_rcnn', model_source_dir='/workspace/models', num_cores=-1, num_inter_threads=2, num_intra_threads=56, precision='fp32', socket_id=0, use_case='object_detection', verbose=True) Received these custom args: ['--config_file=pipeline.config'] Run model here. @@ -208,8 +206,6 @@ DONE (t=1.35s). Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.383 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_faster_rcnn_inference_fp32_20190114_205714.log ``` @@ -295,8 +291,6 @@ Step 4970: 0.070191860199 seconds Step 4980: 0.0755469799042 seconds Step 4990: 0.0742928981781 seconds Avg. Duration per Step:0.0760930150986 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size -1 Log location outside container: {--output-dir value}/benchmark_faster_rcnn_inference_int8_20190117_232539.log ``` @@ -317,8 +311,6 @@ DONE (t=1.34s). 
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.375 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size -1 Log location outside container: {--output-dir value}/benchmark_faster_rcnn_inference_int8_20190117_231937.log ``` diff --git a/benchmarks/object_detection/tensorflow/rfcn/README.md b/benchmarks/object_detection/tensorflow/rfcn/README.md index 39e6ac3be..3b3a64b9d 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/README.md +++ b/benchmarks/object_detection/tensorflow/rfcn/README.md @@ -182,8 +182,6 @@ Step 470: 0.202737092972 seconds Step 480: 0.117042064667 seconds Step 490: 0.103501081467 seconds Avg. Duration per Step:0.169812122345 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size -1 Log location outside container: {--output-dir}/benchmark_rfcn_inference_int8_20190227_191959.log ``` @@ -205,8 +203,6 @@ DONE (t=1.03s). 
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.150 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size -1 Log location outside container: {--output-dir}/benchmark_rfcn_inference_int8_20190227_194752.log ``` @@ -364,8 +360,6 @@ and latency: ``` Average time per step: 0.262 sec -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Received these standard args: Namespace(accuracy_only=False, batch_size=1, benchmark_only=False, checkpoint='/checkpoints', data_location='/dataset', framework='tensorflow', input_graph=None, intelai_models='/workspace/intelai_models', mode='inference', model_args=[], model_name='rfcn', model_source_dir='/workspace/models', num_cores=-1, num_inter_threads=2, num_intra_threads=56, precision='fp32, socket_id=0, use_case='object_detection', verbose=True) Received these custom args: ['--config_file=rfcn_pipeline.config'] Run model here. @@ -392,8 +386,7 @@ DONE (t=1.19s). 
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.400 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.400 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu -Ran inference with batch size 1 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 + Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_rfcn_inference_fp32_20181221_211905.log ``` diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md index d2c96dd9a..cee1a3848 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md @@ -159,8 +159,6 @@ Step 4980: 0.0429329872131 seconds Step 4990: 0.0358219146729 seconds Avg. Duration per Step:0.0364457404137 Avg. Duration per Step:0.0365921088491 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: /benchmark_ssd-mobilenet_inference_int8_20181203_232524.log ``` @@ -185,8 +183,6 @@ DONE (t=1.10s). Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.212 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size 1 Log location outside container: /benchmark_ssd-mobilenet_inference_int8_20181204_185432.log ``` @@ -382,8 +378,6 @@ Below is a sample log file tail when running benchmarking: INFO:tensorflow:Processed 5001 images... 
moving average latency 37 ms INFO:tensorflow:Finished processing records Latency: min = 33.8, max = 6635.9, mean= 38.4, median = 37.2 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size -1 Log location outside container: {--output-dir value}/benchmark_ssd-mobilenet_inference_fp32_20190130_225108.log ``` @@ -403,8 +397,6 @@ Below is a sample log file tail when testing accuracy: Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.264 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = -1.000 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size -1 Log location outside container: {--output-dir value}/benchmark_ssd-mobilenet_inference_fp32_20190123_225145.log ``` diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md index 7cf4b2339..0a6915bac 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md @@ -180,8 +180,6 @@ Below is a sample log file tail when testing accuracy: Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.334 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.494 Current AP: 0.21082 -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size -1 Log location outside container: {--output-dir value}/benchmark_ssd-mobilenet_inference_fp32_20190123_225145.log ``` diff --git a/benchmarks/recommendation/tensorflow/wide_deep/README.md b/benchmarks/recommendation/tensorflow/wide_deep/README.md index 2f5229907..95d23d68c 100644 --- a/benchmarks/recommendation/tensorflow/wide_deep/README.md +++ b/benchmarks/recommendation/tensorflow/wide_deep/README.md @@ -94,8 +94,6 @@ use in the next step. 
recall: 0.0 End-to-End duration is %s 36.5971579552 Latency is: %s 0.00224784460139 - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu current path: /workspace/benchmarks search path: /workspace/benchmarks/*/tensorflow/wide_deep/inference/fp32/model_init.py Using model init: /workspace/benchmarks/classification/tensorflow/wide_deep/inference/fp32/model_init.py diff --git a/benchmarks/text_to_speech/tensorflow/wavenet/README.md b/benchmarks/text_to_speech/tensorflow/wavenet/README.md index 49c1a47fb..340736a6e 100644 --- a/benchmarks/text_to_speech/tensorflow/wavenet/README.md +++ b/benchmarks/text_to_speech/tensorflow/wavenet/README.md @@ -99,8 +99,6 @@ Sample: 8500 Average Throughput of whole run: Samples / sec: 289.351783 Average Latency of whole run: msec / sample: 3.456001 Finished generating. The result can be viewed in TensorBoard. -lscpu_path_cmd = command -v lscpu -lscpu located here: /usr/bin/lscpu Ran inference with batch size -1 Log location outside container: {--output-dir value}/benchmark_wavenet_inference_fp32_20190105_015022.log ``` diff --git a/docs/image_recognition/tensorflow/Tutorial.md b/docs/image_recognition/tensorflow/Tutorial.md index 5088a2ac2..f31a49ff6 100644 --- a/docs/image_recognition/tensorflow/Tutorial.md +++ b/docs/image_recognition/tensorflow/Tutorial.md @@ -359,8 +359,6 @@ Note: As per the recommended settings `socket-id` is set to 0 for InceptionV3. T steps = 30, ... images/sec steps = 40, ... images/sec steps = 50, ... images/sec - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu Ran inference with batch size 128 Log location outside container: {--output-dir value}/benchmark_resnet50 @@ -384,9 +382,7 @@ you can implement the same strategy on different use cases demoed in Step 3. 
--debug Example Output - - lscpu_path_cmd = command -v lscpu - lscpu located here: b'/usr/bin/lscpu' + root@a78677f56d69:/workspace/benchmarks/common/tensorflow# To rerun the benchmarking script, execute the ```start.sh``` bash script from your existing directory with additional or modified flags. For example, to rerun with the best max throughput (batch size=128) settings, run with ```BATCH_SIZE``` @@ -429,8 +425,6 @@ All other flags will be defaulted to values passed in the first ```launch_benchm . Batch size = 128 Throughput: ... images/sec - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu Ran inference with batch size 128 Log location outside container: {--output-dir value}/benchmark_resnet50_inference_fp32_20190205_201632.log diff --git a/docs/recommendation/tensorflow/Tutorial.md b/docs/recommendation/tensorflow/Tutorial.md index f814daac1..544db76d2 100644 --- a/docs/recommendation/tensorflow/Tutorial.md +++ b/docs/recommendation/tensorflow/Tutorial.md @@ -215,8 +215,6 @@ Set this parameter to a socket id to run the workload on a single socket. Average Latency (ms/batch) : ... Throughput is (records/sec) : ...
-------------------------------------------------- - lscpu_path_cmd = command -v lscpu - lscpu located here: /usr/bin/lscpu num_inter_threads: 28 num_intra_threads: 1 Received these standard args: Namespace(accuracy_only=False, batch_size=512, benchmark_dir='/workspace/benchmarks', benchmark_only=True, checkpoint=None, data_location='/dataset', data_num_inter_threads=None, data_num_intra_threads=None, framework='tensorflow', input_graph='/in_graph/wide_deep_fp32_pretrained_model.pb', intelai_models='/workspace/intelai_models', mode='inference', model_args=[], model_name='wide_deep_large_ds', model_source_dir='/workspace/models', num_cores=-1, num_inter_threads=28, num_intra_threads=1, num_parallel_batches=28, output_dir='/workspace/benchmarks/common/tensorflow/logs', output_results=False, precision='fp32', socket_id=-1, use_case='recommendation', verbose=True) @@ -276,9 +274,7 @@ perform necessary installs, run the ```launch_benchmark.py``` script, and does n --debug   Example Output: - - lscpu_path_cmd = command -v lscpu - lscpu located here: b'/usr/bin/lscpu' + root@a78677f56d69:/workspace/benchmarks/common/tensorflow# To rerun the benchmarking script, execute the ```start.sh``` bash script from your existing directory with additional or modified flags. 
For example, to rerun with the best max throughput (batch size=512) settings, run with ```BATCH_SIZE``` From 059dc96ba657ed9beef57246671170e82e9fffc2 Mon Sep 17 00:00:00 2001 From: mjkyung Date: Fri, 12 Apr 2019 16:31:52 -0700 Subject: [PATCH 15/62] MobileNet V1 INT8 Inference README.md frozen graph info update (#284) --- .../image_recognition/tensorflow/mobilenet_v1/README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index d4e10910b..61a21c3ef 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -35,11 +35,7 @@ later. -rw-r--r--. 1 user 52508270 Jun 20 15:09 validation-00126-of-00128 -rw-r--r--. 1 user 55292089 Jun 20 15:09 validation-00127-of-00128 ``` -2. Download the pretrained model: - - ``` - $ wget https://storage.cloud.google.com/intel-optimized-tensorflow/models/mobilenetv1_int8_pretrained_model.pb - ``` +2. A link to download the pre-trained model is coming soon. 3. 
Clone the [intelai/models](https://github.com/intelai/models) repo and then run the benchmarking scripts for either benchmarking throughput, From 759608dfc5691ca19234d3c92db71944ae689441 Mon Sep 17 00:00:00 2001 From: Karthik Vadla Date: Mon, 15 Apr 2019 09:44:45 -0700 Subject: [PATCH 16/62] Add default config as json file (#272) --- Contribute.md | 24 +++++-- add_model_init.png | Bin 14105 -> 0 bytes add_model_init_and_config.png | Bin 0 -> 15583 bytes add_readme.png | Bin 15909 -> 17730 bytes .../dcgan/inference/fp32/config.json | 8 +++ .../dcgan/inference/fp32/model_init.py | 4 +- benchmarks/common/base_model_init.py | 25 +++++-- .../draw/inference/fp32/config.json | 8 +++ .../draw/inference/fp32/model_init.py | 5 +- .../facenet/inference/fp32/config.json | 7 ++ .../facenet/inference/fp32/model_init.py | 3 +- .../mtcc/inference/fp32/config.json | 7 ++ .../mtcc/inference/fp32/model_init.py | 3 +- .../inference/fp32/config.json | 7 ++ .../inference/fp32/model_init.py | 3 +- .../inference/int8/config.json | 7 ++ .../inference/int8/model_init.py | 6 +- .../inceptionv3/inference/fp32/config.json | 7 ++ .../inceptionv3/inference/fp32/model_init.py | 6 +- .../inceptionv3/inference/int8/config.json | 7 ++ .../inceptionv3/inference/int8/model_init.py | 5 +- .../inceptionv4/inference/config.json | 7 ++ .../inference/inceptionv4_model_init.py | 6 +- .../mobilenet_v1/inference/fp32/config.json | 6 ++ .../mobilenet_v1/inference/fp32/model_init.py | 5 +- .../mobilenet_v1/inference/int8/config.json | 7 ++ .../mobilenet_v1/inference/int8/model_init.py | 3 +- .../resnet101/inference/fp32/config.json | 7 ++ .../resnet101/inference/fp32/model_init.py | 6 +- .../resnet101/inference/int8/config.json | 7 ++ .../resnet101/inference/int8/model_init.py | 5 +- .../resnet50/inference/fp32/config.json | 7 ++ .../resnet50/inference/fp32/model_init.py | 6 +- .../resnet50/inference/int8/config.json | 7 ++ .../resnet50/inference/int8/model_init.py | 5 +- .../maskrcnn/inference/fp32/config.json 
| 8 +++ .../maskrcnn/inference/fp32/model_init.py | 4 +- .../unet/inference/fp32/config.json | 7 ++ .../unet/inference/fp32/model_init.py | 3 +- .../lm-1b/inference/fp32/config.json | 7 ++ .../lm-1b/inference/fp32/model_init.py | 5 +- .../gnmt/inference/fp32/config.json | 7 ++ .../gnmt/inference/fp32/model_init.py | 5 +- .../inference/fp32/config.json | 7 ++ .../inference/fp32/model_init.py | 5 +- .../inference/fp32/config.json | 7 ++ .../inference/fp32/model_init.py | 5 +- .../faster_rcnn/inference/fp32/config.json | 7 ++ .../faster_rcnn/inference/fp32/model_init.py | 3 +- .../faster_rcnn/inference/int8/config.json | 7 ++ .../faster_rcnn/inference/int8/model_init.py | 5 +- .../rfcn/inference/fp32/config.json | 6 ++ .../rfcn/inference/fp32/model_init.py | 5 +- .../rfcn/inference/int8/config.json | 7 ++ .../rfcn/inference/int8/model_init.py | 5 +- .../ssd-mobilenet/inference/fp32/config.json | 7 ++ .../inference/fp32/model_init.py | 5 +- .../ssd-mobilenet/inference/int8/config.json | 7 ++ .../inference/int8/model_init.py | 5 +- .../ssd-resnet34/inference/fp32/config.json | 7 ++ .../ssd-resnet34/inference/fp32/model_init.py | 6 +- .../tensorflow/ncf/inference/fp32/config.json | 7 ++ .../ncf/inference/fp32/model_init.py | 3 +- .../inference/fp32/config.json | 7 ++ .../inference/fp32/model_init.py | 7 +- .../inference/int8/config.json | 7 ++ .../inference/int8/model_init.py | 7 +- .../wavenet/inference/fp32/config.json | 6 ++ .../wavenet/inference/fp32/model_init.py | 5 +- tests/unit/common/test_base_model_init.py | 63 +++++++++++++++++- 70 files changed, 423 insertions(+), 67 deletions(-) delete mode 100644 add_model_init.png create mode 100644 add_model_init_and_config.png create mode 100644 benchmarks/adversarial_networks/tensorflow/dcgan/inference/fp32/config.json create mode 100644 benchmarks/content_creation/tensorflow/draw/inference/fp32/config.json create mode 100644 benchmarks/face_detection_and_alignment/tensorflow/facenet/inference/fp32/config.json create 
mode 100644 benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/config.json create mode 100644 benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/config.json create mode 100644 benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/int8/config.json create mode 100644 benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/config.json create mode 100644 benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/config.json create mode 100644 benchmarks/image_recognition/tensorflow/inceptionv4/inference/config.json create mode 100644 benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/config.json create mode 100644 benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/config.json create mode 100644 benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/config.json create mode 100644 benchmarks/image_recognition/tensorflow/resnet101/inference/int8/config.json create mode 100644 benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/config.json create mode 100644 benchmarks/image_recognition/tensorflow/resnet50/inference/int8/config.json create mode 100644 benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/config.json create mode 100644 benchmarks/image_segmentation/tensorflow/unet/inference/fp32/config.json create mode 100644 benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/config.json create mode 100644 benchmarks/language_translation/tensorflow/gnmt/inference/fp32/config.json create mode 100644 benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/config.json create mode 100644 benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/config.json create mode 100644 benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/config.json create mode 100644 benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/config.json create mode 100644 
benchmarks/object_detection/tensorflow/rfcn/inference/fp32/config.json create mode 100644 benchmarks/object_detection/tensorflow/rfcn/inference/int8/config.json create mode 100644 benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/config.json create mode 100644 benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/config.json create mode 100644 benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/config.json create mode 100644 benchmarks/recommendation/tensorflow/ncf/inference/fp32/config.json create mode 100644 benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/config.json create mode 100644 benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/config.json create mode 100644 benchmarks/text_to_speech/tensorflow/wavenet/inference/fp32/config.json diff --git a/Contribute.md b/Contribute.md index 84e1b2f64..55d2b7e44 100644 --- a/Contribute.md +++ b/Contribute.md @@ -17,11 +17,25 @@ required: ![Benchmarks Directory Structure](benchmarks_directory_structure.png) 2. Next, in the leaf folder that was created in the previous step, you - will need to create a `model_init.py` file: + will need to create `config.json` and `model_init.py` files: - ![Add model init](add_model_init.png) + ![Add model init](add_model_init_and_config.png) - This file is used to initialize the best known configuration for the + The `config.json` file contains the best known KMP environment variable + settings to get optimal performance for the model. The default settings below are recommended for most of + the models in the Model Zoo. + + ``` + { + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } + } + ``` + + The `model_init.py` file is used to initialize the best known configuration for the model, and then start executing inference or training.
When the [launch script](/docs/general/tensorflow/LaunchBenchmark.md) is run, it will look for the appropriate `model_init.py` file to use @@ -33,7 +47,7 @@ required: [base model init class](/benchmarks/common/base_model_init.py) that includes functions for doing common tasks such as setting up the best known environment variables (like `KMP_BLOCKTIME`, `KMP_SETTINGS`, - `KMP_AFFINITY`, and `OMP_NUM_THREADS`), num intra threads, and num + `KMP_AFFINITY` by loading **config.json** and `OMP_NUM_THREADS`), num intra threads, and num inter threads. The `model_init.py` file also sets up the string that will ultimately be used to run inference or model training, which normally includes the use of `numactl` and sending all of the @@ -93,7 +107,7 @@ Optional step: the original repository, then it can be added to the [models](/models) directory in the zoo repo. As with the first step in the previous section, the directory structure should be setup like: - `/models/////`: + `/models/////`. ![Models Directory Structure](models_directory_structure.png) diff --git a/add_model_init.png b/add_model_init.png deleted file mode 100644 index 6bacd1bb67252070894e20ef9dd53c7e9432d1df..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14105 zcmb7rby!qi*Y?aXbPXL6Lw6%c51rD|jUY&e(lA4Zga}ATODWx=z@Q)qC|%M>OLxk5 z{5@|x@AZD)A0O8R*O`6x+I!YIYp->$dmZ9*wN(l6Xz@TG5TUx7l0FCo1_Re?I9R}6 z18s$Q5D4#wv!bG|x}qXn*W1I<+0_9AQj1G}jcsV)OxfFcZWUO_ri9Sge3)WY63Kwr zdSs|TnyRayCf`#{khEl?OVTA=#KMf1gqczYFPg>z-vQ}I z0_<}3{IIEODJRi)Ib!x7{@mW(aX(|Q{)!2TSCUBN_~lp}3~BPPh%+Ptg$oN>alU@x zCEoAL)h3^Y-D@KFBePEWq!G6?xwl*8y4V#ALVaFV!vyKR(UlWP=0P0m+UYfu9V^+T z;p?8Q<4H)Tnd{}f%*_?~s>o^n_LePq$wOi+O+tbWmC6xFL&bz(ef8Lj@-DyTR~V0! 
zrjY3ug8T8hw4^d!x_*w@nW;U_KFsW<-8nB}+M?o9TGiM4Z(ea^_2S|fRDaJVll`_; z1aB5jenT(yE!fm%kX6&CD7|U4MP#^={s%XE=O~ zeAsirbE~i1=zSOLRl8_74|mv~sqZ}DvE!NQkE&zpUyv}k-bgStkZ_3WZ$G>MqYQ++ z`u*ePbWAlHqVHgZc7+h)?5^(%J zFc}m?k^;uPO28QY#kwJontXpFpT(4y0yol|9l!lqFuVC+Q*M`UZG#`S%?^RbNDK{b1?eo|L*zMQ5mM@qV=&b&$GzRhOeT0nCtF_c>Y-46H~E$<1X!ysG| zeL&2Qaw))=!y-lyZ(<8{!TgC;;5eTZ4P&vK6|qvd9urC++*8EKaiSDQbV<|jaN$C4 zkr@gu1$O07?wB)$iQ%DPu1#rQ%iy&s{m%v&YX|m@Lrk>Pv_(Ktx zp8Ri!snGY9gj-2v38ubmEV~@IwPJ_j60?AqSO~gdb@FqtFjk1V$|wm}Jsp>V#!HR2 zl+Fr2Q*zXZg=2$1FA0hgkl9BZc1PN*2wG_IJ4!gpl+yXY43eFD=DJVpq6HUg4J-L4 zX@8>}?8)jNI|{B$&E!F#AkrX`pnJ2)A<18ssUP^f;o*nAjvPSqbyfB5ex6v)_R4LQ zofBk8p89J3tNnn)pU5gnpnD(9_%;8F-}3b#BktpHa2E=uq{beL`yRI>QZce2lK->F zXB@pmCyf)h3M`vyZ4`smMbcCD|9Q2xAX%_Gz{-Qa;W%W{N0%EkxX@<;xpaEa>vs3ny-51)kcNS zHFm3i)=EB{w~|qiF%N0KVmjs79=d-(>qv{Dl}>~thR{-pUOC}>&s^8}H99$bHpV}C zwe_G)BKP4?0o7RY@a&diHf^?c_F;B{D39on)0rSFnkM3VpPlPtI{P`>;Ji}t#QTYI zZu5;loh`>?ogPjdO{}<3I8QjwPhM4iA150BF}_eaUvR62U2I=MR??vRq~(WlZr9ey zR^Hf$v4m2_oKS13$)?f7Nz0;-I_GVBwRs&@wRuV&F$a6rsgI%r)|EMM4ff7ho(yGok-wgE#c=P+TsgO z=j@~(+t~=uKCVzt{1#$RRFP9L?EBF@j=XKG5ca^*s8T;P6F1jh`E*7;lbr z4sDK=d9+*W?$cSUS!0ohj_gBo(hY9w;!;mO@_xL1PP}t}M{(D9R(z(@tJZr-((u8Y z_ghIUiE2+*-{9THyWM_2cgJ_NeDZcbO@8RPe4p?>VZ3D00X@-uf8tr$Z%gqu^S85s zEdzTdesbp&du8)-0j>UhGD!h9Th70-wgUQ&FL{qmZ%nVBoCeSYczrqxJoOi8H)sz# z**g?ES-vp5l0LkDwsy&I5c0MDu|q9UgVp8CrTVS*Rmj2lf%W<#9TEz~CdX{VehU4H znSm`!B#cK2oq!TzN?{A&x)XIpM@G34eHfUu_nDd4Db$@Bb||c}O{>kUJ*wqcHd-D% zD4jXj8J;nBlzU?@sVZH@1W~GGX;zYs7fl{aykWzmqM&(1aw_^$HqhDEoPf_!ey}j} zye_ovo;_o?X*bu3*~+6|1}pD>$+1FN(^yYfe`r}``17fAsHcr*zR*e1kxh}77Wdq1 z{&36ngDZ;5Z#`?sTy(!bIQP=@c!*cm5^d@!O_7tG63BUq&k=6$jYiwPY**{v%9T%x zkM&V-fO!B$us|s8ooSnKp!$^$>a|UR4Pujpmx3t%MtQAhiiU|MNLco<%3~vkBuBQ# z6Aonc1`RrK_q(%JCh2b9C`;s$x~Lbv<(OjeGv7-Q)e1yleOv!VyXNa_(pFuo8(g;e z{rJcGzUx-O(B(tr?S$@xYJq06Pws~eHw8EO88#W7uN!&qTeLlO-WQ&%ZyzXJa~?1^ zqORdI8R-=EU+a4G_EFaRP)oO#w#TO*@8h~;;>)F^B`*fSey3hL+$ z7P#sN$^F$v|GR_YFus~{mf|&KnCXyZ*eUikV}B_1d`d@QNb$he`5e!j?wmc>vzg#p 
zrFPcNw;lsKe5dAF<_XOmv&}ZM>H1}(GPCNl6thA7p9g6DGIypMe`$WGy?GvGtvs(i zpZoiH=ycEQMANLF-!4PDOiz&P9p_bDkjKHol}1DVXh>^Fz;)hU^5Wvzt67~1Y+oXB z#tSpH>TLI{xdxeSjJrj!@mpuEIlUTs2v@UK>&nJbp~q?>=+%8T`^ z#3$-c@P8!b4akYl@j5d|3p`kysxMgUit5I1F)KEk6^OR{6wI|b8YN~l`(eQ9wT|^+ ztM~T1*@k>p-hllWB(3a5ROM}*Gd29>LC?hJ!L&U@W2WcZy{KH9uNhpog( z1Y7Wf3@;(D7E+|AugXkfHdjCSiG&z?&lO~PM9=vyjdy;;mYE7OSFDavRdsP2jP!sk zz4!I?eLS_fId!|~xDOtFBPBGOj2jbrn3nn5=9``+J=c=;DRHzbL;Tvn(DQ4wv?R>?o+;?n=j|6h0hd&hs1H2pV8VTpg2{I@&*At^2JSAl<1=wH_QXB04(ES|K$ zf0J@XvSG?Uf#9J2Q*tyZapi``6MU zL18X6zdd$O_x##}suq9v%(JjEUxo@+EOftN!T;ZM_XUYBn4J)7Yq+pYQ%`$lVn#hF(9sm-g!u z0tFd=_vk&HZQ;!W?-S_q(Cfy46M5^`tQIgBl=%gMNOLJU?522eXTR+?f9F(jz+vA@ zmhpxsI&J3*L0kJ_cT#SP9rNaMd-ET+CXB7uT-;`xOSZ?2r`5%~KjMogp0*Yp!C9qRS)U(ui7r>QEUX;2EQaR%Y;sX8 zR!J;MyeB?7@!mK9$!pda+xC1{L*UZ^B;?QK#)5qDXr6>mwBUuHXPI`c20j)4iI`7U zI3}H}4H|T{_i^88tv@~Z;acKi*!Aq<=3y^m*_3ry?)MA8hQ|fU?al7<8h6)=cTPLg zb%yPM&x{n(d5lZ_7DHbD$dmb9#gYcy#)&FQfArp{E;VqwqO9C`tTZhzPdfsOut@UR z_V8QvSDR6Pn7ei{UU*$cHpg-bIN z8WbMyr1n}PhJ*-P{STS*?IjDKI?h!~&J}MTe@k6xwneOn=e^U|o_JrTeR&Zhchhw( z#m3fSo`(k&OL_a2RYOZ_5M@wj*JF}p7YO!Yq*PH+!Aj>d&!G6;5ggEDQBygm&Zca}Rfyi$^K#`XQh zusazY+`&~j_qhjwt1ngk+c4g%;#MY_HaON<7n(IajpRx-a(?gcO%5G|+mu5OC<_## z@ZNOjmuN&T9#Rf|U-pfVAQJfq%umb6P-%KP!Tdl}?)JohVbGKMvr_aI{*>`*%bbFP z6w0wk!h2%6`wQ&>k|dmNoKv>_ir)q^g}xXbk+NwFCkS-(QGU%7vghRCNhlNaBvlwm z=eNwh1)*{U||J+_5&q4-ompy})P}tMo%5-8DxpV)_aaPAihyvg0 zE#N;H6a;i(wFqwiw901wjw3Xp*AV*;{sX=dfdh#b3^Lgi)Rp<-i@~obifW{RAa)Rg z$a8Xse~$0c?Ov1wXZHLIXcN0DR!t^d-^IkI9LLI$lzYBIO{_imyO^!><_KS>VVs41A zn#5@MKv6a%Fg6&k9jP_UlNwS4rJF1qrIUx7K$UQz37OqgP`YR&CphL=xu_}(hZ1$u z#UA@4xVT3(y+6G~a)PI~!A!pyZsP)V^}vl(Fn0XS@TwmlLZSkHmw_BkS8i=#m6C_! 
z;V4az)(Tr@QEIxfVT}>@8P-bRrahicc?%V_{neXnESI%DK-Kg+sFO&T;eEJ2F1!FHLfVu#&Dc)WN))V@iwf1^`BVPAR&$h!o(4im;l>p(l3-1+CqWg(joGuCZtZQ%8kk0d*ErN*f4VFy;gviDQ;`M?PJR!0trDBN-(yu*7 zS`)ts^e_6FQ@s;GENk<$dihI?h>S#DBzzl$QvGuy4hNIfl6n$y^23>L_hwDp1}UgeI<3#tFu{Z_Oq{RzF^r*^xsjgrA(v~fPk_`} zQF>8wju*W>NYIo;LQhjl((%_W$@}+_4D|7P#mTaEJfQxe7i_DT2yMC;gkh(0xiY@t zg3X_AHw? zVyB6&T>rf}i9beQI{yL=3{9a@eol_@qeCIePZP+Mgi}9yLMa4<4~4ACjCx^2kS#yL zXqnY{g4+CeV@V9CMb%^qm#kMy-zUcSupSI`ErGsSq#sp~ho9kecXPn59o6H)k#HMI zupY$mWvYKTQV}>vOT$U&j<^s<;L?VYD~oY`&j&F0gb|_u>AP^W@Owco#|4A0bB?5%~Xk~cAal~ zcB5>|oAs5Qt*jrwQ8?1L%x4N{qYR|wR{z=VZ0|?Vq3JUcdGu49X$@RtHG$3}>!Rk> zt2T!Bp2O*UruC8A;}wec#9Wu$#%;I(v_M5jEr>)Aj25dBKb@?k>3wh7z)nab9NBVV z=OWyE{rRcW&qgkptIgubE$B*%^H{Xs{cwg#pN$`pgu3~mS6m)qPiN}cP@>TA`D7q7 zIIOeQN}(PAsgR!z3#%mb_R34z6BjNZ$R>{#KqhK);y>$Q=h~_4-dND1a&_m{D~fE9 zy}I&Y7r(0luuzM_-Q`f&dgGG2(|9?F%*n9gCwa|##y@@~Agj`lMLb`Kq}pw;;nlqc z2;*q_NbBH3%ws}8OY^is5E=>%1F6X)XzKy=wD38BKAM8Zh*g;j%N@g&BKiolZwo+> zPiar6k<)me{PwL%mA~wL1whT%w^tln>Bd&JlpH%+-FqrF;CPW8S*V8sfc{J_wQ|G~ zuTHiUJMaETHQ;vz@6R-F9J3+4Ha~3Su?$z(Qn5ZD#v*3W%F4=$^qp3Zqkk12PH3L; z=(edf!Vv=JcV(zQdRDTf49~v%? 
z-!=9@Ie{`bDXfFDhugS%&}+%}3p&Q$YxJ#pr4t$(bh*i~ci zEvs<8f$2X4Nf6hPKLA+6HP2s0D=}pL;^AN?$7rP}fTnH!PWCi`=~4@H-Ut&vr!X%6 zPEI;0^-~-7K~yzCOZb`kX0pE4yjc7^js|8h6s>@3l88X9C$CfjP-IXYqzUPV7^xI0 zL}drT9ThA}8g;RF2?#vX^fIg(L>v#VrG>FRe+FCO!iDm-3DdcG?g zYUIEPz3H+BpV^+S#uV3EwFip2O!7LahBuJXz5JddPHaMP!DkhGsN8J$^#zThsj2B3 zmGbWp1*#UDWJ?s7@*EG8$t|8A2BGx38n zbP_cFXv!)m(d+9hI9BiW^7HpFH2B59!TlvZOlYIilDCr8rWDC<_|j8HrecZ+$2NAu zHerzf>>_fJS26Kq1U*?A#d6_S1V0q-e;>Afy?CaIp3p%5ndjhxK|iaao5xTGIs6hh zkZ=$T#s)t}!I!*&bl!S{m9mZxr1(_^7}O#O$m*{bL8HO)e=9kRzm*&izK-r8vITIa zz4wuiK7MyP)Dzw8FPi!DFh}grIJ3(GrWUEq3sX32<-6HsOKANBZ*SOiNGMY*gEX$0exW35AK;wi?uZdvdh;Ez%j(Od2t% z#nVyV-{^I+5l=+O%85_OYeNLtK|mW)SCmXTf+dt+X%Idm)es38qP~0HL|V&YnN@Tq ztau-59sYMMwgONJM=+>2nX!z^pwxe#TsQf$h}+*27OSs!Pv|cQ9xer^-JYv+cO>>u&YCoSUT-CN17u+2s*6V81lS?$H4I!{2vu< zh`7xfortufxgWjr_n=i~!*U0)`S<5g98hx0%b*>hahU1Z+{T>1k?TkXWRYLvd>x#O z;n{)*pghTE@m&2BBfW)+F)3-vUO33J%D~fY?gu;emF45QdWE9+bCyL=X=<*v`Uq@d z;X?M%UIX-6jNEW~1RlW)X#h$++4kIhC8t_6)tI^NoG`Kc3*5to&^if`vU4| z8JF>nz*QBfR*K`uI7UOx=X(h>xUI@nl}p5){CJa6k7FpOo4tIE4OCWe-CQ1JDt%ib zZPwpV_vh7?oBa^Iph5|=e-w>PNRB{0r8_<+mb*#mWQ+AwgTJf4SLb~v{;v2PnO$$Z z#zW5U-sg#Wv4Z$Cg`{?wBk}Sgrpm9XwbB`5*$EisMuaim`)*U?z7tM=Eoqjh@o-|m zO;r!#{@csB240|uD8-OOtd_x-#QyqjRc}dqIFblRrvLwgk7sJ5g65nKU%uMv=bfWS zs?Iini2b(W7)zh29Tc!ByK;nLD*-4B zHYz9wrZ8A$4WlsIz7+XbrXeD|DB9|K$hrTHDPK5>X615%Um>y**?k?BN78ZWGiRju zybqOdm54Mwv=xk-X9rfuc>hig@F>pAjU|nRi~~#3{v*EZDEhLbl1vb+z#&FkML zT!?|crj0L`D2N}GY&-5PXeQ)TD%dibp4FwYvI@{I2zM=3C734#M3~<5k)NhLCTb1< z^?iwPkpNSp037P~ZRWH*IR9g65!7O0e$+%BE)IkNZ+m8@aO8U+L@d4A%lL{1`9PB5 z@a2P$muu|E3d_flbZ4mzfIK+?7=RCjN0c>-xz8*AfQj!Qz|y_Og^mq#imbDn~hx@_k%!MF|@f?BUNUX{uSh%(M*ILScvg;d+Mwr_YhNCjfaA5qfi| zT8%H;l2qS#sLQx2{jM}C^BRR?T`Dc8A;f?&TAJ5kD0{?K)idy7L_^{KHs9;&f+Tb7X9}|RTnK|PQyyLnU(Ae%{%x@nDmxC8OnPk@0$JPQ%sML@22iwd}>bq zFkAHSYZq?6La5)ISB&^4fO?R2&^twAnviP17&DTL-Y7+|kAdBW%ueWZ&;TRi`zTL3 zpl|Vat%>L|P>vNm$qR_yILW*l%D&Gt1s@rW-(Bp?kEasQ2Iw!bN-Kbic|do32|&&R zMqZ2yoD`xl+zX;K^hu1ui01Ffst^1CBrbV6@ve?oUe5nf=+6`XD4de?H*YG3MmIc$ 
z3vBT)5}2$zf=>Vp7sQ91AIBRg&kIg?{-W9J`CFcdFI9#W`mWhSSt2nH??y{B|AN-0 zhrf;iZiS>{29#lUfwN?c)LHJh-YW(;F|ssxH4|MJb#UfW>9cA3g-^|{GYTxKNpBfs z0uKxr94uQsQFnM0+^dP$G0eQUoYg$W)w+(3?AKJfNv2-3gxQ?n1{uZ;{{o(fCzdXh zy&p6pQImD{s!zvC39BqVq;C9qr=2W)ghSll9(1k`5RT;@iygtZ8857C+`phMfJ(To z9&JFg#Ta(u6*@5785S0PiYnvB5?bGG#fmC`UM%rpEU1->6M1x8vX_*U9L|C%h#i1Z z1f9dk8v*zU;MBPf;`MG}rGznTlVv)LKg`!$Hi|3BYJK)GP0vj#$u{HkM#8hlc zCk#nm*Nqw=_Kw)ozxCU#Z2cob0jp1a_<@2$F*=CuDmc3GQVCl`l>5~M1pkPiyJ_>=r*?R2QcMM?PR1ljz z^>rx?Fc=z%ZCY_gJd>u)C4Zl%OwUe(n}X}57KOnpu8P{b`*)7zsfk~nw>b?beiK(-ggW}De5{Do)OU4&d`gn z2TygGJF$`EBhv%FfS;)!z+BUgOu#WeEQ;`Y8qNA^73mAo&@XYo z_eKETqkqHbibDgf(QbR(md2B#;Zc1VCp(+2&O?-2n44vqwpWLfw)!oc)c-Z-FG7md zSeC(Hwm|MCAU+E_bE_F6g0Pxd-^X1*D<@gRs)>Wa{n!N_Dit>zn|q&KJfPNp)Wam$ zJG+>n>oyOtyPW9y#W4i)1@wxb4RjS6*G2kYo|$kRsJ@_=%GeriG>Xt*+%GnP9PWW* zGQ#{u&8|Kj6^oYjCw3k>Tb3&nV%WAAX&#rz7}K~^piidmi8`SdIgu?4j8eGDlhu}* zvn?J&FAeo7Q0FiY0`iHrcxd4L;WB{Tz2)xHSFMBcPSBrK4=N8?d@@{yN^Tuu8=esO zV@P)0BjojzVwB&Hs=?aAWMn~+=?GjNZ#)r@HiB@E+^3NotPYPI7|6EXZ zH9C(O8tx0}&HxzpTv+bCk}m)JN56CR5&#THtgAGx7=qkjYz2Y(NwzGWOP*Nm;{|wb zvM-hI*92N&Ivyhz`mQOHjp@RA>a#))vvmY;MDV8MW=^*TZ@!X))}_f)^k`81Vk@#r zANr)qmusuM#~4`5(3p6&KIaK}*UUP3MDWKkuO{hJ zrGjgisA5UI9(p8T-()C!s#hXMF_dzZ6mq94x&fC!=hy>7q< zJdN`J^}3oq6LVx${}n}q%3(yDOPXwkZ4uNit{G#Xz8(Ua11j84K|=&51Fr=(F8-|8 z36c2Y4aSP`Qxb$6_E0&d_DCmPK<$V=G~h)NgTmZX0917FB*;*Pee|Y z=>ElZMdMs}Ln!$yVl~Lsi>#~H%ic3!<&j>7ife_f57I#JhrSTYY4I3UF5SPi!Gv7) zevZQ50)9UomWEuPyh-bnfJ+tTU+IsF~bj7jn) zW{~a^wa*dvU>{B>oC6wZ8?nEBV^_w@M_`Q{56KPX%c{25(KrLp!}n~*^mDTl;V@W4dAOl>A1w*^I2z4uH7@I)04p zC!?ARzGUoL1wbC%m>$_|r|yp9M8VOiKt`gc!-+QRX9tQ`8}0IIG75eas9zOVd5S4cUc)(AGzr;Sn1rQt|aZ zpUIj4dQKV?PtK)4t#&@=Lrk0v;HP7Eg!Gy`eKZP$k07M%nS1r}W!G`o&SGzpjPx>N z;3ks?lRqAd%ybA^$fU+f<_C6!fMFL%)sn$~30w{IWrBPh9s%9BX4e8iGzUmq$vV7? 
zSQ+KkTN?{JRMpxHn+cniB5w_Mfbu+sSemvnXCPW}Gy(-6QKUx(C_ApkUvtkSeIhJh z^!qzuCC{@`aRyR-Ps4OTjWIAo}a{pNV3^ae*4?JVs(FPOCxXGLIZv8 z+YoLV*r{8H6X;EU6*}Z5(m4g{f;xp^>zJ8J=oS+OHqmN8GhWLm$>}5C)l6D>n$M|7 z1PO9x6QI>?@Z}$(K38>eqC82`POPAK{%Uf;Tp+%7lh3EW7DT(=EPadZ>hwRfbHoYP zu{qtI*hx{8ovu&d>Ml}FxR5jn^|Ztt{SuG-Lc%T{jP0{NXuHAmCSin}6TUNsnt+sG zhNlG_uh%(@y;Udk81YPE=LqT;S)#eI;frn%Vq=S{`Ld1n1$T4W@Qj)fE)Xa@AjwDI z6sV`7aWYG&43wC*ozg_AmxFt31opYTq0!uwRWq99(_P%Q?M-gCh{k77+gmB z8tBlGNzJMx()MU85b%nmihG=S>YYAP4GDC!u!tyQ)c6a0@ZbYa54VYfP)Zlt` zE84BscgdzOD|%`W%hAzMA7CTZvM#QC4{m;)W(mj#YB3#9uvEe6emrQm(RE2r{L_}T z=x=}&C*OX?zCa1LHo^4AAFVLp_*^z>Q?=w*p1d)YbB>qttOAv=L=Uu zv$lCawR0tOhBA26gQSzX@Gu(|3ChW-jwab|wsOO_0|k2#`LN?MkZw}viNBqyqTqBo z1o0;j^KHM9Gz+F+z|rHp1z#YC+2Mg4;@JvPUfZkTIPrngVC{&{AA`5nE>n!V{`Y6Vd!u zk|^b~uC=gVbM(^Q7Y5|Ik**1&4`ve_NVdj{g+-uY=guR&YcvB81KsZF8CTIsAgmh{ z3ys8|6!Wb#Qo{ysTDH=+`Y%bp8467_ysGlF+r*sLimPfjS&u(GFkzpqG8b>gL;p;0 zt21FCc4XYi9{ZXrWREq5v~3s!7=rTWyFWu;Uu9QtKBp)O4n7M8>H$%^ZXCzsE+fle zA%B3y|D>q%am%SMg$equM$LU%$Y9Fk>&q65I;Ghn2R2Gxwf{!5iO-JJaW6L~qA|-S zt2#_Q2Su^Rkf*<=jwh`uzx7IB1q10g%N@TT&c~N%>f7kmMFiymHCjxIC-1f_0}W7J zS+PCeo#~e}@dpz&uHP}O;pBvsM4WiTiMHa*)rzJPrrjJ%s|)u>#16n2{{?AfL=obc z{Wcrlcuz%&R6Vg*oD(bq3N}7xn6=vw%H&Pk;C&Xe*!_AhWX>kc{-ak65q&0kBT!d< z@?a#8h3`5|R%T7b;YNY4_=jU8OqFmSeDK3)ta8$~2{>bB`rLOQoH3ctla`}s(Zx+6U{xq(3 z6~;e4p2M)%8HZm+ zFb8jqmXQ29G*~k|E znw>1M3X_(6+F-t1{`v2S6zGZFg%NUv5t$AN>jsLufVQG0TEoCHdVrrYOOBpqXz;b^ za;zy?9L;+l3@iOSp=XIPK{-i?4yz41cc-8m_uBp0WR>ln4r9Lqc&-@%q&U_Au@mM^ zNRF7B-A@mvBGe(Q+@AJC#Ey%=8x(8tzJG24p08{P zATJq$4_M^tK1MyU$cBr9g3QSh#g+ax+yD$1AN5^hzbwdf=ax?5jN1(U4+dG)@b+i9EhG9lG_;TR$;3KEI=${NdC^%=|Tn*r6AyV$WJOMScs8u z#>ee}U6klaencfH=r@K^zRDoYN4zCT^bS?PX7{yu+0g?O>a-)A>m0r|N&wA!c~2`1 zu_}~kmZX5K!U|02YzKP00b0cw#lt9EdJ|E1{yJz8>M6kyxn!+t%Df78#{5y=n>aQY z4V`^z!Z0dV^f89_J$jS39%6csmnx$1mw=SJEJAz~*qrheMFcSR`I4rJ=wI?`fczC8 zPZ~>*R5zr&iMC7rpr}dy-~D4CTK-?U%KwbwG35=TqCnZKaS{+aTqWEiz?e!ET&Y!G z0M-H)MU?cPrqn|!*EezkahZ8e=&vES|A!&HjR8ZxkPo}ZcOhQ~m;mFTBo>K-({6%@ 
z4Nrjk+5kgy^kjF_N6+C{A^-XuG0VHk4A@23cbycEK64nA_0eF3N;3^XUI*A2u=~m` zMO~#bt`b5291XyK52MyPcq*xwWl@}GasSwn+@mJefiDBmU2u~Bav0_Ci^bPw#>8Y< zix-#@{%=z>;C8@kU22=?NFv^IOWSsssSx$DtdJQ!Cm;aFr#BI!G?E}=gT2p#N!FS$ z@vTAQkN{){zuF8(j{VIw+Q0Rjli@H{K>cv2RuMi#TM@B9

PQi59i#t8jtAG?yHW zdm{+C@B3wmj*kM4c;KtUOMwt1x9;k0I8tkKp27!NO>M{is^)w38aDk!<{??!JitIpl;|b9AK%3rjRgpl$RhI6+SSRCR VZ&g*)|GxLDuB@$8tzZ-We*l|k?J@uW diff --git a/add_model_init_and_config.png b/add_model_init_and_config.png new file mode 100644 index 0000000000000000000000000000000000000000..ef9b882905e62cd543114eafb21f9c70f261dddb GIT binary patch literal 15583 zcmZ|01yo!?vo$(cAcG~iLy+L^1P$))65O3ZLtuaa!QI_0XmEFj;O-tE1b2Cd@7{O+ z`>(eiYr#4*r@Q)eovP|xyNVDc1xZve0T=`Vp-M}MsenK*xWM%T(i`BfF!@9Z2m~It z5*1aF78NB^a&|DcvNZ#Nq(Tys5S3K_;6Pu#PK$n)MI!#};D1R+bqNAbbxBBl#*jgx zg)YSOVQV4Nk!xv~iima9fogEXu$IXeI-AMDLTi|CY>Qiw`MXcPUaY0Ln~y)P3*0;w zt=zq&xWJIvREgz6wD6%QIch}sJF-!VQu*6&U{D-|-)n&|!WiGPl9P9X$k~t9cc1h@ z@)Zp~sZE|99$x6^h$_CLg0TD?*(AHig>T_=*B8jEu|QbeoivhksXKz-ON{e{ze|2h z;qE&a;+fg8Dpa@=lf3_6iw4T2d{_}U_ch|-j+C<+! zy~~V?iQyf}LrSB5p+fuTz#6y9s?Cs@Ui38uqqRerT*iX&MM!AiB{3YbD2JvhChhIy zxWgg(%_=3w8RsK2QkD^nML+4`_K3fUc{1afY`PKJh>%)4_cxIRlp-Y9lye>x^Gg&L zDi~#UhVk(DeVO+Utmes7nPc>_VKK{hdWHNxBtm@lLB9~Hk( zo4U!?7hC<#*Vrk2|7do`|4TT}3)pS@Wqz~|#{b1ib#}hQqxS;U&cbWzkUFIQO(hc* z{opbku;V7}7Vlk&#T)(WE5*(_O9D0qYY!R0sUHdw45FhS9%X=BG+I6s?a>b;1x4~S zhS}kfZfcqu+IwUZAL)Z7`66RM)(|Oc0bb;CIk>KOvj@qoZT_J+Pvmmt{;)Kwt*b!@ zvKRjm!);@N594@93yQz_#g)fBc(w%FnPOirmo8wJ)@*|(3@-x(bNG?4!0vStEWlZ8 zz<3LTkYF~YsNWjW8d5x&l4a~b6AX!I;QI`b7hvc*1sy>|@RFUZ6DSG+7aJIjZ<@P^ zPeGRfA5JmC{cjA3{K<&Kf@q>Y=A)+wYeWm|fIUTEl<=_LQVC1HL&x~cW=T~`LKh^H z1GW@WLwrKX@E;e|%qBS?;Q*nEI%NN50e|dL)57Hn-0!YnfkO+-+oWPaANRl5fc3!C zL(}XE-u!sW?~U3HchkeX$%-XRn{7!07tcA2y&DXcq{>xMGM=$kdtZv89%wCGm&>Qz zp!8JyeukO@M_y!la?TPR!ov4vOu`~ZGf#E``yk-pBLwjt#|tD7`W*DZ1j3?$=Z=Dj z8MSHT%o3Q)CWpF5xktW-W{z}064p0WWv+#5%fbbd7OL;Z*mcsyWxQM^upn}bhZa2D ztG&tZw8?{`5K7>qi3}jbyIK)`}E%9t2de8d*qQ;Re1v)`xN@MivR zyE5J>Y`jave`OG*h$)sUkfyS;C=#J5DZbP*5`L=m_H1HrI? zb>LmFBsLR03XL~;Ni2i3PktXIz7=Lk;*3PIq?hc+IQb#L9kH7D)(}0(c3~g6=VB@4 zT4hr@XRLV|4LUB`H8t-7F(o!-V;V;FVO2U+z6yw_l~_u)ZRv5*sER?kUWvC%lcuL! 
zV6JfyMn!CnO0KiiZs@R!dU|PzHIz=WPWTFXh%hSx4mB1#%d4V2qMeNoi~m5EXf0u; zqFT~7Lovg2Se#m#ny2$qr$VP-3BS&VAk9{rqoo z5AA3_O-e9Jetq7Dj=N^C>IBpQfts*y5cyS1jwyCxK^|H7Mx}v=|xQBiEd%u7A zWn%UK#fz^keNBXvbAlC#D;ocp`+yCPn}a=^b(Gtd!`oDh!h^p+Qp>iR73`wYFAOir7}gDxTA-OnE)#90m-4pFQQ8E#*1NV{K~cv(j(%J( zT-=-0+inVRX&C>T{xCK-#I;(#bnTZ>s{cd3UB7IV@T=_C`fmkKc8E2i**yfoIKlIg z8j*SfZIRy;u!`;!2-BA`+WA^|!ZHZcgZ0DpWA)rxE}K>KDfDFZm|KiiyjSNMGu$3_ zPL7#eh4!9{g6kwx2R=mYUySZK?W_MLyyUnv^*p&bzRCSc<@t0l`_FD;?7ZUKXUk-# z=d9$+v%ktPpAC)O;BM(Y^qbsEB3myTKOf)%~$4LoIQVi$2F0$4Nw_<8I?D^KrVv z-x9Y{R8r5=_lK7I3HQEUjNDSd3W)@6xLP|Nw$GqjZCce?Vg7OY6M=slo*%CIS=}hL zib9o{o%@%hDHR>ol|_@q-$4TG;7}XNXw#5k!snZ}4q3 z*SdD>E$=1mb?A~dwATqY7}|AhxQndM;?m)SB7f!fuwA$jxc}O9qfbc8b+U9@{zDZ~ zw@}~s^H%n4%g!OXn^61b&{>*w+Tb2Be0S)x)+c!x9-QgFhSfcgv~u(+#+G zz6~aek@RMfW)E%{?!#`^Nx=yX$82kLX7fp4pS}#4_2K?iz83D2=c?de`+EjiZKw~B z8|;4%k9ivd?N1^PX{B;y!)S)+rlcM^#Ja0@aYu@-oI0F+oL_7L>z)uP(A{R5W@d?} zcx#(TDw1Z=Ne#RkAAY>VB)&NcdWxv+-^$x4K0XM`it}mz%5bUm;55IDN-d{wEGW&Z zJp<3l=s;k%$9>a4S$mld=AB(Tob)=!P5WZ4NSI#1pXX+Np}n=}{|j^fd_1BqWJzND zVjR7m*~ZuL!*loF$UtI!Tv29&b}WxSpQ-aMR?L2n6je5U;dPz8vcBNlw~?REsK|Pp z{WBk@Kdvv&t!G~gX>o119zEjyI{mPBr~BN%v}(TA?q<=Ik=erP{@{gsbA972tb&O^pW z=UVPXd)4J(zbF_kxaA9j7we_UrL%YbDSCpCTL+u3>y~1cP-Rqq6j~cjN2Z7BxyFUv z-%kVXe@)(-M6Fx2lD;TiDqn1ODOP0h2@QPn;mW)~xG8un>zki*$XO>*FqM=0p=CTb zLj(Pcr63KnA^&4W8N&w6`rGebfuXQirdK$at^ znSZGf!XD4!Q>}^#TUL2;z|ZQ>Fg<5@DftlBYXItISgC12wB+UZOdRZ(j7%Mj&6qsw z90BAD0`YtB0he}W5F;`VJ6n4fJ`Vx%|BT=Ru3x`qCMWyP5QvQcxt6>VnW%%a85t)N z2NMgqAefAdjNjSRoKHne;=iW@p9IJ)ArMDCW@dMHcP4jsCI@E=W>#KaUS<|HW;QlP zU<9L!r#-~TgVElF;=hFazjDOPTuhv;93fT?_GGVejf@>!Ap+#&uM7Rp&wo9qnTOT? 
zS;^kzzqbW!koolvGbOrt-gj%co@JVP>l(W@Tq)?*gnL$i~gf|DW;ypF96& z#s4)^`+v>kWc}YW|JR-Wp2^Srx`F?-q5sm>f4&9OB?#tc{vXu~f=vn>|A9a(6w+cM zY926uGZ5pk`tGm4u#Lpx8odvsvQYBJ>kLOS78Vv0dA{i(I)PyyglRIwVfu&#-{emw zEvJWd=?^9+%Qh9O!APQEAVc!Ln9=#w*0vhwR#;?l@A*6B@5$j|>0y!iT6yWoy}I8o zK}-VdY*;n2VK7#>vzJ^XSP6zM8!Ovo6W0%{#$gq3egjz~y`2oyTTolFYf@MG#ebE1!7mWqeunXuH936dy60z9z1* zy}GZO$znALA`!>Oq0UsgGyG&Rp04*N z9G_hx!F3{AuK3_Kz@Fk&Kb1 z{Y?4-nS>e!qHZTagX^r9N6DvT7F)>qjx_!!x1V(2X4&5YH|a~he+rWFWs{iBhv_Q^ zrwU|uFaJy*2H*7Iei_tvU!_79yv>$N;~IN0nVBcygGim`1&QKu+ozAFa*p_>v0IF* zZ9YBRB>n4nF=)TrBXm_Lir5Y&@shG{{`1juFU3O5z0m#N+Aw~$9=*oT=yl&0eS@!m zIZHGvGo9`0tY>34dxA%R-zT&07b5tZ#o^bh?(4Ms?nmVfGJu}UMiLp>95;lmrt(9= zNd$DpgWL`fGZWT-jDkc+F$U-gDy9+GzTpbP!XbCE^ZDioF)M_`N(#ik$56yP->-%odm zvi09;GhcWZ8OO4+(7tn3&CIAtOG^(GsHU))#m7l-Bd?Ow{%|{9_@G{@ri^wxn!?V9 zKSFIJ8jPmyw8>JJ!ykZ%o*05j4IXztm=NJ@w5e>}H9a{Fgahhat{+y}agcmIYR#N0#S1L)I&W_N&eQ6}e|wevt!= z*p4!O(HY2@U^7$GV&~bP+NzF34m}HqZ)7JKMcniBTijIIyj-mos&Wj;?g4G8DZs+D z9ncwOb0%|I&q!@ZIWacN$|f?z2!6XSwpXvRq2P^TLQS7XB=vqbwYxi<5GP6EDOUGG zA{_67wzhUDeQ=jP&xA|@UCHlilL2^A0_-12upd6*Vx9UUGbxqBnk0QNK#*+r0AG%O z`d7^8lkGtcC))4^t|X1iPQ?W0+rzcG{@d`$>{&Ce?|&RyljkeH2whzwnb70Wv^}1W z%x?U=<0Cpc8hy}kawXDpSu{(MzByS<*zSfh>d4^>?%Oeh=@U4T9h@6^BL$2V@+dm8hKyLcz}^D%U4KDT1xm$c9e;$~_CRr^GoBz=fcPZY{;RxH52^347xo|wF1i~2*m7=5@D^? 
z+}>94#8e=fd%Q+-^w|_@>{BQ#go&isbd4PQM=)@Crj+5S0)r0R-|~~$V^sP79U8bD zW5sb%>qT2>K1(;1Z%}S&-cwr)!bVc{%eQS3^B+S|<(4P{&XXUokqvr3#!yr!_~j8| z#b5?>ne6+`368J^5u!IiL*4mO*{rPZRR|puC?q3ig2CKY!6~((_V>(O$MBzRmyB4Q zJjq7QvL9KyFx3RdI$5y-Thz$DfO$^zR>H@=$6oCJ)?_c;llz+QPUo?2+$!J;5Vk8i9bD-O+Rz6XGq=fN)c z3@;BSEsD@YP~j#JXf%8MvUgSoub`F@a$t) zXx~%dxiE2%no2I-eqTWO-Z$#--0AF33)f5r0ac?5v+&{Sv#&_x3hSjKgTYILx7CLI zxd`tZZ1>U}+DqQWFt?ui>jc5SG{G3cCYLyF+NKEa&A*uoGv6ggNtX6zG52RhG5pwd zHJgq@#wc=+ZYt4RXi$T|NLwp2p)W#!x9{gRNcxW6?rw-QSw4IpWQq2D@8MrZrH1;1 z6%eh+l{?Q^arLDdjM$H6O7fJFwanp7eIF+#1VBeP-y6#++jUS+j2$oI6a`~`3qHuL z%LtE}@(hkCy-U;3l7kc-vm|w$4|pkRy+J^`Y`a-gEyv-RRnoEdfoT*)V8C+pF1Iwn zuQElHa?+Y%Q0Zx&j?sEb#ZOF1V9-{n2+91mYgR~;4 zL>x&ErZ$Ay4Vd2^eaPX|$Q64#+jo0D!n}L8pQWj1z(DVR^<$LCc*pMqX->fFtPj`h z>Tq_r#OMyI$}ftDH+PP&yOW+JMT--p&$EMR^{hpSrr5Nhlwsh$Tmy@j%mJq44;ep1 zT~)HCe#gZ=an;2V>o*=8uu>~(`=-ZOg8oIA;FJg(N!;(dJ5$aI zHBu-5M^m5!Tql2NGz{z^1;dAnuFmC<0OlzHWrW39voq6ed_XwR<5yd3Rj1p?2mDQ; zbgZ=)8Vum9GbOVNjS^@axYy1;CXO3l!9as~w+}WggxdsWT&KyoV8!R*uW{xFVuh4d zBKJ-x3~;*RHf{JLujt7oGQI&C{RAr%6$vprNyA%@Tk$;L{0GZ?L3p z6J>La*?pg`G&#$fT=vJ;9_G#>1#*PDKj9j(PlBu1Bbcf_{Zr$qO8I5+JMp7@>R>FD zES5d2X-l2Wyv)n<6GCSZU?X3a>THU(I4viH-=LHHR6-Kw0fj-|X4);+CmdC@C`^=p zepJZdE74x+RJ%Ty6lFO(e~sF4M4uoBWKfYVmNY^}s_1KBIQVyE7QZv;PnvV3&< z;6@Ov{C*VQ}3She0bl`u)r8fY3~x z;eM_Cs!m1gxeO=k3|`VhNagEhBF1{djeZ$Pu#XQ+pPKZCA4CZ~Yn>TD$KZ!yA`y0Y z!W%%>@OU#@ePP&#AofC=nc@!wc5mxZqQQugHJ)J@?tDkki~o6giuVHH}~;_5SLJzKF}`(c{l@L*?Vu{FhLPVEa)xbpC7) zPO_QRZ#S#3KdH@K19;gjC&&P3iKQ2uvY8B1p`0zud2mkXiWq^eP^*|FI9$DvE@2CM z1)_fY6T!lqP+j);2{5r}p~r6(zcCVrU46;>}>l7oR(E=#S1$ z_Nh+1P!jK;9+>4f-`?10<>nVgpphs`N4G%lVE>L;hpEnxnT_pWZF0 z_!4uXqK||5@ho#tvG}uj;BvU=)rzx|SuF25-{z>RwR$-9#dfjkU|MQ30$}W&<|ok| zKb-eO!3v~C{pZ_wT#y5$zFrZVQCl(FRBY+YZ9sl;q*%9k(EU}{f&^KuS@9ouQM)Zx z)^zxO1Fx$?s=iQcReD&%WAP|$D|l*_Ov|CpdWUr;vfBjaFKg@PHFVRu4s8X^-Uq2U zx`-A95kKV1#m?>G_LL#yM(jOL1ll=_J<=3-hZyDWm9DFXVTe!#@zA@iLCppJFnfDr zX(KyBaoF_-%j8oVq zF%C+<@o3JM4j-Rds_Y#vD~$Nr#>x@2cr(CQX31y{9xVl+`!?+w1KbX-{1A&%(>Gzw 
zM*N5Y4Z`lm*?4ad{pa8_mo}_0)A2L6@an_}@S+jq(Pd-!uVz$bEchAywG+N%+&-W~ zI>AVIBVDTWc&F)x*c6O2#EeZ?WFg~fSsD%pskvGt5}{+Ya-&#yD0Wt%B&RGyGllw} zg&mD2G~FAHw^&z}!4pj$Ezh1eYGmeMO7#XBb2?TdI$?xv4d^(P?T`ly5V3Mz!9b`? zxF|xg@B%RYJ0m{xYa)mRM5@oqAx^J{x-JY~QWe!Uf*KV3Jr|TAOGC$s223@gkLGX* z>uz@mW~%a{WyV<;)mj^`F&o+W^;=NYJH_zsoejR+hkxl7BUEUv#K^Yh3=VL=^7cnl zFja7Rb%H44sL88$jZ62$`pi@91A^7aPJs23OUd3>14hp?I3nCAGw3%I2srV3NHA0n zWH4>%HYjI8bkW}L3~SHnvM1LE?FzS!zU7EBKUrzQkjT9EJ*sT|MFlA#Vp2${zUOAHUg&vOGd+xcN7&h06H&ShE1nRB$IE7ND>tJeAZKx#=LL#}lPo$I}qUTt>8WU$I5wiHV_%Y}kZ z1(HDLlwdA)JY$RoOEH5t-Rk5_IE+29H>>WmI=_*E5k$?+2ZkT!OT(Mxe*DFn z0v@^56|?MJ?16E?@%kT~wj4s4#Ohx2!^eP4x@wxVn3>AT>19hYc2^y*Bd+`+CftTi zdcb(w&0GBeRQnFguz?0X1}_k}KE=*UZtHnY(GzPycGv zB7jl96Ento^#@JBAH2sr5CEh8@ahl#QsJM0skh+MtCv&Gup?~XFs5SXVy`e}zwoB* zOq6ZkaT(|v3Lk?6cQc4nzMj4z)<$}ww&r-otkx>DfQaowN-lQa1vz6{lShOIqjLxo zD;BOaW?G7n?Z@hP)@T2I7Eit;ywI|^k6ahBAy{eB3uXHnyJ( z85s7%I)Sn}<>+yy!Ld4}S4HPr-U_M2{+1H)`!sxft~f23;P!0R!JvOly~1iOlp$j% zq_6T;8-|`XDJPpcBApW^09d2<-WIJD2pKT}U1+ZuA38Yd5B5-oaSLnJI%>6`+jva} z(0+_E6hmNnTE zru$0QMm<48Va)9`HuDwON_%u_#bggR|1|esZnx1rHv@6u=M7wE)d5f7^W06)G~MX* zJAY+Rfap+QHC;$oc6BsA0)Sw;+xu6vg+{_pgG#`mbaw^d9qaiD#sqrJ#8*0J&F8`H zZocE0s^jrIjBW%VFsKJd@X&Wfyk3)VMWH9Pgtu1t`T4ga!E+O`%x#na(Uky)h$a<5 zz)8u;sh)tyqY#g7g-ff_%MeT*wa6$_Wj((jS7zK(vf<5d_xyP0PpUItp>Os(Q{WKM zXKxh$l{9+wQ(n~cbs#U7OXpF>zIvsA9$z`Aik6euW&0NSND=|MSGI^RiWHK>q#vKB z4Fs!~l^oGvQ-G9mz8K>kX>{5)1sJu`Pv2W7b{A{R!kp#gQ#oQL^CU+Aq+LnWljZxI zLQg_824p>zM3_?mzm_(d&Z|0kp#;(w|E;uPa4CeQ5Kt~9Kc>&yAR$LUrZ2SSb)l!-A1O4NrJrV>$)H=y6#NJt zg**cCW1QVN3ZuwV(iX$6<3oC-Y~jv<3Wb#!Aj_i#NUIhC_O9Du0Hja_x1rN1oqqfS z5$R*`2Up@9nm5s z2rs)-srhrJ6te-Ug?zS5Q&TYv!oM@y;p^+`WjO>nSjJEA>WV584iFYyb98 zTIuEc^R^f5qK0?V7v_EW_Dt07PadJ!f+%1Yqd8%bA40 zz+Wa!hXsa+!YuMkL z^%K!$8?9;~O|U=yVsc}M1_sf)LQ$v_&|!1_G0ka|YSDTo@J6*4QK9s}rp{IX@|kUT z&rxl|bnbX8R62w+7Q-7in*f^6K`5*a4Kl&upn-rwBu6-=Fc%ayJ^>7}P2fOh@0i=) 
zr8~r|%^%R9MxZj0n^0#nmE+v*nB%SrRmj8Dx|j*E0Xf$QRvT6rv!D9+oda*e;m^&mog-r2Qbro@c>V64Pl-62(*Hp%<*(j6Ebw;gfQwgDghBK;YcOo5`G?6G5Ru=jt_$+OY38Sd>R)Y z&7XSPCQAK;=A)plt1-?kY!y(4>JlhX7za`(#^S>m*)CT+DSQ3EQ?>Gc|` zyyybB9qRCJ-Uo?~)*4OBd5|dDR$U@Z4d_U04@Au?KP?-20&LJ!2Sz|Ul0q3!(x{jE z4bYD?*A6vqA=z*lVb5HSgqR3icFP~4>Ps0%ZcL>ErrRaQc^{9DA7$<{V>eVk2G>Rn z44I22t2Z`L8MiaVn?M35%Kc1!4O3{XY1n?*34@etb!pY$*!aL5pz?Us5Ufwg&!t9e zf75P@(EYmtDM{iJ_$zFKre2{EqAZrW=aPu2;HJ^0DD0C zTqfI$4EXnH0N8lp@Mv*n^;ipyn2$2++jUCo{b8B4pI7TTEJMH@3`cj<0%=@AmZhk6hn-yiBTtB2Cvf^O^+Rc+*wu<9ACK zb?XNK5{lWOV@w!eC(R1N9-|6mEyi>@?MK9nLPB+XciwsO9gV4Hl!x zAA-?IHR7A%>(cT{yThlAd^Q58&pBz0 z2KEs)0U)hbtz1VPItZVI;II)1U@NFT=zS#+`VVU!eZLvt>oYFatPNP37lqU-2@+mTd+Dc13MA0irEd z)4)@l)#3w;4wfU^Mup)iRzP*~BudXuvs%N>uGu#`eiD*f;*-EBkCcwAgXhSPfQ*AR zrrd>MW@kB>eRcNF_eGZ%;d|cfB>;#IXzF!GCnWH@ev5|{0Px_sv3)bxJ7GCX3HI+q z6Qzb0r9NDldITaL;?V=!8Qb{Hgs5%aL##6=?)O>{rtdgmbqQDI&X#kJQpVwJM`T;NH550 z(xTW9x#!toeLPk`h%sIU*MM&S%cMv~*>pjJZL*+7#{4;SILZlJ3t}PhNAIT>9z)c7 ztFSx5+@ZEjgy)hX!*Q55Z{&X5Ga}TAb2`RVG7o^&iiM)`n@-tefGgqSt(f zIP%U(%Rh|(fEj#lgkHWD3!4s0%dlqN?!R)EJlw{;NLQl$800LdQB%6Uxk1Diyx|>q zs3zP%wxS`%4=D;a;r$WJngtMHvmPN!!J@rTc1t(+HP50)peB9j#0^zGNZoB%@nnN| z0;t2u6M;(S4AL-Ix|`dh(Z(%D@X|4$Hd3JV$N7Ap6ft3Xa+Q2rVA~3{8Oy`jlq+47 zXIygRR1GPrm~}6AKp!Ztmg4Xca*Z~?hyG?UL=OHAv$F2hQ zm=&ZLd)jNcH<~h4V$B8cc{v1_vC4p?zlaQbx|!^`VK$C%z8=t20Y8!|q*YbX|-!h=UT-3}(Y z)YSnh$9}0R_PQBYunVAM-p3z1$if} znsC=1v(mG3rx5uTU?YSPY5*v=$^Bmq7M=FWa;9dvIxxmebr@^QHM?7FeD24Qt7s9< za!66;V*6W@p;1?MEsNEl7!b#2nrk3#6sIh>IrWH`24T)Kb%=qBT%_}tWTI=D7psN4 zfrK|u9Z7r~;QC8%{kDPv208lsye2o=D9Zp?qiw?#c@tP5c6#P-gr)+p`=ZV2E_$Q7 zu(fpB5##@L+iYNjQ9rjao<4yNQ4u$Bx;O2_z!5Qd(phvdSATm!DP08^wm?6y{7lpyd05yeKSIG$cf5Q$)Sz zh52hEcI|zc1&21VS)(zzKIQkKy z!Hx2@rYvbo!D;I3MG>CH{WlXiqPwp}JH8D;LT-Boj{rOew1CdwxA9Se_r))4h9X@F zO;>ZePex};IW8ed=OiNyQaPWU8!hoY9k%g#H(eO2wh)kIqYfOGr_Z87teaQ#q`r#GLeCHz{IRh>(4HZ(;K4439%QmHm={Y*PHS1YlV5 zuF9R&N;&St8G9U(JpvvrSZffqJ-qmVtU{v0m<(UylvbFJjfl4`n!RiUhBXfa&QRTn 
z3|N0mOYkUO$K769g2_^oOA%1GmjuG4sRA9-0Fg^b*zwwMZFH1_?(n?zqU+;NHH9H3 zN@{OG^NSc;Ns|I39T1~Vt&$MK=r&9th7{Z*o*#E7yl6yQFFFb%&J)E93XY@Qq|Cug zNt|(4wzxCEYTW-4ErUTqc()Bohh2jCvmr@bV%56`R7NX?kx%0nq03^jD3M5UBz}Fh z>^#^j>^e4o7vI?-!>|S6#rVL#tqn(d?Hkfkv`?7wj}K{^1uSi6b|ig zJ3oo|62848e7pRv@2i;3Kz^p%OgnSn)?~FX*+>2Atm?e zn5voo_C28mqe%hCjv(T#c?DN~8fh3-wN+A4bt_aM=L%tJH4xXJI34-j`vbl%Y1qPD z93Vi;%2QhL4ksuiaMx$kn|=!@vCxzX?D#lX0{h1^J?sj*Np#Z`)`+=lo-<4GMue~; z7!{92IKt$>k=st$0P@!8fV_JscNZ=o;I)+oRy9Z5TdbWjsnXDR zwHiaKq)y^s;S@G+%O`Sa|4j;ZHa4|+_)DK~SI`!`U>KKPlm>cq>VcP`t@-`sAL|Lt z>A#Q_ONY&cDmIu2w1Zd}L9@UK_;{j0Ii3U{Th%e*nVyme&_BnsVTj@jR!eS?9bM?HM^KN5h8s8j3Psi6l5xzk+ z51i<21TVB|d7~;uZ@@X$bzcgx{4H6J16$KxEU% z^hXq{T5TXP#gbqYd`y8g}P^AaO&9ylkXj+1wk6V?`<_R0ux zag%eLsi82e0ewMbbJ&rax1&LGDCv8z#8GGB4a4vl0)w6IeAlU)(G1F2q(s9 z5-;+l)}O2W zye=IROhlR%ohR!C>}If7>h&PGyWg09bJB43aC>|*KpYBz<@~0w%eMGs=jG98Wk?ys zjJ|cowr0_l9F|}dg3S=@FTTA2T``3)*_TF8GRs!86BY~)`Q;)yOkyu7Uw%1cg~YBm zQ#(YB_r=YQrxn6hYi4(9XY~mZ?g=9lL+1h1ZCN?X^Dha^lMii?fD$!ZnVVF09-^id zC}$9`eSJ0_k@~w-TZ3XjhRs6XnHZ-19-Xx3Ke@RPg2&M$K(A7zT7PuZQQ*TFH9wgy ziek{Np)5Au>_tt*qu?n8n1MX!7It=O;xG-6Lt)T8Ll>U=RK9d*L+2MgTfi zBu3n&uqjYKRqoWh7Nw;zm!^1mrDLm%dUgPGsFqDbhWbjaVt$lUVvmf7BL*5(eomy? 
zr_taIKk?xQD~6@3eeye(q1BL6_@88c@xodq3@etJnVt^ycXd%=pmW#(8lyq0)`xd2 z7KjgylX}L zpyQ&UlvDDM88WlB5bSp;q4T+Pu7sIhF<)x0+Lp=8H6S1+Pw2-n$w%kgkz#8& z<8&^FWV(TK&yOznG@N7oQ&%HLx6~nI5@wH<+6{noho2u*Wi8c^PWpiC6IWuxZnicj zTJw=zw*FR!WeIHB)r5k(c=i)bCX})H(-IfZ&<1qassD<3AG9k{b&Qk<3!}d-pRS{C z&TY@Tx7Du`Vv@o#mph;}+Q(u{ySwACHyuf|E!hG2qatq5lkEWXt#yy%Zc~v>;Ll&* zT}~@vGb0>G8!}bTMelWFs|TKuFj)B8r&T9ly6urPLm7eQ2Dior0=h)D6-Wf0)ONc9 zq7k2-!xb+X>Dd}`zr|d+Zc#wb5|duzLS?^Z-0~XlEso~PVOiyQFR#~SF@ZN{ z7H9%V(d=}uoNHUKyUHVl3uTGm_jU-X;fxe1OTpRpz)|tDT~%v!baILXa?xyyyTf9O zd!<^5N?%su8n5%t4_LNp*?jD`?FBAxD`JX_ZI|jrN$k{h6aNjxzA5?^l|ClxAAc!8 z)m0>iM!5ur31KO7bdjaRaGJ#a-0&uHj4sO1(*%{0UbI;wau+y}l*)*V#U)U`2MHEUDNsJK633qEwsOPWa_NX^gzK!3t=S9> z4k|&Or20KD4Dyj2fDrH*_>ZQ=Gfr|{uQ1$oe)`D)GGvBG z$0aAFRR%PeyEo^y+OUwSV^R|P}%&zu7z&@@(_f;247U>0fFtFmYLzEYrGZ>zvN{+uc? zR%@8%TS9IoN`~wVl`EHsG3~{KMUf9C!c;7jr- zCH>nPQV?FBE5}RgsH}a>BgqKsag}}5Kbr6ERw6O*aSBrw zDE;+(X=*eZV8snCpawJ~6WrnKG!n^})F{pT8BpTwt0H0;zS=hN=*9g<$)}o3_KTBE zJPZQW@x3a1j|pMAZr-BvFn`L|`l%AxBt492U23HYUsX2;)GyD}vOEX0fIZ_OkM5K} zHL*9Q$t&mh|D}O@|L<>e$v71m%-;EVq=FY{05^|dL5f7&p%r%W2nAEd1M1B?H31_m#q0C3w*;!YQPnRyAbvOaq|9MSsEo3Uj2bj{g82w9Hh6 zCd-D={b!3|l?VadYJgXK#tMJ6HqpmYmZf#D<0#elO4!#;1)XvlVZ{MUeiRuEe%-Xy zv%)D42k@5t55!(oDAG>OZ+s0b)k8Xdfvp4O|@x&&+hkE)*Q zBb9lb^UMtA-mL;S?GL@z8TdQ&-P2s)Q&fFqqo4n!Iht3^ZDh6q3tuno2#7M68OY;( zMz95%zN+5`bpbsUJBomH4QKhZ0?nB)guwIIem7zPeECk5%y_@X=AU3!uml27vaV7A pKC`);-1wtMy!PiC7IeNK-mez99&hTDz5e%xw77y;#b-nR{|B(-hJOG6 literal 0 HcmV?d00001 diff --git a/add_readme.png b/add_readme.png index 4899a9fa3a7bc648de4b73c504fb7db5174c60c0..f28783bad6b21892459047001dbec03f5c1469a6 100644 GIT binary patch literal 17730 zcmZ^~1yCHpzdwk(ySpy#65K85;_eWFySo!0!3hvFSb*S8a1ZY8?hssV_`P@U-rc{B zDynA3dZxGg+aKF#Rb^RJBqAgT2nbYpIVp7r2uNJu9|Zvp*mJ~3RSf}wG-fL)sVXli zNulcEXl47^5&}XlIwcKWRpSsZ=*@RpQbG}dRKhXj21I)Ufi%@6BPW5SfWR2E@U;(D z2N^`Aqh%p3)m0BsgC~WvOtsM2LJ=EN!;1I0qzzf9`^@)x?YpPd*z>yZ-E;BE!&`MMZb^UJ zj64@SIyQtKrjXYC3qY(6+GUiZ;ZA4diB`cO|ib% 
z7JRkFMH6_a-I;uza#awz+g@vRypKd!%Rzg2wZw1m7Nq-wp!tv75z{!WLPFw`>vZUj zhrCC!Uloo(Vy!Rr&N_PvE*3`*1<^$?3IZg&Q!oKds8S+YVG!DLFoawXg0~su4!?YJ z^VHDZGpF=O9~7Cff;B~ByrMPIBCqSgZG(pcgwopPA@x-%uR9;*(yXq&7BPf_bktWq zJ99!vrz1v)8VQ-4(e> zcQ53J+5vsn!?wwRBgU9(!vLMkGmN_%i6l##udZq~^9hVuhN2n%Nvu9!P_0q*r37<^ zo*VC@`1ItQ4Z0hpblO#R=Sl&;vR*_(wb+2;rF52ugD|b}a%=6m0B-O;Z>4 z@N`Zk)IFL#sy#F-gln?czNz0)qcTxrn^5zT~?czu1Pc3_ycBMzhN=2r{1k-4Ff!PbaAj@s>o+mhar+|q}Ig_VTm3lB$v3a>}nMUus3 zWkO-_qbmKzEFVzVM?+|fU79*0(<1AmNSE|+NMuKBeFCK!|;F{*2M*aEs=lEal(R3?stAAGLe~0$g_H_3&_ZnG#<5l9>;`uVS zu$U^+D|T2G?V>U*3@<8})DM(dqgll-lWb;{3AE4A*hRQExVQfcLj6lOO1J)Vac|aO zyE)pmaqKkf>F?YS?`p%+ZE$v(VU%HqVfiYtucB{*f63wdU@L!YHgQ&@VVvPN1CQ357Ii~v14RS2R?`)~)w!l@kEflp6IOT8 zz1QN%dfChY%7p#v(LLvV&2i!z?i&m5v%8bKd|z7cmxI}J`;EU>l~(~<<~u!?rI+6Q zzfB4`(YTBrmL6mLRUQK_T<`RE=l2q>(6898&XH76qYz6Fyhv;a29bZElc2gF>0!>` z#UY}ht`V(_%vjcBxQ|X^q$O%nlI9goluX1-(3_^3ZvCEnxOh%{Sl$ygFsTu1$`v39 zGi((vV5zG)`k`VOApIAx;mOC+B_Bt?^K7oZo$@hbE{QxG;dB>Nyf7m~DD?>f= zDr8cI|=e8c^d(^1C^y6vWIy)E{k^Whi5`?$h54GB%t%-_@+Y+QV` zvKF);oPXBM)~AC+xREh-G>I0`h0+Sr5k<$Gu8W>E!Jn&6hEEa%ss#pIHC@7+=hO6A z*PEVojZD^5m?oGOn3lAOei<~nnBbbvzRpe@?aZFq#m(XTBGePov$wpL zw)aP$ys@KRtkJ~2Yr|7~eHI^t7lZ7}=k&}pvl=p1uz9LG)t$v}Q>DRp? 
zV(ZQkx`$};8+$dQrN+F~{$JlllSYKbmDQv6e{>Sf0Zkw`q|njVGLI^_KH#NCAD>3hTrDtAee3XRp5_Yxnnzezc=Lx!vKOKRp+0 z40ODRKYcG#Dj!BOLAN0H(j(PheMmZ1ap%$F>EkiB3$K5Hr$+afX`Y!Sof4>PCaX-F zMJG4%YkG=$`!X75;bQPb$P&l|ls`R@b^)iy^hx`&VOM>Z6xr#;(WE8`$gu zohVHV$#Bf@ zBcFjoom;DWqS9VS-hv$!6_x{drz^+xS_yGy0=hoI0x7^ zFOxHEONd)n`SXxoHD4inE(tOU-P~^hn44`2)^XGMs3d6aXwPbD;b>;b>SgZ)AYTXw zAumB-)85j}l)}sYvxBRkmoU|T&JYB)-*>Z7QT*o=H#=b}osX&%l8!Ex6g;fltn5@G zNE8$lLM|3og6dK-|9v^|NtnvU&CN-Wjm^{3lhu=p)zQV8jYB{{fQ_A#jgylFID^I2 z+riD$i^ai}`oC`Sf8R&S($(C>*2&G*(ShRqzNTi5?ry?VRPPP_&)wmu%@PcgbN7y)6+1dW*-oT|o?|TJRZM`f%>qyz!TROM`ZHRF4@d^Fs{Qu*~|I_0C zx>EOlUCGP;zpwmXNB;XtA-4Ay_+KydU%mC8y+C(~APKSkPuGhe8P6cwLqPCH$V-WX zy&zAs;gfOt<_6eIgI8%;o0!G7z9nHveG^GW31Nt0*nF5M&JTdki3uVJ34#fSMk>z@ z+93hcL|Y6|2w3Ze8PW`W>vDYctjP29a5*^4X}-%^MAJ0Xe5uN4)toi_Gknq5C#Qsi z0V@S5%9uE*awx}{AH+a`^cyPUQJ+~ykJ+F)6^&Fhd(ZXmOnCSQTYmx7zr*QBGcz*= zR@SkCW4|vXzx>qBO!iL89bEivm`s{dhcCIJ>&(|F6Trs5O&|^b1gsX=J!1;!JmjBr zjV_Zf$rI(3i&?2SoEHBG$X{-B{Ln_Dn94kpFO3|zc?Opy;2C`3Y*9fhe99m<77LXx zmWX>%Qgn5{o$!IhVYW;&I|82bVl;#6=62apqe8F#;O+JN&E|ZqeeMK}Oh``H=Vs$K zzOmoepUT<8rh%^y^!4`3saE4TA4xxW6z=`~KAmP9AU`xTG-roSDkvrFdt2DzemoWU z_GFyNX^FJj?mJHuu+@v`v;BpyB;e)o=3)QG^x<^zbfK_$KZ?&`QQATCe$KYvM*0W0 z12LfeeCO?c-D%zWPq_Z~n~_=Q4NO|U0`L zr%iVlq5+OuEF3!7=u=MwY5}kP(uDO7e;J_ril6SzuhAV2r;27uHRz9)>Y4s#f0%!y z2G1KcyM$kuM^LA8+hVck*7W*(kHu%(yKeWr`)M5bk}~&8x5pG@Px9w_?88irrQ##d z#m&o8MT+{DtF4D8i?!T7H#Q*s2Idd-_xD~z0xmR$?LM=5GW?{;m}m-i$8#0t z#dqDvY^uOR9qtSz%``e1ml-s3ejyjlA@jdco9os^*7%-t5)0J<+EBZL&k%mtNvd<% zRhSZq9!sQz{&82&-eHhHep+7DF)Y6`nvrzT)0glMc#|;lwdaTHQn6qtaK04cQnM@l z=|XjA3=U)W<{H89%d?{FWj0{M%jToHr2VB(BHYN(MW}TlFPuQ66=9)!i_I{YoBrlM+!^*Nqnd4<0?YdtSJsKJGGQ|mM@ z4#AWE{F&LX>B4Dr`>QOO^FqkvE-Q0JGDvecQ?p1R#fyA3=R2R{l}L-|FaXAgUcWPq zq_O?@w{`(N!0~#_RIXK#xwLD)*&SXn;C;PUzgMg*L9FBE#w{%)^K+6>yVBq!YcgBn z4vrGv7YD0(mDRA*Qw`tZ6V^~)UoK%46~z!%)z{lY)2wL9hFzv0G;qGJ_2h< zp`H4{)1k%dk}9t+7iAT>@LWZ*5plQYe&FZF?@h=%%8%v)@kPK0Ol|eFNnw<7&xowZcIdc4oZEo 
z_{B)LAPl&4>#?k%F-Yj911uAW*%ZxRI$t-$oLO3=^&1_E99CPL?}ldU*TM>pp^ACz zm$(~Q$CK#M+wazVSD=V^KH(G<7CNP&hBi25JzVeakAETNLm=l+6GMM*pJBl~LAbvH z!hT*X0qVmrw2FJj>!V#ao~V+LX>i_C2;|iEK*Dvp)ZN)?^GFZ}L0!#f)cqOnHr4rZ zZMFcSuxf0SAd!DF8bSx3d`aVH-5Z{6x61gNfbt?Z)8X&yp+gWQja-`EXKOdI#em9> z=61^d8Y*rvgWUgFCXS#B4YQ&)7#hAN)IB*O3m^8mR4l?|?4^siel5NB4m^Q65$|d< zE$VqjV!A?YH}`94+Trx=a)A8Ve4t4#_vC7KWTgUZ$ZLEp5ct}*T1{X)opLMWt$Hc~OPTQ5VYvd@)40w&k&E$;o#dMf3u2^s#FOp4Pz9fFf zhxG$fl(67-X*}xqx->JGh{1*|sdI4VsOxKskq7m{@#E4lL5W8r#ta9AE6K^JM#j0j zg}J(gHPjjXT6B)6aSQv=yF2FeZR~2GS5fr>CsalQXAu{6MT??Dxo^-*fuSkj#V2PI zg&v9wanKnFyA)Q~u+`(sWI_|1@&M+T+9o6BIG%m9tDc7QX&!U-CcXaS-glT@XeUI`*L&&$nhn+9Xa>Eyk_N9DHY9#9#es5P zl-Dp@$9+hi%mEri4}$FGFi!0};TWcuLONGn(eMD_@DyRd)ckYsADC%aZqWk>2{JK* zEim}+;9LrnxF?8#;^5OpZOBl}+kIERMm!-`gj$?r1j^>F(8Uj;4Qhz5oYu-ID22Y3 zCKU{LEMt)SQRw|r>@tv9)J@GXNlS_mz)a?soS6KD_6WJRNC z&%w35UZ`T5Vw%c1LK)hOsj|~riU63aNd$~DC*2; znO3hwqWEB1C8j#BK3A4d#3p9dV`j2OG-r9+Kb*pBbyowdC9cTOZx7-tvf8MHz7y*K z54-q3XA9iJY55_b2BwIb(A^9#$c-xa!v2L#JjTStY*qHwSdJV_tB6{jv83*I`iL}S z8Sex;z|Wx)3A!0owEU^88-ZKZ($g8{a;6h=W;UKA4Dh!Y7Dt zD?o87@jPFHnV8adY?lT~=bqQ@NLn!@G#vXk2nN^B6f3*ileW$VV4_ zPGs~=e~4S0o`8~59Cd)>`)SbZax(f;DMMnl)w9_5?u_FFbu|S*b>F=?D7xEUACF=D z=&W=@AEv5|MHQ5l=?o$>meRjgBz_m8E4i|BhkrpEs|7miL>1$wYN{A(rvMZiC09{> zS%$S}VHT1FA8`(rWqf@69yyJeGCMycfgbwD@4UPI&(2|qaR~|g`~)!J)Lmld_m|s? 
z&cTsT@1P~`J8a}Jf^GB}{)PtdM3w|N7|5MGL6cB7JwVYzEP+E|phk`RS|V=APxRYz z1Dw2=O1?kY9SrUB_HwuAUD6a%16D{6Bz94z1z#lJf3khk_#i1)R+X1O-X$MA0L79V zEC|0-#8qh7;rEa}6q1UTg{~*QSYv6PiX1=bBc}Lie=Iw!X&8|Ty)I~j9sI~&%pw|Z z8VZMMMu)0r+(5^ms4qS0P2!>qJ8MEGRMcur9B0TcDcABno~dy6eHk8uG-m?v?9X4i zwVbBp=1iZI6!KVuN2EqzJ@!Tkp*Yy+==ztYYP5e=n-93mqZzWht#1@2BMz%Gnx5Iq ziKz2p=S)pkB?yNV(%UvkDCGZxEF;xI4GQgo-xkh2E>A^-i!awKmjTA3nGY#kLKlI< zRAKZ;zRAiy02a1Wfk9lrKua55g_&HWc&LO;Nt2BNMv*@6pZu1Z%!%|^G zce3-N)sbD`z^No`^j?5v%4w%M#h4Do$YaADcCLW@<| zP1w7jWb{fta9o2ZcKGPst+}B^u!mpiP+T z_Ygjoelc&Yx=0IBU@>Y-CuC0FM7hc~>;9!z9|ILVCY1qWYOBxnDu4&vNEWye?(igO z9nGM_f9{0_Ya8;__x0I3lif5#lOvaoG>;J%bBsv0wqO_`9mgrj^K8Ymj&^t&;F^XL z84N!tn0ZpR1`M4+ti9aCIqG=~!s3d8-4Z!M-rq7>f~=waV4)Z7Z%-E6613u|xr?}p zyYcm3A-+R+@qK=}J+1S-bMOF(vB3~!M_(a!e=E9sA_s0o!^4!L?oMrxN1JU*Def_n z!DUTE_pwyH=vNG*8_fd4@C1_cd}q66gi8=F{4{iS=@o8Rk$QKazB95MV=EeK?Y^=_9~;s5k!9Qh+ary^9l8R8^ny>f;PLKefMTo9vzKoI72qL9&q1axO_))RBs2D`Vw+G`Tfp~& zF)Ec}f5Ln@>k5VEc39yrGi>F>J_8FmHX>yUIIam*eRI+jp8;WS3cn_;-C#7#BXLgCt1IMUrr9#9W@G*ve3{_b@enaD(eItk#BKTM*G=GQ zidqU{^*;$~q;5|C)%)$Z#P#RA{O`64?<=w*baXNJp|xAWjDw5i!N{<<;Z;ndtZrt` z=0Hi4bx}}TEGx#VK9}qb9fx+!unGgmH`k=`7fv(CP%?iwtG2F!Yo9tur165zP z>2S`RsfiTI@LCWB3vM}1UN!cx*YHG1-ozhd+tLG_fso=Ewa-H!8{qm$H6H-_Mik>b z7sMNDk=I}qZ-sj07y+W<`bBa`;LQy&e%AdK*lo@bSak>Wg^D)F-Ct{bGP3z`*}|`H zIsfmA8`p(iDzS_6f6UEg!KipoDCl0}s4(8k9?uW{{;&lOQRN9KzCi=_3{d##whyYW zXmHF#*&LY3hiR_ue{x?=3?~d2!$? 
z;C_GeD9YCNUT=lE%Ir8NicmrV;0zZqgY*djBA54-T6T)5s!hU+c>ZfM@%ZFF7m*66 zDRuesNRmYc6JDXBkC%sQ?Y=hvuU^hk_B?TDbHg8wc0(E@u0N#V7)K1;W<0G2uc%2T z%)hQOM)q<;q6^TZg51gKLp}E%P&1Kl(moc2iHvZs#FcGYf z0+NPB!pZ#+_?pr2c*OL$lcaKRvDssg+8vIJeKplD>7BS11POicbpB_blk&^ppq$;q z#3Lc^XM&IE?7{E41Fb@G?!kc#EBfy=2J)%+sHop5)H?}9Pv%=7EiVP%MhNSW3zMbC zf5(WTzZ>kArvS)vANjJ;1-(>bi44HC_-xp2HlsGV>0*`r{Ix%UKYFw)^rW)|-Sg9o ze4;Cj|0svTqwhIACob37_Mlk-b!+Z7lXj)*pMaMV&2lYn+bJ5o8Vf{E-$eWtUV4=r zi%4*@izSh0fROOxLBR=z!+J+H9T~|epw}Hk!dA~_ z$J1HsoJp2O0|1=I>;Tye-0tHpf+|~UGfCw^0a`<5YZF5y=G~FxZv?=bMl2r7J^xB| z+7@`GvA0kkcpSwvR(s7h0O2L4r^_Am=YEF_HCK%|gZ(9&Kz4-FNZ=IYfZFoWCYS-x zWr)rJJfNAqRf*8TWrK-I#2uB%W7p4;1E?!H+a~bdlc@>*8SFMQ$~UyrYK9e zRY3c*-l^Y6;ZFAHBm1GqtEBa4x?Tz)r-jqwJvc$1gv$!0{rPG{h%V$ZNw?Oj=xNfH zCNHO=zq1aqSF}+VE(@b2+|e0<0r3xJ)HV z8Jt3J=Eo>tpQYo=K|*DA073D}M-Frv<&Q+*eFOlk&DGA(T8-qcpA>%G6*mCCQr4~)Q{;TLd;YVdC>EQV(S^R6b8ev41@l^;K5a#QkahHVOo z+;bGz&9-^F+5wW=X+D5Brvb*$SAntPzTJ=dYB7@fmk1rY&E~KZ6PVm--;$EN{oo#& z>^9@uaf-Lj`1&?3gNl2Tuci@jM zavu~rF;&Ck#>Sre)Q4~U`&zsla=!8N;5;oM*VWD@8K{ZCbG@v?#%upI!oHS8f@hMPcz=E%8$QTI065(#q)g3w%o19mg&4S zOvAplNn3bLLtGICI@hKY3}{dKbm2(2E^0NAUBGZGX!E{S2Og6==h^x^LG*Rw41?@N zkW1*xWc(t3Ozcg5e!iUhACkVLYg+){eA7Y~0Mv86;tj2ZqOcWiF`a6Rbk+eZhb!|6Wdj3&4hCJHfq2;i01Z8Yp^ejF5pS$SN=fI{zw z59=#oWYR$*8jw|{RdJ*^C*H1=^9rN+dNC?A#YuRfT*-gbu}4z0(mqZlC2NsS=3b*3 zE~_q3wFZKQ&&T+N^n&2QJQ2Uq4<#F&F_c?pyKP3tqsN%r6ATO8@vACAcZvb|s_(P? 
z#2MfL{EdgKm=M?yiy536ic()fz5kYAn$?izLD3 zPkWJPglwh`4}dX-9fZjxw?_?P=xpL(BAnraXnCC{%u1w#)Ij&e@#@LY{8?H;a1;9+sS_a?_ z;=M%8Mdj=vWJhy$4h2^uL$g$>709$X?ul-JM-6=Gn0&uVICDGooOADsaaM@9hu_i{ zPsl6AO6x}8@TYv0ihh|bOlge<%3ev>f8TT2#4ujm8=8&gJ@QNberb>U3mJI}~(c1L{z# zbZhCVFvE&DblOIVmz$iaPgh&{{uTlK<-nSQP+BAprrF@GUXg3}`8UI;;aF5;c67w0 zz3Hge&uLab#V`a&h}~OaCmS{zjtnCOZLnjQ;A* zS8DljHLq(>o1g|%=%?O3rRHbjtCu|cj}&Z28XV2E)#~T?Y$P31Hj^cU+oSHEWH;J- zFBt8QakK~>badt|j`V1*-7Yi_;vX)VUz*u!|B6Oc<;`8rGr5wnXPC(kKJQvvu9Zga zS6e*rE)A_IJNb&sJXTn3iCcX)OE~SgDsH{io@~|^b2;L$7Pm>DM&k^U`IX|tF#U04 z%b30uqf}hi%{xYgBE%Jws~(uCN{0UGUTU3QNQ2Yl+q2Qhayab&=WhAIR5_<_l=}6y z4oZ3;+nvNvF-8m&9gW;G=_&Vj7`Xo^S3-`%UL)C+f9o}birl-6>u-#aWIdi^nY;`5 zVlTOw-bon6W@{~6(nDWPR4FJ(It%5K2>2330|Vy3vFSHp3**Xv+-wE?X|lsI6&V<; zY3=x1POV)!`Q`wzc6+$8q^k`7z0$2-2;g8K3SFp-}kqa!c(0blMn4Uji)H!q=l*bxxZS zP)JjA>gwtT>u;}o?`y_-VqY6=F5@H-)E2~VhEeE{2X%*Kzb_ul%6&FGRJ-y85Of!) zWJ2jdVJGudZ!Ftfej+t;q<|EBDFOQ(DRwcUBsZ2{5>}&qs1>T)oH@)^vz@@6{HzMik5|e@$GlHcGVSLU_6=dFbm;@>cpqu zymD^WJ*73zTlj0Q4WW(SE<1zb=N(U#JdXgH^aC~WP==mGX>i!|g(+0~k#Hs$dOlq3R$M|jx(q;!!4Z-( zgo}s5sR7bU6Y*TGI5aEq6e_rRmC=}DJ4yqwu*EdzcHqOeHZ2pUdYukDLztj108)1} zY=AOEu7d&-^cw(nT=(Ur9KlUpdOE>gnwKHONH~CkwrNEtsMIS8sb%AvvUgm^g=Oz^ z{Qv-0^!oBtOO5E(yvk}4L^Vvk3xUWqBmy>hNd#v>Hhg;P9X|Ia*C!Y=7I0l5s;uni z#Jd+S7zwUKPUY&W`T4=~YzGfUj|F(#E%s+8j*FNsoj`&_7#q_#(hsSvIt#CW8*2Lv zP6TU}O<%GRb;KptH$ryy9{7GkMQt8nYE~L7C!ch(V%B0JodcRnd|&wX&v+j zo)$ogORXPI^%`xQH*AsMOgVT%y2guhd}8z(J`WjU3l#|feov(T&7|zg$L?wJbYYt$ zJo?ftn{C@W%=qfWeCgOJ0CP0$7LXE)HwpXEjG2q!K@U;O#F-Q~`TYCTZNfH3A~8QO z??R1Ukl9vjHj{}M>{5D$u1D!aSC*20TfLgfp8m=0CB3I+HzrSCo6Xr^_GNN7sF>|Q zP)g>f@eqcoPA$Aj__L+gaHwqe0o*3^6EF=A0cSj~*>(Slqs5fqps$lz3oneiYuoOW zU;;qrh@m65%ow{3X$d*|7E018^*!uWzKxE0=S0dD<2NwJ93C4|$-7nHP-RD*O0;Qv zWE_UXvn;_)W~IJMw^-RpFQ_n2^$q3;J*Y>_3R+7f@@Nk7I+2T)X??W1M4#J}p;fZe2d9;^GZp^17UjENEuw1{)>Tj29|c{$yuK4xW7i=y90Xg~ zokJBl?W&b5hTK0ug}s?KaKV9<(1Ro_K|MU;nfr^VJf-swvt&w6kydqBp~V)5UGNv< zN7LO-dH}lJ)BG?viHDN$V9+5TU4ZfYrSd~0Z2p`ygPuV_WlL~)ykx(xl%U>9qR;?1 
zCy^kw;9V4BsplAg_rtf|LsV8L_Uw-%ysLhx0=3`z4_F5Qen_ z{u)T7zhScrEhG%AI_WSxT5RfO2ug4u zg!EbcypehQXovsP!4UnA!g=Gs?BP_F5e>xKvsJ~ONc6^3^uwUz5Y9tepl`wYS7xwO z0Ll5#yjAj)>6+|vC-NgUeo#vI$RZCZL8@3yo|5uq2e z0vK~lPyL^n@>~}K<}ci z;MILvaRy~Y{Uw)WP_(ZDf)x(lmNhm7MP0VrCI0+L9;Zu}rm4)j2%!TTBM_d8P0r?f ze>+Qq{vtn10KjslsKvgo^nkA3BAH7thK$kdA}ZH*pJZz-4O7WTDcLN-!{v zp$1B=vM!f);@tFp9Q_8escM)#1O4p**GK)SXJ(65+~i%G%+&4Om;q@Z43i1_lxVD! zRg-yDZ?g@t&dW9jGJ`ZPMXQ@tV$KaKXV6hqT{8}}W615%~!S~K4#MjuKiI1v3b(oF)|KTQR zlw8X!<+thN!@xzbo)NN;5|ALb1Of*5CcU2CJ)i8vVFEvHc3A3XX4J%Ax}<{13#0fY zGBf?)jI-yn6azh_ZGhQl1m^cCrZQ?oc++>PY&o;OtC=XPaBkE=m3}}_2Vt8)C%*vb zO`-U_Em$+;eF%d+fVDO+!yaJwOR}eiu_kk(9Q611yC|Y^w*bKg&N8e`8(dRAtDA$# z8Gz_o{VO}ZSsC^P`INDE>*AJRwtwC0=gaplQWr(+`=DO^d7xtR(*WLi$(UUu66PV& zbd=o$PbWIQ=*+wRiFgNMF-YiA##p&9KV578fC~agO{Re2YbzRlE&02D0nx|X_f!uO zkdRWS*vNOCfwNeP=8buLxEu7ZUMV^iSmSzPx< zMH;OpTDq|r>Xc3Nz_d9oroRbTaD@)LI7o6THerQPEL6Vpe$j1e*z_S$ypu&wp+(o) zxIdJ7H2$kDq^kegu{&9PTF$u0q5`_8CM^6XBch!yV~qy;uvuM0-Lq|`Ukjk*h*sA) z)&v)ZD97UoFj18Cc;3b(pHrHRY2GX@b5g3p?_~5Ozj_v2_Yr9pw)T`SIo5>9q@~I) z*S*Enum_nfSpz)~i=j|n39xpY<}2{#pj(uL{_0-{Mn|FBh^assOugF7Isf}2MbF~s zQ}$mtS%$rT_FuR&s zVUbz>@KkMyOeE7`Y52}=2CxUzswE~RcC}h zRmU7<6;$v#Fi;YZu2vC_k5F53&1H^RHrzoG$4oRCeau$;#@h;LpjV2f=~dKLB(R+S z8q=L)sA&-!pqwZM#%X>&+QatEjzd&93W5(W*kM! 
z!w60d<~_7n735F2ApO|SQbt`;ypP95&|l;EQ^ZP$sBEF$bpDI*a-zOY`uN5Ll8C#x zXZ%YR_4te#CkAm~8=v@v0Xl1Csot)cAGr9aG_u$`TH_L2%^rjPye)?O>O2-~q33^u zBw1D=q#KJ?dM`VowB7qkSrmiEVT-=9ACiV=*VJR>O=7WH#L#&5Y-LMEwF4Nd0g<2JJ?H~h zj=>nuVB!G)o`gj^RgHiy(NDdJtGSF6EGfD2=j(ByklI)0q;j7C7ll6sqeNbqG=^SI zR%I(fjAxMLxd0%JNrAXGZVU<_mMOJt*Sgt$+~)%76p7lntZc!;f!{mQV9O!73 z`#$E4Vtl+vkO~PzdF9jW2ZtEL2QUS1kh@L*z!?GO8Hf(nRtDKL0s^B<0@=shYrDL; z0Bj>Hg#th%u4F2Pj#mNR|9;<6FSa6` zYO}UE4zC8YV4wmiZ1A8dqX7>afA+1M{nPFT3czf6)ugxvILyoU{6ae5qiIDD(4)sk zX+W;(xhhEL$c6K@016jsDB@zCXgY-ZiRfthFTSK4O4#`bC8jJQG z*UxjLu=38*7a;I@?LIv1Kzy`=2Ag+Y}0fN8U>>8P0 zP{0V-?f|)!`FA{D{Ein%A-Ww`nz}^2Qp5qfbxI%wFczi&r?b>?y#rjX#exfDE&T7Y zdA1&c$D!9-3bk?7<<0i-nbp2a}y%IgYZ7JJnzhM|v44XG%- z`8+=2?DU?xGkEbI6F$s*5!ZFZmO+LTJphaXtjw3;(k2B*AYB+HzGFRFu-<7<*361^=m=xjiPEMIl%1&7< zdUYGDOMn8>1RFjS0Vof<;LVG@@gL)N=px81u4ZTN_MLwP3O?7?lqRU{n!?g+qh^S_ENr}unko>qn44j?ZhuIf=Bd_2IQfQ()U>5tSI5FIvuNMPjz=iKU=n7(7Y{>)43!fC{r{o1) zQ8mD-I5@All*-5w8mxUT;Up1~xl70{>}_0HZqMeio14fH_C2^gT@DkU1$^+U1SRQL zpt&aLym-K+`Q#1)GTmXvt?tMD=2SM8pVvE9i!{Xbnq79cI7m_)f}voWQo=Y*#?V?M zZJKj^t&YK2De^_dtGu5V63eu!#@h7w;{l5+^F13IU*q@SLS_+B8;0G}q}&&SD}cg6 z8W!6OBq(t2+&C?MjcUK;KN)_&+#QpDdny!dzgr4Ccx``5W_RcOaE zNH0JrP@7dY<4@+%g$M*!_deiTJ{v#&9fn3L5%)Jg&cf8}p9wy$pVPepwO*ozrC0ptwX2R^K0=BCef?tB?)_;Ww{%B7%&0VXr3s8N+x*W877McXe|E^ZNMKPye-l`%MUV@VOgbx)c^!qGbpsippCR5CuB$EX zLcAfty6FngDlrpOP>}fD3gQ4V9y3K;9=FYe z7;J03bS#2levx9@l`qeGgyPBd*etpi;Qky>_?yE#XRjN)YxQn{Cgptgg)Kav^ltlA z#Yb`(KO~Q63yfxq5;*nwQr?>~7GM3FH!g0*L9-l9=LnmVdDptO(Lq=wqd;fdu6Otg zQN&z>P~9Xa+5r(RD@M2noj4(j<1Ei#T$xzAsCcZDbCX~GaP%MAkdr3!qp2fzluKC+ni8!HhO+Hw;`_W~H8{@aArg|$ z;Wob3c;kNkMl23__)3t6*TxLJCLo|+|7}@&gxRvO=v$P?do&3CGR~=#ABlP@mKH7kmq(CV+sDDkP+_@Bic`i|Z__sqY@X6DAAp}(1k4&0QR}O9lD@;14I`Ge1&xNVI!?Wel;?0v2JaKD9**@pP|q4t{fm@)3N_U3G*36nd{HywI>3~Ym{yc1P-K5A?=B;2K(r} z^eY7A7Xpru2sF}RsRlrpQ3av{1ewDAk8a&CB9_o_C>t3Q@m8R6uNt0ssH>X+2zN+} zu51J68UMcm3eDeJJXGAI=#JFrku|+~8(}*qPJu8TvdT6*73Px@1)w;Xg@|M+0)}PA 
zV%e8vK*4b+3W9-$F$c<{zZMb(%}&BlKvmln(zrTM`*^#)qLA<>lH%)0I|*%6XVowd z1v~MJa$GY2Ux|2SIRbJT&D@lNfcq{U?#PRiF4BM5SB#`hzid#s9oMpgr#oR5h)fi1 z)J$H;#LBwpF4G`cf{Fw(dF_Y8{DacPVtt=42MBKq;Zin(f>b9Ye!pnK$PJ44@&5@*=rBexjyL&rDZ}46Pq65>H7Q2Zcp_`OK0? zS;KPqy{|`LxzCdoY91#G9K+ck_{Bs%IJTNou8PJG2w@V=Ep?X=3h6V@xsEmwNZ_fdeoeXMJPp7Q=8$`tiCU9$-6o!FQ!o7ywnbX1)37a)rc8@Qpx4H>gCIj* zCi*WH^1FR3+uY52!arEDHZ8Ket-2##ShD4o1gYd%^yLQI{d_HZ-LTrTB8#g53V^(0 z|IE)%7xQ7aOFq|y&&7-DTK>oC;!s<8I*L1N^!k8*OjiJlLriRU7jR*OlseS;AIodu zBUt&U3|LGJcRju>zO(uNF};2>7h5~Cn&k&Y12IN`oPW!YfwEE+%Xv-DOD&B1KX-en zDgp^8G3IEZ1|*GZ^}tFVC&ksS1o+=K?{1yW~)i3P&IJ+biIxb-pJYohb`) z(4=vtW8J{}B=+^rux|%)v;tMdP*-w`$n~e~oe>rx?S&B=4;YJH_%s{a69GM)tRAb8_k_Gl0g^(wLPbsR@}z09)#h(}5I z2|)Fg`<5L;fM?fKuKd5oT?krfP{DfoZ6{NeHqhB$!eTxNvpU59dxnZ}2aBo~HQeL+ zadhRMi6s+12nqw&$N>Y$FZJW9cuvs%8Q^NKfAWUH$_pC4Z$1XvAnNJr=d#Wzp$P!= CY^c8g literal 15909 zcmZv@1y~$GwAg4cE25PWQ#eshF)5267dzF~lEC?>CLt&Oy%sHL zM^6pA|9vqHHA)hEN&{u_3Ia6A5;T!knrpq8B!r?qB73u`%lhB(#!XO1$9DI2;PLZx z2PRY%zAW`v*do*@{zTbaf6(MmsU=+xrkEH!7Bm7SX|2*gTml;67rKJr-j9fYO6=VyOIQ4V1$FzuOoECW`h#fPacaak!u@LA zRw`K^zuUS~)9DZ8Bt&;c#-+5&AC13!qR$>dM$)XE`iv_wc3Mo?`aU_AOmHkf-)!=o zqI+?A%Y5128u=r+il2<{_tS!?3cgMjuq7yG9SEtv!e*L*&X*Vp8S#Ey4gJwS(Qa=4EB#BQkSD}Dr3CLCMC zGrcx`BRn+V_oJ*>l40C7N1*3&`zX$g1p|)-OTGikAP37&i8>063kQXr0*(9>4?Da6 z?wIXkGWT%-jXpC7Il_$&wc9g*&Tz6NuivA-$s5t^97B~fwJqVQ3z?%<<NrH7&uDxrqP6Y5wMlU-;n*9Aoq! 
ziN)EOS%woK5BQ%Ezr?#|p>7eL&}i7<%Lo1HiG*;y!u4GUNl|CQ;aymBF$wxecHXz+ zh@dSG9`D-!c5FusLdK+lqM|_?!fFuTzzTi`BQGOFWv6hk}hxtAx zU|@%x3k}yI{Awt|Y>(YYiPehVO1PZV9ZNIW_WQ=r?T0A#?Ru>m)+OSzP>2O?1I)RE z163=YKa@X?Kej)~dU9a$;4UGbdoB|zTxP^XA4~t&k&A)F-OsLh?IIiO^v83eaVvwtm%OWHrnj%;SI0ul_6Kxc3DP^#H zhrSOb4!sW1?V*1Vb&V)hc~9?~`azzkRJ-)O2Foe+Db*=Uwh|9@*Qb(E>T3lzqDS!K z`;N?)*kbX9;s;F)Fv>WB0;nw8E%`b_q3pAIVC9$=zb9rbZQ)u!Nl&%JFIMG8b;Ih0wK_#!3Kz9&^#aP9Cc+ZJ zhJoErRCi22XSjb8TM>s63njuN1`-o+J=q{lWgV&R&o9mXSzw)iI_2x+&r_Z$Bv?qE zT|brlO#E5p^VR2gE+(!Sn?LMWQAFWWqaPeBNi8gL0Y?PO; z7x(Nzwu`o#OHVaZKQMkw|JbV8EPRziEU_rVEo)Mp#6c&0pwPh%cwe2{I*E zYMD=5GA`~?z3=>0pWkCrpD*Pcy#?Pb9vIbA7ZmkZ=e6kx7J&=a88)?FC)^!@E$g!q zK4-lCumc2x=qGU$AtTy7aVtp@VK&KkqOUwvoPL%{@4Yxxc(?2}J_uQUFnhmlSt*}5 z7N}WVnNvCI(Pf)+ipc5S1lQDJg>E%7)iSwy^mxR5gvs&+Uj)AcKOMh~*)_u{BRivu zS+K^8$(pH1J8}`-er4^ufo1nZr#pgI)HB*u!Z?w)BbbLdM<|Cl$HXwovHiks9bsLU zQ`w4cW<#jS@rYN@x{JB%)eG~S`&{xucb#{&*R|elM?i~j!>v>Rfxp(p!6V?p@?yw) z?c&FUl6(Hex22BnkJa(j@juFzAbpEN+>4$SXU4pphNbI%Z4eeMQT|osDe(9GZ_}e~(hxW}L_GLrL_4@)_zXl5jQ1!waEoy0@PdeJ z$W9pjQ4x_27#$N!7VfKy=S6DEvyh^%=4tg=_1E?EyE?n`zspyDpUCAyCfe6hH7CDt(yd7?Fxa#f1{d-jjQqyXkd+uc0q&09z39KmAT! 
zKlvy3&`h&%e+*z3+Vv`?X88v#W!Q z9osrB@4C2=?LVX3qUhk?1`@m&D;FsgHUIu)Fw^4SGZMh|)DxJu-${1X1G|vP%qW}uD8^A;+*BqFxxP`wP(H6 zY&~70VqSP%ejT*#KRz%)?45PK(!8(OQUBr0w@+U-jH<)e7h1mGE zmm3QY`y+=?+YCw!*4d(rzXdRy%t!L*tanV9WU888wY&YST3?K^XI=hTCul1ASbbAR zzizyC=G?wzbMA6!^D}qRw{@c8>}4t>C)26H*|{aCIjdGX-=^bX66X6hK%lZHc3OdhLOV2}3SxLac*^$-U()j~~)yL5V zK)z5=LOue(p(Dh@oYKe9!O2~~N0|CQcL)H-Z>QO)DgSeehrKYhp0XOHq_Z1@lAD!_ zm7Q7yg_4p|$j#D9KtoFAzl#H(gsE*jJX{3W*u1^HS-m+~o!zY2IQaSb+1TH+y?@UF z+`;1R>*QhX!{X#l^Iw(x?|P&l?iOygE*`edPLyx;ntyQi^bn?|erxFe{QTE*LVRrh zZ%a<@|J^O1gKTeC*f?0(+5S)6KvAK$vjS?iJ`e{zDO*Q~lRMCc$a`)Mq5s_f|GDyi zTl^m-_5Wu{_WxP(e_Z+Rl0s~69rzy|`mfRY&sku&L{Nm-{?E{hpkVnLazjCJ*vdqcUv}I-FDXp(`)~CI_6q0Syn5vu z@=#%mi(w)8!(B%OqL+n*g;4e(#WYS}ietrK!AY&slopkJ4ioF~cEGVA?9p$C`cp1}wbipncO{7$T9|lVIWlj1RvSoO z)Znr!(dlzlEbID$&8PQReBM8~PO&0STof)AR8q`r@-3^@q}%S`yAwPLULgjV0N<_m z^8QM!W`wBhe5E$0tjQiL5)uqKwHaTd=FGj+rjAa==D**&gSW}SFX5kgomD6TZ?VaQ{kUY; znyjXZEAsu~$o-`GU?QercM)>2J}P7 z-dn~9Cw1!g1_|Gt%!LO%>>_#%+%4*w9yV|J?4z$W+kQe})D@>BfQcFpni0NN+Z~8> zhmV5Y*ri~8yq;it97+5dWxrfIcQE#;1Dv&$Ovn*-Df+Mj`*XewHSl3Kx*0No-|4!K zz%(#lrdX+4XL37$$~L=f8tkZx0lEiz4M!UurjjR+p24jDq3)Z}%Wl>`I^3=8RvT%{%F5<;`ok-|F4nkg=ihZ+_B<1!T64a4 z^mC)|MIuz$haQx~6qXYG`#@vc+cEZu9viYexzF78d`{sA=zPpP&~w>f zaibOwevcEZ*Qc|(A6?$ghf{fqz&raA^!ij2K)B0+8Zw!~n~Xs&Bcz!tk@&#;SXWZcyDY%nq z;wKNESxspc6Cw#2K{Bb*Vi55BIsN(RSJ$N%-yTUhIn>B_I@7x5SM6&2ZHuR%$7@K> z2$u=6jPX~qTTP8dOL_mzLLZ-DaAm1Ux8Kif%3&lLOg_v$70VNuErufygr)32vdLUN2Jg)5%2l{ryJ(UBV(Enrv@I4K*qIM ztkQmoP9@j$)zzrc;}CE^LNkxh4zqnfe_vttlzeR-_&UMX`&(-(?{*}aLN!_B3CyO+ z9Pxi4Sj+o2 zE3Aa`@n?12Mx4nvA6LJ}!%W}d-@vNVw6daO6Ry{f#`x-SI{)H*I!`-L5JPDe0kkh0 z@qF1^Agg?Vw+yCg+NhuSyJ}s4U8_cT?jy;LJ~FIlXUACQi-<z4>7KzI$3!yDSfk?_|B-Ac4nLOw;{&*0kO7 z+}f>bE}yubE&;Qi+S&Qi>t*l1X@wY1@4|x_(SHYrsRn)zpJ#y8fxi6IQ_gfXhzhNl zCj5=^(;5=Yl2qRz*VgC7j~b{jVIkzu>R!Us*UU%}3^2v#=4b}@Y+ro1 z0X%5+KkSZrl^K+UvSMk*$uUBxFy?U~VE-(h8ZAYvS(+5nOS4Gpl4K9YbSA}?YFdM$ z2Mrl_V0c_J!ugm8QJ8Y2eP|2tyiq?7IhVRxE`7dFnD40>0Veca 
zS}OQP$GTaqzQtQA6@HEJEAT{N+66SkOkn1RqXe5uqr-nMH#4|OzUKJP1^-^6m80h7 z`bpaZ907~MMfCGhq41pMR<}#uZkWC3?Ly8 z4zQ&SkiM-XXS2$D%0etGh_2kIHWrdQaCPuoXqjJFh(zBH=Y8$+A}{VIIeIKE-3K>x zDFC3H7giJmeZIwaDT<13J0#+=!72bH57J9ZAVL$Zk~K?H+SKwmZH&^_?`fJ7u18ce z8?~pHRj}b0AnCbTwlPjE_J<>`+M$mq$0Na5(U{3Y5T`&g^0LKB(qxZ0hdn^NGUy55 z-*tG~Fq?eYvYh{%uS&uqV!XpryS-S?G-z|0+O5*9Qvsp_WI;1)asUm~6q3PWs8Ox+ z6=?!0;_^o&uHkN|4$ASuV2U+R*nw-I)>sgRA0BSWk8grZNyrS4C}rK;-gkwj9#8RZ zAGp**WDU%=Li>8i5Clh2A(0K3%+RiAJmWAld3)dE*C$HYMCsk0w*h3fDs>O+xrgmL z7&dR{>h&_%T9U?5uLF}ACPT|h+8sWgy}R!N`rm#$Fah&c&*KqEt2fX;CF`OSvy6T} zvIAVoij`(t+?P6g(h4>}1?vnE7*&aBvPrw_V(A&lFkKqTBG?b@dw6^%AjVDeV93^dXo8pP9Hiye!>iq|1WH z<5&>$5YKl*BXQ~vLIfs=xOK)p(yr2xbHaL{WO3s$YQ;F7UM0-ob0#~kw_*=eO(Xl7 z4MR!moDM1*1_?<rDtWG7UASzTzI(J_l*rJgUg^g z5lTa%&5#(_s4g?@){i|JU$EjXF1~(;uKPoa@N547EWR;VJ)vXIbywke>wdt#gdR|9p+#g+$Ts?4B$4FYfz4ACrMX-X| zXGQv@wf+h*RKlY9do7>W&0L3BrcpfYd>;bIikF!1>oAEk`}9g;G+}yyu+u+S46~K* zCQw@_Vqz8WwW6hwt+*VR(B@SQ=8m`Ud4kG2EK})%wq3==ZuID&VO%?AkZdqK1fS>X z$F}J9y zX4FNBjOtA|r)!Y}wjCIK553;6dJO|#nIodcS+c^&^rjz>Sy_h`(5}DvSn`W`r2AetR*3jthq*;Dx z%6|dq%aDR+1pcIE4rxmOccCLP%^NW&fW5(0eNy`gCMcrtV1fA|VzT3I)qElTXMzvU znB$DHc)D@@!{mDICF4jR(PwpxySx$^8rAXvQkW&tA9;-o((x*ZY25znYLIxOpl;wR zp?(-+!pOps$bTbdfGmN9lx1@?oi}k$ct@sg#INa<oB9BuGY_Ej4b2m4gbwcK@FpRUu?2)N8=F%gdf{jO#xfG zE9V~R*HN4aqq+eA^DI9DYz0z&H#l|uG<3m}#wzXV^4!3l&(i&oVJM&suzwOoO>u`n zzymuj(;j~oOfBzAXcRG?hMAANyu7)B&Bf?hRf2&ZLqd6B#L~Q?_E&@Siq*mbm=><3 z^WVC~W$iHj(?Afg-5ZJpfDw(=f%@*ZZbp>a$D3pS^oZ)~P!!acyUk8d8a2IYjH&8L zW>WdBrZeIhMlRi2aFH4di*!1tA49}*^oXK5-;2k)-Vi<-EFE0--3~Tp**v`6wxZn zs)N+7k~?IM4*BuJ#3*?P9nR-!s1Qc^rv#wn4Azl~m`!a!Ne2#bYH7%EJ%SF4* z7|%u#Pz{5iM^G0wvKh%Ahz!OBb*)3Zi$CkdBqd#M7*P3Vlm>vBukH1G}tv9i)grI!BU zeYq9Um`pDGStf~89^l$k^|L$Q?g42a{FTk9T@2tvB2-Q{M^m;ykh$HBrkDqW1zRAL zdR(k=S7}yYShqNDi#(l_BqNEgCX(>tB>4eKlECqDleKEtR6zE7$VoIsP$4od-E7xo z&xG}K2{T_5v9Il5k&vA8$!yW>ruVjMc8ZLLM{{>T7E8eIenc+cYs)aQAPEVW@594x zzG4ND*Hjk(?H4Acx@TT^NU{I80k6TO+YuO(PWrb$>#tJLmz}rph!cR2l|0zxjUR 
zaa@zarB|_}jrs>HdYhxkY@;6}yiWE1fGzPq(##J{)w%ES1hzvl*yot=X8zOsM-BwQ5B%NGFruq-?wDG=5hHM8N+1K7i%rH0DAD)4!mzD2Ern@EFbu< z|7G_G8T|a&(pM4|gXWl3Pc(XnK zv1*6eNXw=J)--L~+QKxocSz^Yi0H&p0OV-XWq;GhS{)~5&X(&c0AXhy5MkN}`@o%! z>pW|no=k7*$5M^{U{KbW*xvrWujOI%`akvoi3V))raV9t8T*CKOAAb{op1Q8Msjy& z%huK&P;VTAsTF1Uza=8k;)T_oKta!w+0cToG6l5bwFcZnbvk7oRT^b+WUHm6?-W#3 z>6J!6${;+QSk6vHr5TRbk(jS!C-xuG*Ax%4_365|=kpMp68I!D$fX>%P_oH7+ z__f$AHg@S&X~kv|SCRhin-oJHusA9PG0j^(@g-%6TknIrHuA) zB8jZ3=qp!UM}tTKr?m14#Xq_FqEk8Oa{&1E{pDNyG6#ogL28-LVM;! zAuZiFN~?3OCx;CqPa?s6#nf*^>pL1r_H& z|A3zCDZ2EMNX%UfOfD<|!H0gbV72$m*np1dta{eA9aN%TBBQ?D6X^Rx#w3%~c&}F? z5@mJJ+yWB-d}{jo`e~`D{b#`k`?Y$BB8q97n;Gb-E`=QfDy?1uBBS(N`@(`UP7VR)mUa3~bjb&R+~wINL$8us zh(oKO)*lA%{{8Xwe%sd|S=VwJu4{iy7+Tgm)kH}Ntr8MZ;-ceC0BQ3>jk0-c8BeP0 zkoyhng_uDk5fEfn)1FC+9y78e?ymoRRt^~sS`#Zp*SWlpIU5P4|Bd!NI8xGL!6}K0 zrETJ6(c+3zJ|KoNZ+4Z~{-fyR zwBkK=?Uv4$dT$IIaQ61WJj0K?7r*c{6*=JT#j;ieic{vJ;2rUi82?;ut4?IH9bf)2 z|MV-q?{&QEb72h{ZhJq0k+Fa|&j!A+pZwK6f`rbw_CNg!eHnWdn`IKt!c%sI&<%`L5iy8Jc z@MXV#QTOS5K%E<#lRD1y{a7=cTnpLU70bxxHp6<@?Bqa2Ry|1J3ET%+j_vw#2a2?MQl%caMn z+ZYfxt)|o?y`=i#gC_;*P1(K9^~&ewr(v{S#7rTUm|O$c`9a|;=(nC9m_D2GKeLs6 zc&`mk`^m{@;0M0wR?t(VD!X@Hsi$(EwD2a4*6^Vr;c+MgY>}gwDsX#Wd^DM#bkSFs z&~x&I2hRL6>5VdDer6qUc1a8YhH|>sePgy_E*%<_HgW8J%gIgbw0865Ic{41uQ4VZ~`jS;MKW z;f>kVFb9Padoq}URfV+U^9s6&Dt?k050)Q%bsXvVR&2x^>=_+~db!A?^`(zu&ZkLs z2Qx)#^)z7_tMi9zvLWM|= zWZDC&f-CA@z%j#C3HN-LKWpTRv!ovM*Np&jPw){tUj@u0@EO6ht{>_K78n{c#Hu3SHd-7{O44Mujy15v3;Ma|x-;1sotNURK|WmgR?N%+5-{NJ*Zt84(bo zp$Sja2W$^}-jL8@8f7qyj3x^_UV@QK0%5-b##7bpt;ayy8NjXr%D+`B*xS}>b-YT) zg5=_u)V*|BnEskj71~sKZIqY6gu+YBY4GnA4k97L*O~T;D1YXPeKP>Skm*pM%h+kK!Pdi8UiG(;!Z&{5DyP75UbIONfhAs zz4v{GN04D*5Lu~JiDPbu*$Yetjy59zQ(m|-0c5sUmT>f}uGh(X5a}ZRgrNa23lA0D zN@?5r5@4G!f1oDt*5}AtM^*OF)5Q7v|`t2TWPft#hjy_pt71c zzt9zGi(qf~@^JQ}a&XJ%J3M@BeAArv`ik{VvL}BG8I{2}*n>C)faSjhvM40ZDaKJ7 zQDd;bgEBa(oi&YY9q;qBCDS&E1onEG@Dyy4^J|CV6ETB?)e?O5INz?0mn5iF+fYe=2G5GJ}@i)WT7c2^)Uwoy|%`=o~ 
zP`gqiNo3ks6}pc4E%%Hc8R_%y{>r53S*aVv##Y-=bv#9c{8=Q~YD-?ZreDzTWa0?# z99}IymkF~CxJ#4~ae5mP(%*J=waGp&53Uu6;)R1?^_=2=M9yMe>C#c*1yL zesWWR>#pdaOYtjI3yzd!zZDPe;`s1<=;=Rt$HVSAkk~cJ`#$l$H2qzTA(D@z;-Ss} zLTG)?FD0YAhcNH+q#>IH6;TbrxU)%P^$(ToPf0Qz&pCSvGG382Mu8Gyf{oP?)R7qq zi^~+tVj93qdM=54hdzRY#FB2|p!5eD!b{{brhuxw(5RMxoQCe=51wpEEY;)^AvZ(XuMRv`EoDD@TV- zEAbyo0;p-W{cqu}n0urH{}H+^*~}~*X{fk7KW5<(ik~pVl|N%a44qO!AXX<%g2d#K zl`NZrYGgO$S*8A3QcJXM|9h)-5BBeUKT&y(*p(y6x_Fp-UJ)4J6AUugOsBWwnR2|H zQQ>cN50du_Ws}L3X=IZu4Sm#q;$p(BGh%0H@Y)iG&L5XR8e~z`)`2lX4zp5*CSV|y$ z2sgpQzJSM|Aw@BCW-a{+$GLoPUEq!RvFt1-=tQB zrJ2zN!)5HU5@qbR^3e=D4@g5v=cfs*tDSigf4sqzfvP_SRg5%0=~L7ge^vklL%-)& zqg7h~?vqKV*n-@nfdmaBz3Kv2JkU9}Y$6jGFV4AEF_T~Oa=r#2mecS*CS|a^hqbli zGcn`xj~)Kpz>S;Na5T9I=lx>BL)YIN44E?H8q}$c$BQE*DtXG>)x>7yR)?@5xL-1U z3n;S1jU$^5Gt6xbDo>=uDf4|5MWVi9L_0MFSS4nJ+Ub|->TjAF8d*>Bkt3iyBo%d6 zsn>{`T`?yj3{9UOx@lx!sdgVp8i>>?950RctK0?p22dkeqB?o)H4pGCIi1-c*`7j9 z)Kjd9krVq4xuR5*cXygG-0n$xF^$I|lH_tWr2~It0r8|&nF)jo=SEH0b;W;`LN89~ z@WvF@%hThKVfe9uh@b1Dwn;r7K6YDp@O^O8_P;#hq4s*Q4`B4;Wb>OcmL2 zzmU1p=*?N+yjMWlX`9r_cICxPa?L-~#;4}ANpx&VNsI7|ynXNRja~xBH%|r$-+{R_ zMEZ$aQzJ9Rsg;?yG*V@04lfJJ&xfB$)NvPZi7T_Y99Gzm%3z6TeGbjIUhV4 z<;6~6U+a&37Cy)sZ4Z&40sZCD7-qTbt_FAAufVgE&&pxb($cE?`3YPADJ|<%q_(9m zuduj~d){KYQhF{8sZ_DIc0Sguz9xP_K1EK{GXE?ZED{Q81+4P~zELJY&!192q6cmL zqTSd#Wg8qtODdW!+RtOIRFN`2F?fKOSM-*G0bqnnzVSRVz`wds(>s>2DOJ$~qMSRo zUi+NfVO3!=1`r@%2op{uXwnVmwuAo#8hrENKqq7mNp?Yz>5A~n2hiG=w%LnoqCghl zxx9%n{|l8_H-y%yyyFEVs}7EajQCkO{p#*8Jp=VWlwoZ8s4!O4DZzknZ*@fPiwqMc zmh#5i#9>StL2#2YVqW`GG8>qWd0igqzZeOn>1RxNlrFZ~^(4Jr6CB_yE!*^-CFeNn zw(9jN%i4=VLK4o4W>K6npyPtB#yy{*W1M7~5ZAx?Hocy_BOTMK)S&Hr{?nocuq zYd>2U>UY3|rY0t>U7(h-8%A$Pr131f0J;&^V|jgI~zG7(1&d;Ou}d4}~0;WImM*mSZ^iu6=UlK;~2~qd+l$x?En(DUu=bd_b4s4S0W^`*wP- zn~b|J+}*zFYzg6M^Tsyr#dX#YEo~uUkQM?htq<+Q zNHaN&a0Zbd#9N0hJ^#;IBSf(ZfVwwx;zc>!_A;0v50r@dfY*ey4a95v56zme*~L8MFi>%)VG)3+eMiLucDoTrXU zfW3G<@SfNK6>RS=pv&u`eZT(;#`op;ruV0Vdqg>LSj-eC_{>=r`XOylSPDs%Ijp1-P~5D@Y( 
zvbDASV+myRnD=6ikN`f!QWZoYoo-e`+~wa?BW<%Nclr^_6DI9SS{vXGoge?CC|@KK z^eP2p$Fy`b6tw;$lBFw!!h{n_4M$%y0`i3|AUDL+_ojS#t;OE=A`zWGMgdmvK1)1A z0(iqZ{5<%Nk z`en;ixbFB{*3==eNGW)oNXlz2vKF~vH zm(kZD@xU*h*T~*E)h}H+JH1;xGfggy_kR5BxB5DPHQ}7I^Q>{NZ@9|snR)8kqmI(I zi5dIBA$8eZm7f(VdE*VvyDFqiQEwBmdUw%K2MR!yg+DcDzLfr+e(~8f zaH40^adwgfsY6SzvdnK3_u<=q!D6-g1#gB#pc6+2B!O(vP?CCVSFq_^_Iq8@7!t9A zLP+jixmi7>@Gh?n%w=58J zff3oyVps0g;mSv-5fRVUU){KDP=7qR8;1WfyxBu`9&-8&WF)N^;c6+iM}N5gxfo9b zDY^yH|14E_99ICAdG!V3h**t(6EHH!mlY|X53oilWEh40ZUI`G+lT`HCL?=;v!n|p zLxf+jNj$k;<``n(Frf9D5HZ$C8Dv+_2NYLj?r4r{)gnK5pL{C1?NYTqLOQEM{9S8) z>#*G;j4M1qs>McG)Q5!j*E>aWfM>%L23JE(t_?`gAhH%$gZ9cnB=-rCXa8YEEr5_U zZZ}5%s*NCji~#@@DoJz#y_0niD3bV( zA88feU*`Jp>{pcq!1yyEvG#K#LhrtFIHf9Vb+f#jhCV1T1>X8SYQi)wgDy{g)YpF$ zQ|q6^tCMm|Hi@#+tVnj#55@p5j7lL`mOt`4Xu+Kp2x7$_zQ=Lofj0VVP{{?c^lAZ* zp-+|`M-R?&id-*%-?L_Mk=VGv`2Q^`(!wb+`}}B#B>;IIv!i&UhTQRF+17igT?0Y< z>PnT;7Y-J!{HCxV%4{k06-G_aBD*LL{>y{g9UIL1#%BW6%u4QxBpT9Yc zUTOq#*7a;2@-j!Kk0Y}3+`W3u&nQ$v+_{QZGzK!32Q#T4a z0uqS6WtZPW(c*e_UqNzlE`&E+u)=U025es&Xp-{WN|QCU`XG6E0UQb)%57a-DZ6mGjn1^ts+q0z8g&6$pUVB~ zk$K@F^#XFVxbHjT?CzTyH5S!4+wZtZs1z*UJ+Vo+c1h=bSWSdGC1w!Nvm;s!gNXrr zT&Vu^Qut6c31E#$R&tjzhlxSG(G$w{kIj@=3P__)-^A^a64Ri;dzQWcyj|utXP2Y* zkWP^mr=$dS9(yxx%{k$ssOxt%WLB?F;M?e0n>M%lP93-*lj1A}%Ec0p_O5%OGfHu3O3rE{SY4-m( mE43OZt+s~>gD<}G4|+t~vQOt;OZe?OF!ItWQneChVgCz^{q$P^ diff --git a/benchmarks/adversarial_networks/tensorflow/dcgan/inference/fp32/config.json b/benchmarks/adversarial_networks/tensorflow/dcgan/inference/fp32/config.json new file mode 100644 index 000000000..dfac18793 --- /dev/null +++ b/benchmarks/adversarial_networks/tensorflow/dcgan/inference/fp32/config.json @@ -0,0 +1,8 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1, + "KMP_HW_SUBSET": "1T" + } +} diff --git a/benchmarks/adversarial_networks/tensorflow/dcgan/inference/fp32/model_init.py 
b/benchmarks/adversarial_networks/tensorflow/dcgan/inference/fp32/model_init.py index aed323e94..8f4602c2c 100644 --- a/benchmarks/adversarial_networks/tensorflow/dcgan/inference/fp32/model_init.py +++ b/benchmarks/adversarial_networks/tensorflow/dcgan/inference/fp32/model_init.py @@ -37,8 +37,8 @@ def __init__(self, args, custom_args=[], platform_util=None): self.set_num_inter_intra_threads() # Set KMP env vars, if they haven't already been set - self.set_kmp_vars() - set_env_var("KMP_HW_SUBSET", "1T") + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) benchmark_script = os.path.join( self.args.intelai_models, args.mode, args.precision, diff --git a/benchmarks/common/base_model_init.py b/benchmarks/common/base_model_init.py index 6294190d8..9a25ca92a 100644 --- a/benchmarks/common/base_model_init.py +++ b/benchmarks/common/base_model_init.py @@ -18,6 +18,7 @@ # SPDX-License-Identifier: EPL-2.0 # +import json import os @@ -135,14 +136,28 @@ def set_num_inter_intra_threads(self, num_inter_threads=None, num_intra_threads= print("num_inter_threads: {}\nnum_intra_threads: {}".format( self.args.num_inter_threads, self.args.num_intra_threads)) - def set_kmp_vars(self, kmp_settings="1", kmp_blocktime="1", kmp_affinity="granularity=fine,verbose,compact,1,0"): + def set_kmp_vars(self, config_file_path, kmp_settings=None, kmp_blocktime=None, kmp_affinity=None): """ Sets KMP_* environment variables to the specified value, if the environment variable has not already been set. - The default values for this function's args are the most common values that we have seen in the model zoo. + The default values in the json file are the best known settings for the model. 
""" + if os.path.exists(config_file_path): + with open(config_file_path, 'r') as config: + config_object = json.load(config) + + # First sets default from config file + for param in config_object.keys(): + for env in config_object[param].keys(): + set_env_var(env, config_object[param][env]) + + else: + print("Warning: File {} does not exist and \ + cannot be used to set KMP environment variables".format(config_file_path)) + + # Override user provided envs if kmp_settings: - set_env_var("KMP_SETTINGS", kmp_settings) + set_env_var("KMP_SETTINGS", kmp_settings, overwrite_existing=True) if kmp_blocktime: - set_env_var("KMP_BLOCKTIME", kmp_blocktime) + set_env_var("KMP_BLOCKTIME", kmp_blocktime, overwrite_existing=True) if kmp_affinity: - set_env_var("KMP_AFFINITY", kmp_affinity) + set_env_var("KMP_AFFINITY", kmp_affinity, overwrite_existing=True) diff --git a/benchmarks/content_creation/tensorflow/draw/inference/fp32/config.json b/benchmarks/content_creation/tensorflow/draw/inference/fp32/config.json new file mode 100644 index 000000000..dfac18793 --- /dev/null +++ b/benchmarks/content_creation/tensorflow/draw/inference/fp32/config.json @@ -0,0 +1,8 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1, + "KMP_HW_SUBSET": "1T" + } +} diff --git a/benchmarks/content_creation/tensorflow/draw/inference/fp32/model_init.py b/benchmarks/content_creation/tensorflow/draw/inference/fp32/model_init.py index 390bcae82..08c145bca 100644 --- a/benchmarks/content_creation/tensorflow/draw/inference/fp32/model_init.py +++ b/benchmarks/content_creation/tensorflow/draw/inference/fp32/model_init.py @@ -22,7 +22,6 @@ import os import sys from common.base_model_init import BaseModelInitializer -from common.base_model_init import set_env_var class ModelInitializer(BaseModelInitializer): @@ -32,8 +31,8 @@ def __init__(self, args, custom_args=[], platform_util=None): super(ModelInitializer, self).__init__(args, 
custom_args, platform_util) # Set KMP env vars, if they haven't already been set - self.set_kmp_vars() - set_env_var("KMP_HW_SUBSET", "1T") + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) if self.args.accuracy_only: print("Accuracy testing for DRAW inference is not supported yet.") diff --git a/benchmarks/face_detection_and_alignment/tensorflow/facenet/inference/fp32/config.json b/benchmarks/face_detection_and_alignment/tensorflow/facenet/inference/fp32/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/face_detection_and_alignment/tensorflow/facenet/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/face_detection_and_alignment/tensorflow/facenet/inference/fp32/model_init.py b/benchmarks/face_detection_and_alignment/tensorflow/facenet/inference/fp32/model_init.py index 9bd9c6243..bf4b8132c 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/facenet/inference/fp32/model_init.py +++ b/benchmarks/face_detection_and_alignment/tensorflow/facenet/inference/fp32/model_init.py @@ -34,7 +34,8 @@ def __init__(self, args, custom_args=[], platform_util=None): self.python_exe + " " # Set KMP env vars, if they haven't already been set - self.set_kmp_vars() + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) pairs_file = os.path.join(self.args.model_source_dir, "data/pairs.txt") diff --git a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/config.json b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/config.json @@ -0,0 
+1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/model_init.py b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/model_init.py index 34409b702..4ef889b36 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/model_init.py +++ b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/model_init.py @@ -33,7 +33,8 @@ def __init__(self, args, custom_args, platform_util=None): self.set_num_inter_intra_threads() # Set KMP env vars, if they haven't already been set - self.set_kmp_vars() + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/config.json b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/model_init.py index 064bf7848..641821520 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/model_init.py @@ -32,7 +32,8 @@ def __init__(self, args, custom_args=[], platform_util=None): self.cmd = self.get_numactl_command(self.args.socket_id) + 
self.python_exe + " " # Set KMP env vars, if they haven't already been set - self.set_kmp_vars() + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) # use default batch size if -1 if self.args.batch_size == -1: diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/int8/config.json b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/int8/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/int8/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/int8/model_init.py index f2e2e1469..0d7dda4db 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/int8/model_init.py @@ -31,7 +31,11 @@ class ModelInitializer(BaseModelInitializer): def __init__(self, args, custom_args=[], platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - self.set_kmp_vars() + + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) + self.cmd = self.get_numactl_command(self.args.socket_id) + "{} ".format(self.python_exe) # use default batch size if -1 diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/config.json b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ 
b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/model_init.py index dd504259e..53c2643bd 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/model_init.py @@ -60,8 +60,10 @@ def __init__(self, args, custom_args=[], platform_util=None): self.args = arg_parser.parse_args(self.custom_args, namespace=self.args) - # Use default KMP variable values, but override the default KMP_BLOCKTIME value - self.set_kmp_vars(kmp_blocktime=str(self.args.kmp_blocktime)) + # Set KMP env vars, if they haven't already been set, but override the default KMP_BLOCKTIME value + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path, kmp_blocktime=str(self.args.kmp_blocktime)) + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) benchmark_script = os.path.join( diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/config.json b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/model_init.py index 6d586ea80..bd4794638 100644 --- 
a/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/model_init.py @@ -60,8 +60,9 @@ def parse_args(self): self.args = parser.parse_args(self.custom_args, namespace=self.args) - # Use default KMP variable values, but override the default KMP_BLOCKTIME value - self.set_kmp_vars(kmp_blocktime=str(self.args.kmp_blocktime)) + # Set KMP env vars, if they haven't already been set, but override the default KMP_BLOCKTIME value + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path, kmp_blocktime=str(self.args.kmp_blocktime)) def run_benchmark(self): benchmark_script = os.path.join(self.args.intelai_models, diff --git a/benchmarks/image_recognition/tensorflow/inceptionv4/inference/config.json b/benchmarks/image_recognition/tensorflow/inceptionv4/inference/config.json new file mode 100644 index 000000000..6f1228ba7 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/inceptionv4/inference/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 0, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/inceptionv4/inference/inceptionv4_model_init.py b/benchmarks/image_recognition/tensorflow/inceptionv4/inference/inceptionv4_model_init.py index c7d546477..d4294a179 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv4/inference/inceptionv4_model_init.py +++ b/benchmarks/image_recognition/tensorflow/inceptionv4/inference/inceptionv4_model_init.py @@ -38,7 +38,11 @@ def __init__(self, args, custom_args=[], platform_util=None): # Environment variables set_env_var("OMP_NUM_THREADS", platform_util.num_cores_per_socket if self.args.num_cores == -1 else self.args.num_cores) - self.set_kmp_vars(kmp_blocktime="0") + + # Set KMP env vars, if they haven't already been set + 
config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) + self.set_num_inter_intra_threads(num_inter_threads=platform_util.num_threads_per_core, num_intra_threads=platform_util.num_cores_per_socket) diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/config.json b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/config.json new file mode 100644 index 000000000..f0b327528 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/config.json @@ -0,0 +1,6 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/model_init.py index e75c72194..d4e3ca5d7 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/model_init.py @@ -33,8 +33,9 @@ def __init__(self, args, custom_args=[], platform_util=None): if self.args.batch_size == -1: self.args.batch_size = 128 - # Set KMP env vars (except KMP_SETTINGS is not set) - self.set_kmp_vars(kmp_settings=None) + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) # set num_inter_threads and num_intra_threads (override inter threads to 2) self.set_num_inter_intra_threads(num_inter_threads=2) diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/config.json b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/config.json @@ -0,0 
+1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py index 0823604c0..6f22fd12a 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py @@ -37,7 +37,8 @@ def __init__(self, args, custom_args=[], platform_util=None): self.cmd = self.get_numactl_command(self.args.socket_id) + "python " # Set KMP env vars, if they haven't already been set - self.set_kmp_vars() + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) # Set the num_inter_threads and num_intra_threads self.set_num_inter_intra_threads() diff --git a/benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/config.json b/benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/model_init.py index 5e35e462b..43f862159 100644 --- a/benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/model_init.py @@ -60,8 +60,10 @@ def __init__(self, args, custom_args=[], platform_util=None): self.args = arg_parser.parse_args(self.custom_args, namespace=self.args) - # Use default 
KMP variable values, but override the default KMP_BLOCKTIME value - self.set_kmp_vars(kmp_blocktime=str(self.args.kmp_blocktime)) + # Set KMP env vars, if they haven't already been set, but override the default KMP_BLOCKTIME value + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path, kmp_blocktime=str(self.args.kmp_blocktime)) + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) benchmark_script = os.path.join( diff --git a/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/config.json b/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/config.json new file mode 100644 index 000000000..6f1228ba7 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 0, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py index 5e32d3e92..4bd21a12e 100644 --- a/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py @@ -41,8 +41,9 @@ def __init__(self, args, custom_args=[], platform_util=None): set_env_var("OMP_NUM_THREADS", platform_util.num_cores_per_socket if args.num_cores == -1 else args.num_cores) - # Set KMP env vars, but override default KMP_BLOCKTIME value - self.set_kmp_vars(kmp_blocktime="0") + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) def parse_args(self): parser = argparse.ArgumentParser() diff --git a/benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/config.json 
b/benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/model_init.py index a2e6be8a3..4c3dfbd1d 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/model_init.py @@ -61,8 +61,10 @@ def __init__(self, args, custom_args=[], platform_util=None): self.args = arg_parser.parse_args(self.custom_args, namespace=self.args) - # Use default KMP variable values, but override the default KMP_BLOCKTIME value - self.set_kmp_vars(kmp_blocktime=str(self.args.kmp_blocktime)) + # Set KMP env vars, if they haven't already been set, but override the default KMP_BLOCKTIME value + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path, kmp_blocktime=str(self.args.kmp_blocktime)) + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) benchmark_script = os.path.join( diff --git a/benchmarks/image_recognition/tensorflow/resnet50/inference/int8/config.json b/benchmarks/image_recognition/tensorflow/resnet50/inference/int8/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet50/inference/int8/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/resnet50/inference/int8/model_init.py 
b/benchmarks/image_recognition/tensorflow/resnet50/inference/int8/model_init.py index 07dfa5d2f..75e9db07c 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/resnet50/inference/int8/model_init.py @@ -65,8 +65,9 @@ def parse_args(self): self.args = parser.parse_args(self.custom_args, namespace=self.args) - # Use default KMP variable values, but override the default KMP_BLOCKTIME value - self.set_kmp_vars(kmp_blocktime=str(self.args.kmp_blocktime)) + # Set KMP env vars, if they haven't already been set, but override the default KMP_BLOCKTIME value + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path, kmp_blocktime=str(self.args.kmp_blocktime)) def run_benchmark_or_accuracy(self): cmd = os.path.join( diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/config.json b/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/config.json new file mode 100644 index 000000000..23d5de76e --- /dev/null +++ b/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/config.json @@ -0,0 +1,8 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1, + "KMP_HW_SUBSET": "1T" + } +} diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/model_init.py b/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/model_init.py index 1fe96fe2b..43f9cdacc 100644 --- a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/model_init.py +++ b/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/model_init.py @@ -37,8 +37,8 @@ def __init__(self, args, custom_args=[], platform_util=None): self.set_num_inter_intra_threads() # Set KMP env vars, if they haven't already been set - self.set_kmp_vars(kmp_affinity="granularity=fine, compact, 1, 0") - 
set_env_var("KMP_HW_SUBSET", "1T") + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) benchmark_script = os.path.join( self.args.intelai_models, "coco.py") diff --git a/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/config.json b/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/config.json new file mode 100644 index 000000000..ca15cfe6d --- /dev/null +++ b/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine, compact", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/model_init.py b/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/model_init.py index cd4f5837d..d4998afae 100644 --- a/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/model_init.py +++ b/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/model_init.py @@ -41,7 +41,8 @@ def __init__(self, args, custom_args=[], platform_util=None): self.set_num_inter_intra_threads() # Set KMP env vars, if they haven't already been set - self.set_kmp_vars(kmp_affinity="granularity=fine, compact") + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) # Get path to the inference script script_path = os.path.join( diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/config.json b/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/config.json new file mode 100644 index 000000000..8ae78e72a --- /dev/null +++ b/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,compact,1,0", + "KMP_BLOCKTIME": 0, + "KMP_SETTINGS": 1 + } +} diff --git 
a/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py b/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py index 77d903020..6a2b7244f 100644 --- a/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py +++ b/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py @@ -35,8 +35,9 @@ def __init__(self, args, custom_args, platform_util=None): self.set_num_inter_intra_threads() - # Set the KMP env vars - self.set_kmp_vars(kmp_blocktime="0", kmp_affinity="granularity=fine,compact,1,0") + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) diff --git a/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/config.json b/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/config.json new file mode 100644 index 000000000..4d0e2acf5 --- /dev/null +++ b/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/model_init.py b/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/model_init.py index 61ef1bda6..a23403eb0 100644 --- a/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/model_init.py +++ b/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/model_init.py @@ -37,8 +37,9 @@ def __init__(self, args, custom_args=[], platform_util=None): (str(self.args.num_cores - 1)) + " " self.cmd += "{} ".format(self.python_exe) - # Set the KMP env vars - self.set_kmp_vars(kmp_affinity="granularity=fine,compact,1,0") + # Set KMP env vars, if they haven't already been set + config_file_path = 
os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) # use default batch size if -1 if self.args.batch_size == -1: diff --git a/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/config.json b/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/config.json new file mode 100644 index 000000000..8ae78e72a --- /dev/null +++ b/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,compact,1,0", + "KMP_BLOCKTIME": 0, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/model_init.py b/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/model_init.py index 20790b541..4e1519e03 100644 --- a/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/model_init.py +++ b/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/model_init.py @@ -37,8 +37,9 @@ def __init__(self, args, custom_args, platform_util=None): self.set_num_inter_intra_threads() - # Set the KMP env vars - self.set_kmp_vars(kmp_blocktime="0", kmp_affinity="granularity=fine,compact,1,0") + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) TEMP_DIR = str(self.args.model_source_dir) + "/out_dir" if os.path.exists(TEMP_DIR): diff --git a/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/config.json b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/config.json new file mode 100644 index 000000000..8ae78e72a --- /dev/null +++ b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + 
"optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,compact,1,0", + "KMP_BLOCKTIME": 0, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py index b598191f0..00f8b9f3f 100644 --- a/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py +++ b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py @@ -36,8 +36,9 @@ def __init__(self, args, custom_args, platform_util=None): self.set_num_inter_intra_threads() - # Set the KMP env vars - self.set_kmp_vars(kmp_blocktime="0", kmp_affinity="granularity=fine,compact,1,0") + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) MODEL_EXEC_DIR = str(self.args.model_source_dir) + "/official/transformer/" diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/config.json b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/model_init.py index 3e0167f75..a605cc8e3 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/model_init.py +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/model_init.py @@ -43,7 +43,8 @@ def __init__(self, args, custom_args, platform_util=None): 
self.set_num_inter_intra_threads() # Set KMP env vars, if they haven't already been set - self.set_kmp_vars() + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/config.json b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/config.json new file mode 100644 index 000000000..6f1228ba7 --- /dev/null +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 0, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/model_init.py index 749026f3c..705ef72c1 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/model_init.py +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/model_init.py @@ -41,8 +41,9 @@ def __init__(self, args, custom_args=[], platform_util=None): self.args.intelai_models, self.args.mode, self.args.precision, self.RFCN_ACCURACY_SCRIPT) - # Set KMP env vars, except override the default KMP_BLOCKTIME value - self.set_kmp_vars(kmp_blocktime="0") + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) self.validate_args() diff --git a/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/config.json b/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/config.json new file mode 100644 index 000000000..d7f51a4c2 --- /dev/null +++ b/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/config.json @@ -0,0 +1,6 @@ +{ + "optimization_parameters": { + 
"KMP_BLOCKTIME": 0, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/model_init.py index 712da5777..a4ab51dfa 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/model_init.py +++ b/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/model_init.py @@ -45,8 +45,9 @@ def __init__(self, args, custom_args, platform_util): self.args.intelai_models, self.args.mode, self.args.precision, "eval.py") - # Set KMP env vars, except override the default KMP_BLOCKTIME and KMP_AFFINITY values - self.set_kmp_vars(kmp_blocktime="0", kmp_affinity=None) + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) self.run_inference_sanity_checks(self.args, self.custom_args) self.parse_custom_args() diff --git a/benchmarks/object_detection/tensorflow/rfcn/inference/int8/config.json b/benchmarks/object_detection/tensorflow/rfcn/inference/int8/config.json new file mode 100644 index 000000000..6f1228ba7 --- /dev/null +++ b/benchmarks/object_detection/tensorflow/rfcn/inference/int8/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 0, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py index eec69455d..d6cb2cc97 100755 --- a/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py +++ b/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py @@ -54,8 +54,9 @@ def __init__(self, args, custom_args=[], platform_util=None): self.parse_args() - # Set KMP env vars with defaults, except for KMP_BLOCKTIME - self.set_kmp_vars(kmp_blocktime=0) + # Set KMP env vars, if they 
haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) # Set num_inter_threads and num_intra_threads self.set_num_inter_intra_threads() diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/config.json b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/config.json new file mode 100644 index 000000000..6f1228ba7 --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 0, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/model_init.py index 379e47c67..585d3ed0e 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/model_init.py +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/model_init.py @@ -44,8 +44,9 @@ def __init__(self, args, custom_args, platform_util): self.run_inference_sanity_checks(self.args, self.custom_args) self.research_dir = os.path.join(args.model_source_dir, "research") - # Set KMP env vars, except override the default KMP_BLOCKTIME value - self.set_kmp_vars(kmp_blocktime="0") + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) # set num_inter_threads and num_intra_threads (override inter threads to 2) self.set_num_inter_intra_threads(num_inter_threads=2) diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/config.json b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/config.json new file mode 100644 index 000000000..6f1228ba7 --- /dev/null +++ 
b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 0, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py index 5959abaf2..57114447a 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py @@ -31,7 +31,10 @@ class ModelInitializer(BaseModelInitializer): def __init__(self, args, custom_args=[], platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - self.set_kmp_vars(kmp_blocktime="0") + + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) # set num_inter_threads and num_intra_threads (override inter threads to 2) self.set_num_inter_intra_threads(num_inter_threads=2) diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/config.json b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py index 0e6657a11..1ad534ed9 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py +++ 
b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py @@ -42,7 +42,11 @@ def __init__(self, args, custom_args, platform_util): super(ModelInitializer, self).__init__(args, custom_args, platform_util) self.run_inference_sanity_checks(self.args, self.custom_args) - self.set_kmp_vars() + + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) + self.set_num_inter_intra_threads() set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) diff --git a/benchmarks/recommendation/tensorflow/ncf/inference/fp32/config.json b/benchmarks/recommendation/tensorflow/ncf/inference/fp32/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/recommendation/tensorflow/ncf/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/recommendation/tensorflow/ncf/inference/fp32/model_init.py b/benchmarks/recommendation/tensorflow/ncf/inference/fp32/model_init.py index 1b6eb1eda..1704839cb 100644 --- a/benchmarks/recommendation/tensorflow/ncf/inference/fp32/model_init.py +++ b/benchmarks/recommendation/tensorflow/ncf/inference/fp32/model_init.py @@ -40,7 +40,8 @@ def __init__(self, args, custom_args=[], platform_util=None): self.args.batch_size = 256 # Set KMP env vars, if they haven't already been set - self.set_kmp_vars() + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) # set num_inter_threads and num_intra_threads self.set_num_inter_intra_threads() diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/config.json b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/config.json new file mode 100644 index 
000000000..4efe60b15 --- /dev/null +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "noverbose,warnings,respect,granularity=core,none", + "KMP_BLOCKTIME": 0, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py index 8f3e15359..6655dce85 100755 --- a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py @@ -36,9 +36,10 @@ def __init__(self, args, custom_args=[], platform_util=None): # Set the num_inter_threads and num_intra_threads self.set_num_inter_intra_threads(num_inter_threads=platform_util.num_cores_per_socket, num_intra_threads=1) - # Use default KMP AFFINITY values, override KMP_BLOCKTIME & enable KMP SETTINGS - self.set_kmp_vars(kmp_settings="1", kmp_blocktime="0", - kmp_affinity="noverbose,warnings,respect,granularity=core,none") + + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) # Set env vars, if they haven't already been set set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/config.json b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/config.json new file mode 100644 index 000000000..4efe60b15 --- /dev/null +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "noverbose,warnings,respect,granularity=core,none", + "KMP_BLOCKTIME": 0, + "KMP_SETTINGS": 1 + } +} diff --git 
a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/model_init.py b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/model_init.py index 2bd55b5a5..9fdef4537 100755 --- a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/model_init.py +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/model_init.py @@ -36,9 +36,10 @@ def __init__(self, args, custom_args=[], platform_util=None): # Set the num_inter_threads and num_intra_threads self.set_num_inter_intra_threads(num_inter_threads=platform_util.num_cores_per_socket, num_intra_threads=1) - # Use default KMP AFFINITY values, override KMP_BLOCKTIME & enable KMP SETTINGS - self.set_kmp_vars(kmp_settings="1", kmp_blocktime="0", - kmp_affinity="noverbose,warnings,respect,granularity=core,none") + + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) # Set env vars, if they haven't already been set set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) diff --git a/benchmarks/text_to_speech/tensorflow/wavenet/inference/fp32/config.json b/benchmarks/text_to_speech/tensorflow/wavenet/inference/fp32/config.json new file mode 100644 index 000000000..f0b327528 --- /dev/null +++ b/benchmarks/text_to_speech/tensorflow/wavenet/inference/fp32/config.json @@ -0,0 +1,6 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1 + } +} diff --git a/benchmarks/text_to_speech/tensorflow/wavenet/inference/fp32/model_init.py b/benchmarks/text_to_speech/tensorflow/wavenet/inference/fp32/model_init.py index 91ebe227c..1756e33ae 100644 --- a/benchmarks/text_to_speech/tensorflow/wavenet/inference/fp32/model_init.py +++ b/benchmarks/text_to_speech/tensorflow/wavenet/inference/fp32/model_init.py @@ -32,8 +32,9 @@ def __init__(self, args, custom_args, platform_util): 
self.command = "" command_prefix = "{} generate.py".format(self.python_exe) - # Set default KMP env vars, except for KMP_SETTINGS - self.set_kmp_vars(kmp_settings=None) + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) self.parse_custom_args() # Set the num_inter_threads and num_intra_threads (override inter threads to 1) diff --git a/tests/unit/common/test_base_model_init.py b/tests/unit/common/test_base_model_init.py index 43f3076f1..979a6ac4c 100644 --- a/tests/unit/common/test_base_model_init.py +++ b/tests/unit/common/test_base_model_init.py @@ -17,8 +17,22 @@ # # SPDX-License-Identifier: EPL-2.0 # - +from contextlib import contextmanager import os +import pytest +import sys +import tempfile + +try: + # python 2 + from cStringIO import StringIO +except ImportError: + # python 3 + # only supports unicode so can't be used in python 2 for sys.stdout + # because (from `print` documentation) + # "All non-keyword arguments are converted to strings like str() does" + from io import StringIO + from mock import MagicMock, patch @@ -26,6 +40,22 @@ from benchmarks.common.base_model_init import set_env_var +@contextmanager +def catch_stdout(): + _stdout = sys.stdout + sys.stdout = caught_output = StringIO() + try: + yield caught_output + finally: + sys.stdout = _stdout + caught_output.close() + + +@pytest.fixture +def mock_json(patch): + return patch('json') + + # Example args and output strings for testing mocks test_model_name = "resnet50" test_framework = "tensorflow" @@ -109,3 +139,34 @@ def test_env_var_not_already_set(): finally: if os.environ.get(env_var): del os.environ[env_var] + + +def test_set_kmp_vars_config_json_does_not_exists(): + """Test config.json does not exist""" + # Setup base model init with test settings + platform_util = MagicMock() + args = MagicMock(verbose=True, model_name=test_model_name) + 
os.environ["PYTHON_EXE"] = "python" + base_model_init = BaseModelInitializer(args, [], platform_util) + + config_file_path = '/test/foo/config.json' + + with catch_stdout() as caught_output: + base_model_init.set_kmp_vars(config_file_path) + output = caught_output.getvalue() + + assert "Warning: File {} does not exist and \ + cannot be used to set KMP environment variables".format(config_file_path) == output.strip() + + +def test_set_kmp_vars_config_json_exists(mock_json): + """Test config.json when exists""" + # Setup base model init with test settings + platform_util = MagicMock() + args = MagicMock(verbose=True, model_name=test_model_name) + os.environ["PYTHON_EXE"] = "python" + base_model_init = BaseModelInitializer(args, [], platform_util) + + file_descriptor, config_file_path = tempfile.mkstemp(suffix=".json") + + base_model_init.set_kmp_vars(config_file_path) From c0d1fed01542dd59efb9521333dc8ce80ec94dbc Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Mon, 15 Apr 2019 09:52:16 -0700 Subject: [PATCH 17/62] Add support for dummy data with MobileNet V1 FP32 (#275) --- .../tensorflow/mobilenet_v1/README.md | 14 +++- .../mobilenet_v1/inference/fp32/model_init.py | 7 +- .../inference/fp32/eval_image_classifier.py | 78 +++++++++++-------- .../unit/common/tensorflow/tf_model_args.txt | 1 + 4 files changed, 60 insertions(+), 40 deletions(-) diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index 61a21c3ef..a32138d86 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -151,7 +151,11 @@ later. ## FP32 Inference Instructions -1. Download the ImageNet dataset and convert it to the TF records format +1. The ImageNet dataset is required for testing accuracy and can also be + used when running benchmarking. 
If no dataset is provided when running + benchmarking, synthetic data will be used. + + Download the ImageNet dataset and convert it to the TF records format using the instructions [here](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data). @@ -198,7 +202,9 @@ later. [tensorflow/models](https://github.com/tensorflow/models) repo that was cloned in step 3. - * Run benchmarking for latency (with `--batch-size 1` and `--checkpoint` with a path to the checkpoint file directory): + * Run benchmarking for latency (with `--batch-size 1`, `--checkpoint` + with a path to the checkpoint file directory, and the `--data-location` + is optional): ``` python launch_benchmark.py \ --precision fp32 \ @@ -212,7 +218,9 @@ later. --data-location /dataset/Imagenet_Validation \ --checkpoint /home//mobilenet_v1_fp32_pretrained_model ``` - * Run benchmarking for throughput (with `--batch-size 100` and `--checkpoint` with a path to the checkpoint file directory): + * Run benchmarking for throughput (with `--batch-size 100`, + `--checkpoint` with a path to the checkpoint file directory, and + the `--data-location` is optional): ``` python launch_benchmark.py \ --precision fp32 \ diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/model_init.py index d4e3ca5d7..8fa7391ae 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/fp32/model_init.py @@ -57,7 +57,6 @@ def __init__(self, args, custom_args=[], platform_util=None): self.command_prefix = ("{prefix} " "--dataset_name imagenet " "--checkpoint_path {checkpoint} " - "--dataset_dir {dataset} " "--dataset_split_name=validation " "--clone_on_cpu=True " "--model_name {model} " @@ -65,9 +64,11 @@ def __init__(self, args, custom_args=[], platform_util=None): 
"--intra_op_parallelism_threads {intra} " "--batch_size {bz}").format( prefix=self.command_prefix, checkpoint=self.args.checkpoint, - dataset=self.args.data_location, model=self.args.model_name, - inter=self.args.num_inter_threads, + model=self.args.model_name, inter=self.args.num_inter_threads, intra=self.args.num_intra_threads, bz=self.args.batch_size) + + if self.args.data_location: + self.command_prefix += " --dataset_dir {}".format(self.args.data_location) else: # add args for the accuracy script script_args_list = [ diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/eval_image_classifier.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/eval_image_classifier.py index fd3165387..974913258 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/eval_image_classifier.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/eval_image_classifier.py @@ -146,56 +146,66 @@ def end(self, run_context): print('Latency ms/step = %.1f' % (latency)) def main(_): - if not FLAGS.dataset_dir: - raise ValueError('You must supply the dataset directory with --dataset_dir') - tf.logging.set_verbosity(tf.logging.INFO) - #os.environ["OMP_NUM_THREADS"] = "54" + with tf.Graph().as_default(): tf_global_step = slim.get_or_create_global_step() ###################### # Select the dataset # ###################### - dataset = dataset_factory.get_dataset( - FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir) + if FLAGS.dataset_dir: + print("Inference using real data") + dataset = dataset_factory.get_dataset( + FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir) + num_classes = dataset.num_classes - FLAGS.labels_offset + else: + print("Inference using synthetic data") + num_classes = 1000 #################### # Select the model # #################### network_fn = nets_factory.get_network_fn( FLAGS.model_name, - num_classes=(dataset.num_classes - FLAGS.labels_offset), - 
is_training=False) - - ############################################################## - # Create a dataset provider that loads data from the dataset # - ############################################################## - provider = slim.dataset_data_provider.DatasetDataProvider( - dataset, - shuffle=False, - common_queue_capacity=2 * FLAGS.batch_size, - common_queue_min=FLAGS.batch_size) - [image, label] = provider.get(['image', 'label']) - label -= FLAGS.labels_offset - - ##################################### - # Select the preprocessing function # - ##################################### - preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name - image_preprocessing_fn = preprocessing_factory.get_preprocessing( - preprocessing_name, + num_classes=num_classes, is_training=False) eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size - image = image_preprocessing_fn(image, eval_image_size, eval_image_size) - - images, labels = tf.train.batch( - [image, label], - batch_size=FLAGS.batch_size, - num_threads=FLAGS.num_preprocessing_threads, - capacity=5 * FLAGS.batch_size) + if FLAGS.dataset_dir: + ############################################################## + # Create a dataset provider that loads data from the dataset # + ############################################################## + provider = slim.dataset_data_provider.DatasetDataProvider( + dataset, + shuffle=False, + common_queue_capacity=2 * FLAGS.batch_size, + common_queue_min=FLAGS.batch_size) + [image, label] = provider.get(['image', 'label']) + label -= FLAGS.labels_offset + + ##################################### + # Select the preprocessing function # + ##################################### + preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name + image_preprocessing_fn = preprocessing_factory.get_preprocessing( + preprocessing_name, + is_training=False) + + image = image_preprocessing_fn(image, eval_image_size, eval_image_size) + + images, labels = tf.train.batch( + 
[image, label], + batch_size=FLAGS.batch_size, + num_threads=FLAGS.num_preprocessing_threads, + capacity=5 * FLAGS.batch_size) + else: + # Generate random images and labels with constant 0 when no dataset is used + input_shape = [FLAGS.batch_size, eval_image_size, eval_image_size, 3] + label_shape = [FLAGS.batch_size] + images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') + labels = tf.constant(0, shape=label_shape, dtype=tf.int64) #################### # Define the model # @@ -258,4 +268,4 @@ def main(_): if __name__ == '__main__': - tf.app.run() \ No newline at end of file + tf.app.run() diff --git a/tests/unit/common/tensorflow/tf_model_args.txt b/tests/unit/common/tensorflow/tf_model_args.txt index 376f8b602..50fb40e1f 100644 --- a/tests/unit/common/tensorflow/tf_model_args.txt +++ b/tests/unit/common/tensorflow/tf_model_args.txt @@ -58,6 +58,7 @@ run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model- run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 1 python common/tensorflow/run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=lm-1b --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs 
--benchmark-only --verbose --model-source-dir=/workspace/models,numactl --cpunodebind=0 --membind=0 python /workspace/models/benchmark.py -b=1 -I=100 --inter=1 --intra=28 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 100 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 100 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --in-graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --data-location=/dataset,python 
/workspace/intelai_models/inference/fp32/accuracy.py --batch_size=100 --num_inter_threads=2 --input_graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --num_intra_threads=56 --data_location=/dataset run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --accuracy_only run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --benchmark_only From fdee53ebe72af653889e1b83b86f3ea78df66c07 Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com> Date: Mon, 15 Apr 2019 11:36:07 -0700 Subject: [PATCH 18/62] Use --no-cache-dir option during pip and virtualenv install (#285) --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index eac6e7fc9..0eb363206 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -16,8 +16,8 @@ node('skx') { sudo apt-get install -y python3-dev || sudo yum install -y python36-devel.x86_64 # virtualenv 16.3.0 is broken do not use it - python2 -m pip install 
--force-reinstall --user --upgrade pip virtualenv!=16.3.0 tox - python3 -m pip install --force-reinstall --user --upgrade pip virtualenv!=16.3.0 tox + python2 -m pip install --no-cache-dir --user --upgrade pip==19.0.3 virtualenv!=16.3.0 tox + python3 -m pip install --no-cache-dir --user --upgrade pip==19.0.3 virtualenv!=16.3.0 tox """ } stage('Style tests') { From c827585ea8087ad8f5428e513d00014dceef911c Mon Sep 17 00:00:00 2001 From: mjkyung Date: Wed, 17 Apr 2019 10:42:35 -0700 Subject: [PATCH 19/62] Add DenseNet 169 FP32 inference benchmarking scripts (#281) * Initial structure of Densenet169 * Densenet169 * add custom args * Updated custom args default values * add argparse import * Unify the default value for output layer name in both accuracy and benchmark * Add Intel License header * flake8 style fix * Update README.md and add unit test commands * Remove unused files and add minor fix per code review * Update unit test command * Update KMP setting * import fix * Change default batch_size to 100 * Fix jason file * Fix typos --- benchmarks/README.md | 1 + benchmarks/common/tensorflow/start.sh | 15 + .../tensorflow/densenet169/README.md | 137 ++++++ .../tensorflow/densenet169/__init__.py | 19 + .../densenet169/inference/__init__.py | 19 + .../densenet169/inference/fp32/__init__.py | 19 + .../densenet169/inference/fp32/config.json | 7 + .../densenet169/inference/fp32/model_init.py | 107 +++++ .../densenet169/inference/fp32/accuracy.py | 134 ++++++ .../densenet169/inference/fp32/benchmark.py | 161 +++++++ .../densenet169/inference/fp32/cnn_util.py | 50 +++ .../densenet169/inference/fp32/dataset.py | 103 +++++ .../inference/fp32/densenet_preprocessing.py | 391 ++++++++++++++++ .../inference/fp32/image_preprocessing.py | 420 ++++++++++++++++++ .../unit/common/tensorflow/tf_model_args.txt | 3 + 15 files changed, 1586 insertions(+) create mode 100644 benchmarks/image_recognition/tensorflow/densenet169/README.md create mode 100644 
benchmarks/image_recognition/tensorflow/densenet169/__init__.py create mode 100644 benchmarks/image_recognition/tensorflow/densenet169/inference/__init__.py create mode 100644 benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/__init__.py create mode 100644 benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/config.json create mode 100644 benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/model_init.py create mode 100644 models/image_recognition/tensorflow/densenet169/inference/fp32/accuracy.py create mode 100644 models/image_recognition/tensorflow/densenet169/inference/fp32/benchmark.py create mode 100644 models/image_recognition/tensorflow/densenet169/inference/fp32/cnn_util.py create mode 100644 models/image_recognition/tensorflow/densenet169/inference/fp32/dataset.py create mode 100644 models/image_recognition/tensorflow/densenet169/inference/fp32/densenet_preprocessing.py create mode 100644 models/image_recognition/tensorflow/densenet169/inference/fp32/image_preprocessing.py diff --git a/benchmarks/README.md b/benchmarks/README.md index ad37797fc..4f5a83172 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -19,6 +19,7 @@ dependencies to be installed: | Content Creation | TensorFlow | [DRAW](https://arxiv.org/pdf/1502.04623.pdf) | Inference | [FP32](content_creation/tensorflow/draw/README.md#fp32-inference-instructions) | | Face Detection and Alignment | Tensorflow | [FaceNet](https://arxiv.org/pdf/1503.03832.pdf) | Inference | [FP32](face_detection_and_alignment/tensorflow/facenet/README.md#fp32-inference-instructions) | | Face Detection and Alignment | TensorFlow | [MTCC](https://arxiv.org/pdf/1604.02878.pdf) | Inference | [FP32](face_detection_and_alignment/tensorflow/mtcc/README.md#fp32-inference-instructions) | +| Image Recognition | TensorFlow | [DenseNet169](https://arxiv.org/pdf/1608.06993.pdf) | Inference | [FP32](image_recognition/tensorflow/densenet169/README.md#fp32-inference-instructions) 
| | Image Recognition | TensorFlow | [Inception ResNet V2](https://arxiv.org/pdf/1602.07261.pdf) | Inference | [Int8](image_recognition/tensorflow/inception_resnet_v2/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/inception_resnet_v2/README.md#fp32-inference-instructions) | | Image Recognition | TensorFlow | [Inception V3](https://arxiv.org/pdf/1512.00567.pdf) | Inference | [Int8](image_recognition/tensorflow/inceptionv3/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/inceptionv3/README.md#fp32-inference-instructions) | | Image Recognition | TensorFlow | [Inception V4](https://arxiv.org/pdf/1602.07261.pdf) | Inference | [Int8](image_recognition/tensorflow/inceptionv4/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/inceptionv4/README.md#fp32-inference-instructions) | diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index 88492f8c5..d93183325 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -278,6 +278,19 @@ function dcgan() { fi } +# DenseNet 169 model +function densenet169() { + if [ ${PRECISION} == "fp32" ]; then + CMD="${CMD} $(add_arg "--input_height" ${input_height}) $(add_arg "--input_width" ${input_width}) \ + $(add_arg "--warmup_steps" ${warmup_steps}) $(add_arg "--steps" ${steps}) $(add_arg "--input_layer" ${input_layer}) \ + $(add_arg "--output_layer" ${output_layer})" + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model + else + echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" + exit 1 + fi +} + # DRAW model function draw() { if [ ${PRECISION} == "fp32" ]; then @@ -806,6 +819,8 @@ echo "Log output location: ${LOGFILE}" MODEL_NAME=$(echo ${MODEL_NAME} | tr 'A-Z' 'a-z') if [ ${MODEL_NAME} == "dcgan" ]; then dcgan +elif [ ${MODEL_NAME} == "densenet169" ]; then + densenet169 elif [ ${MODEL_NAME} == "draw" ]; then draw elif [ ${MODEL_NAME} == "facenet" ]; then diff --git 
a/benchmarks/image_recognition/tensorflow/densenet169/README.md b/benchmarks/image_recognition/tensorflow/densenet169/README.md new file mode 100644 index 000000000..bf6b1f84f --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/densenet169/README.md @@ -0,0 +1,137 @@ +# DenseNet 169 + +This document has instructions for how to run DenseNet 169 for the +following modes/precisions: +* [FP32 inference](#fp32-inference-instructions) + +## FP32 Inference Instructions + +1. Download ImageNet dataset. + + This step is required only for running accuracy, for running benchmark we do not need to provide dataset. + + Register and download the ImageNet dataset. Once you have the raw ImageNet dataset downloaded, we need to convert + it to the TFRecord format. The TensorFlow models repo provides + [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) + to download, process and convert the ImageNet dataset to the TF records format. After converting data, you should have a directory + with the sharded dataset something like below, we only need `validation-*` files, discard `train-*` files: + ``` + $ ll /home/myuser/datasets/ImageNet_TFRecords + -rw-r--r--. 1 user 143009929 Jun 20 14:53 train-00000-of-01024 + -rw-r--r--. 1 user 144699468 Jun 20 14:53 train-00001-of-01024 + -rw-r--r--. 1 user 138428833 Jun 20 14:53 train-00002-of-01024 + ... + -rw-r--r--. 1 user 143137777 Jun 20 15:08 train-01022-of-01024 + -rw-r--r--. 1 user 143315487 Jun 20 15:08 train-01023-of-01024 + -rw-r--r--. 1 user 52223858 Jun 20 15:08 validation-00000-of-00128 + -rw-r--r--. 1 user 51019711 Jun 20 15:08 validation-00001-of-00128 + -rw-r--r--. 1 user 51520046 Jun 20 15:08 validation-00002-of-00128 + ... + -rw-r--r--. 1 user 52508270 Jun 20 15:09 validation-00126-of-00128 + -rw-r--r--. 1 user 55292089 Jun 20 15:09 validation-00127-of-00128 + ``` + +2. A link to download the pre-trained model is coming soon. + +3. 
Clone the [intelai/models](https://github.com/intelai/models) repo + and then run the benchmarking scripts for either benchmarking throughput, + latency or accuracy. For `--data-location` in the accuracy run, please use the ImageNet validation data path from step 1. + Each benchmark run has user configurable arguments separated from regular arguments by '--' at the end of the command. + Unless configured, these arguments will run with default values. Below are the example codes for each benchmark case: + + ``` + $ git clone https://github.com/IntelAI/models.git + + $ cd benchmarks + ``` + + For throughput (using `--benchmark-only`, `--socket-id 0` and `--batch-size 100`): + ``` + python launch_benchmark.py \ + --model-name densenet169 \ + --precision fp32 \ + --mode inference \ + --framework tensorflow \ + --benchmark-only \ + --batch-size 100 \ + --socket-id 0 \ + --in-graph /home//densenet169_fp32_pretrained_model.pb \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + -- input_height=224 input_width=224 warmup_steps=20 steps=100 \ + input_layer="input" output_layer="densenet169/predictions/Reshape_1" + ``` + + For latency (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`) + ``` + python launch_benchmark.py \ + --model-name densenet169 \ + --precision fp32 \ + --mode inference \ + --framework tensorflow \ + --benchmark-only \ + --batch-size 1 \ + --socket-id 0 \ + --in-graph /home//densenet169_fp32_pretrained_model.pb \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + -- input_height=224 input_width=224 warmup_steps=20 steps=100 \ + input_layer="input" output_layer="densenet169/predictions/Reshape_1" + ``` + + For accuracy (using your `--data-location`, `--socket-id 0`, `--accuracy-only` and + `--batch-size 100`): + ``` + python launch_benchmark.py \ + --model-name densenet169 \ + --precision fp32 \ + --mode inference \ + --framework tensorflow \ + --accuracy-only \ + --batch-size 100 \ + --socket-id 0 
\ + --in-graph /home/<user>/densenet169_fp32_pretrained_model.pb \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --data-location /home/<user>/imagenet_validation_dataset \ + -- input_height=224 input_width=224 \ + input_layer="input" output_layer="densenet169/predictions/Reshape_1" + ``` + + Note that the `--verbose` and `--output-dir` flags can be added to any of the above commands + to get additional debug output or to change the default output location, respectively. + +4. The log file is saved to the `models/benchmarks/common/tensorflow/logs` directory, + or the directory specified by the `--output-dir` arg. Below are examples of + what the tail of your log file should look like for the different configs. + + Example log tail when benchmarking for throughput: + ``` + steps = 80, 159.83471377 images/sec + Latency: 625.646317005 ms + steps = 90, 159.852789241 images/sec + Latency: 625.57557159 ms + steps = 100, 159.853966416 images/sec + Latency: 625.570964813 ms + Ran inference with batch size 100 + Log location outside container: {--output-dir value}/benchmark_densenet169_inference_fp32_20190412_023940.log + ``` + + Example log tail when benchmarking for latency: + ``` + steps = 80, 34.9948442873 images/sec + Latency: 28.5756379366 ms + steps = 90, 34.9644341907 images/sec + Latency: 28.6004914178 ms + steps = 100, 34.9655988121 images/sec + Latency: 28.5995388031 ms + Ran inference with batch size 1 + Log location outside container: {--output-dir value}/benchmark_densenet169_inference_fp32_20190412_024505.log + ``` + + Example log tail when running for accuracy: + ``` + 0.757505030181 + 0.757489959839 + 0.75749498998 + 0.75748 + Ran inference with batch size 100 + Log location outside container: {--output-dir value}/benchmark_densenet169_inference_fp32_20190412_021545.log + ``` diff --git a/benchmarks/image_recognition/tensorflow/densenet169/__init__.py b/benchmarks/image_recognition/tensorflow/densenet169/__init__.py new file mode 100644 index 
000000000..d9c4123de --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/densenet169/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/image_recognition/tensorflow/densenet169/inference/__init__.py b/benchmarks/image_recognition/tensorflow/densenet169/inference/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/densenet169/inference/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/__init__.py b/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/config.json b/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/config.json new file mode 100644 index 000000000..812311847 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters":{ + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/model_init.py new file mode 100644 index 000000000..19569b555 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/model_init.py @@ -0,0 +1,107 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import os + +from common.base_model_init import BaseModelInitializer +from common.base_model_init import set_env_var + + +class ModelInitializer(BaseModelInitializer): + """Model initializer for Densenet169 FP32 inference""" + + def __init__(self, args, custom_args=[], platform_util=None): + super(ModelInitializer, self).__init__(args, custom_args, platform_util) + self.cmd = self.get_numactl_command(self.args.socket_id) + "python " + + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) + + if self.args.batch_size == -1: + self.args.batch_size = 100 + + # set num_inter_threads and num_intra_threads + self.set_num_inter_intra_threads() + + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) + + self.parse_args() + + if self.args.benchmark_only: + run_script = os.path.join(self.args.intelai_models, + self.args.mode, self.args.precision, + "benchmark.py") + + script_args_list = [ + "input_graph", "input_height", "input_width", "batch_size", + "input_layer", "output_layer", "num_inter_threads", + "num_intra_threads", "warmup_steps", "steps"] + + elif self.args.accuracy_only: + run_script = os.path.join(self.args.intelai_models, + self.args.mode, self.args.precision, + "accuracy.py") + + script_args_list = [ + "input_graph", "data_location", "input_height", "input_width", + "batch_size", "input_layer", "output_layer", + "num_inter_threads", "num_intra_threads"] + + self.cmd = self.add_args_to_command(self.cmd + run_script, + script_args_list) + + def parse_args(self): + if self.custom_args: + parser = argparse.ArgumentParser() + parser.add_argument( + "--input_height", default=224, + dest='input_height', type=int, help="input height") + parser.add_argument( 
+ "--input_width", default=224, + dest='input_width', type=int, help="input width") + parser.add_argument( + '--warmup_steps', dest='warmup_steps', + help='number of warmup steps', + type=int, default=20) + parser.add_argument( + '--steps', dest='steps', + help='number of steps', + type=int, default=100) + parser.add_argument( + '--input_layer', dest='input_layer', + help='name of input layer', + type=str, default="input") + parser.add_argument( + '--output_layer', dest='output_layer', + help='name of output layer', + type=str, default="densenet169/predictions/Reshape_1") + + self.args = parser.parse_args(self.custom_args, + namespace=self.args) + + def run(self): + if self.cmd: + self.run_command(self.cmd) diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/accuracy.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/accuracy.py new file mode 100644 index 000000000..35d598a48 --- /dev/null +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/accuracy.py @@ -0,0 +1,134 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import os +import time +import numpy as np +from tensorflow.core.protobuf import rewriter_config_pb2 +from google.protobuf import text_format +import tensorflow as tf +import image_preprocessing +import dataset + +NUM_TEST_IMAGES = 50000 + +def load_graph(model_file): + graph = tf.Graph() + graph_def = tf.GraphDef() + + import os + file_ext = os.path.splitext(model_file)[1] + + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--data_location", default=None, + help="full path to the validation data") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="densenet169/predictions/Reshape_1", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + input_height = args.input_height + input_width = args.input_width + batch_size = 
args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + data_location = args.data_location + dataset = dataset.ImagenetData(data_location) + preprocessor = image_preprocessing.ImagePreprocessor( + input_height, input_width, batch_size, + 1, # device count + tf.float32, # data_type for input fed to the graph + train=False, # doing inference + resize_method='crop') + images, labels = preprocessor.minibatch(dataset, subset='validation') + graph = load_graph(model_file) + input_tensor = graph.get_tensor_by_name(input_layer + ":0") + output_tensor = graph.get_tensor_by_name(output_layer + ":0") + + rewrite_options = rewriter_config_pb2.RewriterConfig( + layout_optimizer=rewriter_config_pb2.RewriterConfig.ON) + config = tf.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + config.graph_options.rewrite_options.remapping = ( + rewriter_config_pb2.RewriterConfig.OFF) + + total_accuracy1, total_accuracy5 = (0.0, 0.0) + num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \ + - num_processed_images + top1 = 0 + with tf.Session(config=config) as sess: + sess_graph = tf.Session(graph=graph, config=config) + + while num_remaining_images >= batch_size: + # Reads and preprocess data + #import pdb + #pdb.set_trace() + np_images, np_labels = sess.run([images[0], labels[0]]) + np_labels -= 1 + #print(np_labels.shape) + num_processed_images += batch_size + num_remaining_images -= batch_size + # Compute inference on the preprocessed data + predictions1 = sess_graph.run(output_tensor, + {input_tensor: np_images}) + if(batch_size !=1): + predictions1 = sess.run(tf.squeeze(predictions1)) + else : + predictions1 = sess.run(tf.reshape(predictions1,[1,1000])) + predictions2 = tf.argmax(predictions1, axis=1) + predictions = sess.run(predictions2) 
+ top1 += batch_size - (np.count_nonzero(predictions - np_labels)) + print(top1/num_processed_images) diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/benchmark.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/benchmark.py new file mode 100644 index 000000000..4091b4137 --- /dev/null +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/benchmark.py @@ -0,0 +1,161 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. # You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import os +import time +import numpy as np +from tensorflow.core.protobuf import rewriter_config_pb2 +from google.protobuf import text_format +import tensorflow as tf + +def load_graph(model_file): + graph = tf.Graph() + graph_def = tf.GraphDef() + + import os + file_ext = os.path.splitext(model_file)[1] + + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + + return graph + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="densenet169/predictions/Reshape_1", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + parser.add_argument("-gpu", "--gpu", + default = -1, + type=int, help="Run on gpu, other wise cpu", + required=False) + + parser.add_argument("--warmup_steps", type=int, default=40, + help="number of warmup steps") + parser.add_argument("--steps", type=int, default=100, help="number of steps") + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + 
sys.exit("Please provide a graph file.") + input_height = args.input_height + input_width = args.input_width + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + warmup_steps = args.warmup_steps + steps = args.steps + print(steps) + assert steps > 10, "Benchmark steps should be at least 10." + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + + input_shape = [batch_size, input_height, input_width, 3] + images = tf.truncated_normal( + input_shape, + dtype=tf.float32, + stddev=10, + name='synthetic_images') + + image_data = None + graph = load_graph(model_file) + + input_tensor = graph.get_tensor_by_name(input_layer + ":0"); + output_tensor = graph.get_tensor_by_name(output_layer + ":0"); + + rewrite_options = rewriter_config_pb2.RewriterConfig( + layout_optimizer=rewriter_config_pb2.RewriterConfig.ON) + config = tf.ConfigProto() + if (args.gpu < 0): + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + config.graph_options.rewrite_options.remapping = ( + rewriter_config_pb2.RewriterConfig.OFF) + #os.environ["OMP_NUM_THREADS"] = "14" + with tf.Session(config=config) as sess: + image_data = sess.run(images) + + with tf.Session(graph=graph, config=config) as sess: + sys.stdout.flush() + print("[Running warmup steps...]") + for t in range(warmup_steps): + start_time = time.time() + sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + if((t+1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t+1, batch_size/elapsed_time)) + avg = 0 + print("[Running benchmark steps...]") + total_time = 0; + total_images = 0; + for t in range(steps): + start_time = time.time() + results = sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + avg += elapsed_time + if((t+1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t+1, 
batch_size*(t+1)/avg)); + print(" Latency: {0} ms" + "".format(avg*1000. /(t+1))) diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/cnn_util.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/cnn_util.py new file mode 100644 index 000000000..32902d149 --- /dev/null +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/cnn_util.py @@ -0,0 +1,50 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Utilities for CNN benchmarks.""" + +import tensorflow as tf + + +def tensorflow_version_tuple(): + v = tf.__version__ + major, minor, patch = v.split('.') + return (int(major), int(minor), patch) + + +def tensorflow_version(): + vt = tensorflow_version_tuple() + return vt[0] * 1000 + vt[1] + diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/dataset.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/dataset.py new file mode 100644 index 000000000..88fdebce6 --- /dev/null +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/dataset.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Benchmark dataset utilities. +""" + +from abc import abstractmethod +import os + +import tensorflow as tf + + +class Dataset(object): + """Abstract class for cnn benchmarks dataset.""" + + def __init__(self, name, data_dir=None): + self.name = name + if data_dir is None: + raise ValueError('Data directory not specified') + self.data_dir = data_dir + + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) + + def reader(self): + return tf.TFRecordReader() + + @abstractmethod + def num_classes(self): + pass + + @abstractmethod + def num_examples_per_epoch(self, subset): + pass + + def __str__(self): + return self.name + + +class FlowersData(Dataset): + + def __init__(self, data_dir=None): + super(FlowersData, self).__init__('Flowers', data_dir) + + def num_classes(self): + return 5 + + def num_examples_per_epoch(self, subset): + if subset == 'train': + return 3170 + elif subset == 'validation': + return 500 + else: + raise ValueError('Invalid data subset "%s"' % subset) + + +class ImagenetData(Dataset): + + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('ImageNet', data_dir) + + def num_classes(self): + return 1000 + + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return 1281167 + elif subset == 'validation': + return 50000 + else: + raise ValueError('Invalid data subset "%s"' % subset) diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/densenet_preprocessing.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/densenet_preprocessing.py new file mode 100644 index 000000000..298694af0 --- /dev/null +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/densenet_preprocessing.py @@ -0,0 +1,391 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); 
+# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Provides utilities to preprocess images. 
+ +The preprocessing steps for VGG were introduced in the following technical +report: + + Very Deep Convolutional Networks For Large-Scale Image Recognition + Karen Simonyan and Andrew Zisserman + arXiv technical report, 2015 + PDF: http://arxiv.org/pdf/1409.1556.pdf + ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf + CC-BY-4.0 + +More information can be obtained from the VGG website: +www.robots.ox.ac.uk/~vgg/research/very_deep/ +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +slim = tf.contrib.slim + +_R_MEAN = 123.68 +_G_MEAN = 116.78 +_B_MEAN = 103.94 + +_SCALE_FACTOR = 0.017 + +_RESIZE_SIDE_MIN = 256 +_RESIZE_SIDE_MAX = 512 + + +def _crop(image, offset_height, offset_width, crop_height, crop_width): + """Crops the given image using the provided offsets and sizes. + + Note that the method doesn't assume we know the input image size but it does + assume we know the input image rank. + + Args: + image: an image of shape [height, width, channels]. + offset_height: a scalar tensor indicating the height offset. + offset_width: a scalar tensor indicating the width offset. + crop_height: the height of the cropped image. + crop_width: the width of the cropped image. + + Returns: + the cropped (and resized) image. + + Raises: + InvalidArgumentError: if the rank is not 3 or if the image dimensions are + less than the crop size. 
+ """ + original_shape = tf.shape(image) + + rank_assertion = tf.Assert( + tf.equal(tf.rank(image), 3), + ['Rank of image must be equal to 3.']) + with tf.control_dependencies([rank_assertion]): + cropped_shape = tf.stack([crop_height, crop_width, original_shape[2]]) + + size_assertion = tf.Assert( + tf.logical_and( + tf.greater_equal(original_shape[0], crop_height), + tf.greater_equal(original_shape[1], crop_width)), + ['Crop size greater than the image size.']) + + offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0])) + + # Use tf.slice instead of crop_to_bounding box as it accepts tensors to + # define the crop size. + with tf.control_dependencies([size_assertion]): + image = tf.slice(image, offsets, cropped_shape) + return tf.reshape(image, cropped_shape) + + +def _random_crop(image_list, crop_height, crop_width): + """Crops the given list of images. + + The function applies the same crop to each image in the list. This can be + effectively applied when there are multiple image inputs of the same + dimension such as: + + image, depths, normals = _random_crop([image, depths, normals], 120, 150) + + Args: + image_list: a list of image tensors of the same dimension but possibly + varying channel. + crop_height: the new height. + crop_width: the new width. + + Returns: + the image_list with cropped images. + + Raises: + ValueError: if there are multiple image inputs provided with different size + or the images are smaller than the crop dimensions. + """ + if not image_list: + raise ValueError('Empty image_list.') + + # Compute the rank assertions. 
+ rank_assertions = [] + for i in range(len(image_list)): + image_rank = tf.rank(image_list[i]) + rank_assert = tf.Assert( + tf.equal(image_rank, 3), + ['Wrong rank for tensor %s [expected] [actual]', + image_list[i].name, 3, image_rank]) + rank_assertions.append(rank_assert) + + with tf.control_dependencies([rank_assertions[0]]): + image_shape = tf.shape(image_list[0]) + image_height = image_shape[0] + image_width = image_shape[1] + crop_size_assert = tf.Assert( + tf.logical_and( + tf.greater_equal(image_height, crop_height), + tf.greater_equal(image_width, crop_width)), + ['Crop size greater than the image size.']) + + asserts = [rank_assertions[0], crop_size_assert] + + for i in range(1, len(image_list)): + image = image_list[i] + asserts.append(rank_assertions[i]) + with tf.control_dependencies([rank_assertions[i]]): + shape = tf.shape(image) + height = shape[0] + width = shape[1] + + height_assert = tf.Assert( + tf.equal(height, image_height), + ['Wrong height for tensor %s [expected][actual]', + image.name, height, image_height]) + width_assert = tf.Assert( + tf.equal(width, image_width), + ['Wrong width for tensor %s [expected][actual]', + image.name, width, image_width]) + asserts.extend([height_assert, width_assert]) + + # Create a random bounding box. + # + # Use tf.random_uniform and not numpy.random.rand as doing the former would + # generate random numbers at graph eval time, unlike the latter which + # generates random numbers at graph definition time. 
+ with tf.control_dependencies(asserts): + max_offset_height = tf.reshape(image_height - crop_height + 1, []) + with tf.control_dependencies(asserts): + max_offset_width = tf.reshape(image_width - crop_width + 1, []) + offset_height = tf.random_uniform( + [], maxval=max_offset_height, dtype=tf.int32) + offset_width = tf.random_uniform( + [], maxval=max_offset_width, dtype=tf.int32) + + return [_crop(image, offset_height, offset_width, + crop_height, crop_width) for image in image_list] + + +def _central_crop(image_list, crop_height, crop_width): + """Performs central crops of the given image list. + + Args: + image_list: a list of image tensors of the same dimension but possibly + varying channel. + crop_height: the height of the image following the crop. + crop_width: the width of the image following the crop. + + Returns: + the list of cropped images. + """ + outputs = [] + for image in image_list: + image_height = tf.shape(image)[0] + image_width = tf.shape(image)[1] + + offset_height = (image_height - crop_height) / 2 + offset_width = (image_width - crop_width) / 2 + + outputs.append(_crop(image, offset_height, offset_width, + crop_height, crop_width)) + return outputs + + +def _mean_image_subtraction(image, means): + """Subtracts the given means from each image channel. + + For example: + means = [123.68, 116.779, 103.939] + image = _mean_image_subtraction(image, means) + + Note that the rank of `image` must be known. + + Args: + image: a tensor of size [height, width, C]. + means: a C-vector of values to subtract from each channel. + + Returns: + the centered image. + + Raises: + ValueError: If the rank of `image` is unknown, if `image` has a rank other + than three or if the number of channels in `image` doesn't match the + number of values in `means`. 
+ """ + if image.get_shape().ndims != 3: + raise ValueError('Input must be of size [height, width, C>0]') + num_channels = image.get_shape().as_list()[-1] + if len(means) != num_channels: + raise ValueError('len(means) must match the number of channels') + + channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image) + for i in range(num_channels): + channels[i] -= means[i] + return tf.concat(axis=2, values=channels) + + +def _smallest_size_at_least(height, width, smallest_side): + """Computes new shape with the smallest side equal to `smallest_side`. + + Computes new shape with the smallest side equal to `smallest_side` while + preserving the original aspect ratio. + + Args: + height: an int32 scalar tensor indicating the current height. + width: an int32 scalar tensor indicating the current width. + smallest_side: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. + + Returns: + new_height: an int32 scalar tensor indicating the new height. + new_width: and int32 scalar tensor indicating the new width. + """ + smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) + + height = tf.to_float(height) + width = tf.to_float(width) + smallest_side = tf.to_float(smallest_side) + + scale = tf.cond(tf.greater(height, width), + lambda: smallest_side / width, + lambda: smallest_side / height) + new_height = tf.to_int32(height * scale) + new_width = tf.to_int32(width * scale) + return new_height, new_width + + +def _aspect_preserving_resize(image, smallest_side): + """Resize images preserving the original aspect ratio. + + Args: + image: A 3-D image `Tensor`. + smallest_side: A python integer or scalar `Tensor` indicating the size of + the smallest side after resize. + + Returns: + resized_image: A 3-D tensor containing the resized image. 
+ """ + smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) + + shape = tf.shape(image) + height = shape[0] + width = shape[1] + new_height, new_width = _smallest_size_at_least(height, width, smallest_side) + image = tf.expand_dims(image, 0) + resized_image = tf.image.resize_bilinear(image, [new_height, new_width], + align_corners=False) + resized_image = tf.squeeze(resized_image) + resized_image.set_shape([None, None, 3]) + return resized_image + + +def preprocess_for_train(image, + output_height, + output_width, + resize_side_min=_RESIZE_SIDE_MIN, + resize_side_max=_RESIZE_SIDE_MAX): + """Preprocesses the given image for training. + + Note that the actual resizing scale is sampled from + [`resize_size_min`, `resize_size_max`]. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + resize_side_min: The lower bound for the smallest side of the image for + aspect-preserving resizing. + resize_side_max: The upper bound for the smallest side of the image for + aspect-preserving resizing. + + Returns: + A preprocessed image. + """ + resize_side = tf.random_uniform( + [], minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32) + + image = _aspect_preserving_resize(image, resize_side) + image = _random_crop([image], output_height, output_width)[0] + image.set_shape([output_height, output_width, 3]) + image = tf.to_float(image) + image = tf.image.random_flip_left_right(image) + + image = _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) + return image * _SCALE_FACTOR + + +def preprocess_for_eval(image, output_height, output_width, resize_side): + """Preprocesses the given image for evaluation. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. 
+ resize_side: The smallest side of the image for aspect-preserving resizing. + + Returns: + A preprocessed image. + """ + image = _aspect_preserving_resize(image, resize_side) + image = _central_crop([image], output_height, output_width)[0] + image.set_shape([output_height, output_width, 3]) + image = tf.to_float(image) + + image = _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) + return image * _SCALE_FACTOR + + +def preprocess_image(image, output_height, output_width, is_training=False, + resize_side_min=_RESIZE_SIDE_MIN, + resize_side_max=_RESIZE_SIDE_MAX): + """Preprocesses the given image. + + Args: + image: A `Tensor` representing an image of arbitrary size. + output_height: The height of the image after preprocessing. + output_width: The width of the image after preprocessing. + is_training: `True` if we're preprocessing the image for training and + `False` otherwise. + resize_side_min: The lower bound for the smallest side of the image for + aspect-preserving resizing. If `is_training` is `False`, then this value + is used for rescaling. + resize_side_max: The upper bound for the smallest side of the image for + aspect-preserving resizing. If `is_training` is `False`, this value is + ignored. Otherwise, the resize side is sampled from + [resize_size_min, resize_size_max]. + + Returns: + A preprocessed image. 
+ """ + if is_training: + return preprocess_for_train(image, output_height, output_width, + resize_side_min, resize_side_max) + else: + return preprocess_for_eval(image, output_height, output_width, + resize_side_min) diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/image_preprocessing.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/image_preprocessing.py new file mode 100644 index 000000000..fe5d0eee0 --- /dev/null +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/image_preprocessing.py @@ -0,0 +1,420 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Image pre-processing utilities. 
+""" +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf +from random import randint +import densenet_preprocessing +from tensorflow.python.ops import data_flow_ops +import cnn_util + +def parse_example_proto(example_serialized): + """Parses an Example proto containing a training example of an image. + + The output of the build_image_data.py image preprocessing script is a dataset + containing serialized Example protocol buffers. Each Example proto contains + the following fields: + + image/height: 462 + image/width: 581 + image/colorspace: 'RGB' + image/channels: 3 + image/class/label: 615 + image/class/synset: 'n03623198' + image/class/text: 'knee pad' + image/object/bbox/xmin: 0.1 + image/object/bbox/xmax: 0.9 + image/object/bbox/ymin: 0.2 + image/object/bbox/ymax: 0.6 + image/object/bbox/label: 615 + image/format: 'JPEG' + image/filename: 'ILSVRC2012_val_00041207.JPEG' + image/encoded: + + Args: + example_serialized: scalar Tensor tf.string containing a serialized + Example protocol buffer. + + Returns: + image_buffer: Tensor tf.string containing the contents of a JPEG file. + label: Tensor tf.int32 containing the label. + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + text: Tensor tf.string containing the human-readable label. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, + default_value=''), + } + sparse_float32 = tf.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. 
+ feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.parse_single_example(example_serialized, feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) + ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) + xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) + ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + + # Note that we impose an ordering of (y, x) just to make life difficult. + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(bbox, [0, 2, 1]) + + return features['image/encoded'], label, bbox, features['image/class/text'] + + +def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): + """Decode a JPEG string into one 3-D float image Tensor. + + Args: + image_buffer: scalar string Tensor. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor with values ranging from [0, 1). + """ + # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): + # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): + with tf.name_scope(scope or 'decode_jpeg'): + # Decode the string as an RGB JPEG. + # Note that the resulting image contains an unknown height and width + # that is set dynamically by decode_jpeg. In other words, the height + # and width of image is unknown at compile-time. 
+ image = tf.image.decode_jpeg(image_buffer, channels=3, + fancy_upscaling=False, + dct_method='INTEGER_FAST') + + # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') + + return image + + +def eval_image(image, height, width, bbox, thread_id, resize): + """Get the image for model evaluation.""" + with tf.name_scope('eval_image'): + if not thread_id: + tf.summary.image( + 'original_image', tf.expand_dims(image, 0)) + + if resize == 'crop': + # Note: This is much slower than crop_to_bounding_box + # It seems that the redundant pad step has huge overhead + # distorted_image = tf.image.resize_image_with_crop_or_pad(image, + # height, width) + shape = tf.shape(image) + image = tf.cond(tf.less(shape[0], shape[1]), + lambda: tf.image.resize_images(image, tf.convert_to_tensor([256, 256*shape[1]/shape[0]], dtype=tf.int32)), + lambda: tf.image.resize_images(image, tf.convert_to_tensor([256*shape[0]/shape[1], 256], dtype=tf.int32))) + shape = tf.shape(image) + + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + #y0=tf.random_uniform([],minval=0,maxval=(shape[0] - height + 1), dtype=tf.int32) + #x0=tf.random_uniform([],minval=0,maxval=(shape[1] - width + 1), dtype=tf.int32) + ## distorted_image = tf.slice(image, [y0,x0,0], [height,width,3]) + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, + width) + else: + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + tf.shape(image), + bounding_boxes=bbox, + min_object_covered=0.5, + aspect_ratio_range=[0.90, 1.10], + area_range=[0.10, 1.0], + max_attempts=100, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, _ = sample_distorted_bounding_box + # Crop the image to the specified bounding box. 
+ distorted_image = tf.slice(image, bbox_begin, bbox_size) + resize_method = { + 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, + 'bilinear': tf.image.ResizeMethod.BILINEAR, + 'bicubic': tf.image.ResizeMethod.BICUBIC, + 'area': tf.image.ResizeMethod.AREA + }[resize] + # This resizing operation may distort the images because the aspect + # ratio is not respected. + if cnn_util.tensorflow_version() >= 11: + distorted_image = tf.image.resize_images( + distorted_image, [height, width], + resize_method, + align_corners=False) + else: + distorted_image = tf.image.resize_images( + distorted_image, height, width, resize_method, align_corners=False) + distorted_image.set_shape([height, width, 3]) + if not thread_id: + tf.summary.image( + 'cropped_resized_image', tf.expand_dims(distorted_image, 0)) + image = distorted_image + return image + + +def distort_image(image, height, width, bbox, thread_id=0, scope=None): + """Distort one image for training a network. + + Distorting images provides a useful technique for augmenting the data + set during training in order to make the network invariant to aspects + of the image that do not effect the label. + + Args: + image: 3-D float Tensor of image + height: integer + width: integer + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. + thread_id: integer indicating the preprocessing thread. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor of distorted image used for training. + """ + # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): + # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + with tf.name_scope(scope or 'distort_image'): + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. 
+ + # After this point, all image pixels reside in [0,1) + # until the very end, when they're rescaled to (-1, 1). The various + # adjust_* ops all require this range for dtype float. + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + + # Display the bounding box in the first thread only. + if not thread_id: + image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox) + tf.summary.image( + 'image_with_bounding_boxes', image_with_box) + + # A large fraction of image datasets contain a human-annotated bounding + # box delineating the region of the image containing the object of interest. + # We choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an allowed + # range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + tf.shape(image), + bounding_boxes=bbox, + min_object_covered=0.1, + aspect_ratio_range=[0.99, 1.01], + area_range=[0.05, 1.0], + max_attempts=100, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box + if not thread_id: + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distort_bbox) + tf.summary.image( + 'images_with_distorted_bounding_box', + image_with_distorted_box) + + # Crop the image to the specified bounding box. + distorted_image = tf.slice(image, bbox_begin, bbox_size) + + # This resizing operation may distort the images because the aspect + # ratio is not respected. We select a resize method in a round robin + # fashion based on the thread number. + # Note that ResizeMethod contains 4 enumerated resizing methods. 
+ resize_method = thread_id % 4 + if cnn_util.tensorflow_version() >= 11: + distorted_image = tf.image.resize_images( + distorted_image, [height, width], resize_method, align_corners=False) + else: + distorted_image = tf.image.resize_images( + distorted_image, height, width, resize_method, align_corners=False) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. + distorted_image.set_shape([height, width, 3]) + if not thread_id: + tf.summary.image( + 'cropped_resized_image', + tf.expand_dims(distorted_image, 0)) + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + + # Randomly distort the colors. + distorted_image = distort_color(distorted_image, thread_id) + + # Note: This ensures the scaling matches the output of eval_image + distorted_image *= 256 + + if not thread_id: + tf.summary.image( + 'final_distorted_image', + tf.expand_dims(distorted_image, 0)) + return distorted_image + + +def distort_color(image, thread_id=0, scope=None): + """Distort the color of the image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather then adding that level of complication, we select a distinct ordering + of color ops for each preprocessing thread. + + Args: + image: Tensor containing single image. + thread_id: preprocessing thread ID. + scope: Optional scope for op_scope. + Returns: + color-distorted image + """ + # with tf.op_scope([image], scope, 'distort_color'): + # with tf.name_scope(scope, 'distort_color', [image]): + with tf.name_scope(scope or 'distort_color'): + color_ordering = thread_id % 2 + + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) 
+ image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + + # The random_* ops do not necessarily clamp. + image = tf.clip_by_value(image, 0.0, 1.0) + return image + + +class ImagePreprocessor(object): + """Preprocessor for input images.""" + + def __init__(self, + height, + width, + batch_size, + device_count, + dtype=tf.float32, + train=True, + distortions=None, + resize_method=None): + self.height = height + self.width = width + self.batch_size = batch_size + self.device_count = device_count + self.dtype = dtype + self.train = train + self.resize_method = resize_method + if distortions is None: + distortions = False + self.distortions = distortions + if self.batch_size % self.device_count != 0: + raise ValueError( + ('batch_size must be a multiple of device_count: ' + 'batch_size %d, device_count: %d') % + (self.batch_size, self.device_count)) + self.batch_size_per_device = self.batch_size // self.device_count + + def preprocess(self, image_buffer, bbox, thread_id): + """Preprocessing image_buffer using thread_id.""" + # Note: Width and height of image is known only at runtime. 
+ image = tf.image.decode_jpeg(image_buffer, channels=3, + dct_method='INTEGER_FAST') + if self.train and self.distortions: + image = distort_image(image, self.height, self.width, bbox, thread_id) + else: + #image = eval_image(image, self.height, self.width, bbox, thread_id, + # self.resize_method) + image = densenet_preprocessing.preprocess_image(image,224,224,False) + # Note: image is now float32 [height,width,3] with range [0, 255] + + # image = tf.cast(image, tf.uint8) # HACK TESTING + + return image + + def minibatch(self, dataset, subset): + with tf.name_scope('batch_processing'): + images = [[] for i in range(self.device_count)] + labels = [[] for i in range(self.device_count)] + record_input = data_flow_ops.RecordInput( + file_pattern=dataset.tf_record_pattern(subset), + seed=randint(0, 9000), + parallelism=64, + buffer_size=10000, + batch_size=self.batch_size, + name='record_input') + records = record_input.get_yield_op() + records = tf.split(records, self.batch_size, 0) + records = [tf.reshape(record, []) for record in records] + for i in xrange(self.batch_size): + value = records[i] + image_buffer, label_index, bbox, _ = parse_example_proto(value) + image = self.preprocess(image_buffer, bbox, i % 4) + + device_index = i % self.device_count + images[device_index].append(image) + labels[device_index].append(label_index) + label_index_batch = [None] * self.device_count + for device_index in xrange(self.device_count): + images[device_index] = tf.parallel_stack(images[device_index]) + label_index_batch[device_index] = tf.concat(labels[device_index], 0) + + # dynamic_pad=True) # HACK TESTING dynamic_pad=True + images[device_index] = tf.cast(images[device_index], self.dtype) + depth = 3 + images[device_index] = tf.reshape( + images[device_index], + shape=[self.batch_size_per_device, self.height, self.width, depth]) + label_index_batch[device_index] = tf.reshape( + label_index_batch[device_index], [self.batch_size_per_device]) + # Display the training images in 
the visualizer. + # tf.summary.image('images', images) + + return images, label_index_batch diff --git a/tests/unit/common/tensorflow/tf_model_args.txt b/tests/unit/common/tensorflow/tf_model_args.txt index 50fb40e1f..21f680eda 100644 --- a/tests/unit/common/tensorflow/tf_model_args.txt +++ b/tests/unit/common/tensorflow/tf_model_args.txt @@ -1,4 +1,7 @@ run_tf_benchmark.py --framework tensorflow --use-case recommendation --precision fp32 --mode inference --model-name wide_deep --batch-size 1024 --data-location /dataset --checkpoint /checkpoints --intelai-models . --verbose, OMP_NUM_THREADS=1 numactl --cpunodebind=0 --membind=0 python inference/fp32/wide_deep_inference.py --data_dir=/dataset --model_dir=/checkpoints --batch_size=1024 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 
--input_graph=/in_graph/densenet169_fp32_pretrained_model.pb +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb --data-location=/dataset, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/accuracy.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb --data_location=/dataset run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 100 --in-graph /final_int8_inceptionv3.pb --intelai-models . --accuracy-only --verbose,python ./int8/accuracy.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/final_int8_inceptionv3.pb run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . 
--benchmark-only --socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 From 7db6647e7490b42cd0e96b6fd69d980769c5f02d Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Thu, 18 Apr 2019 15:13:03 -0700 Subject: [PATCH 20/62] Add support for TCMalloc (#287) --- .../dcgan/inference/fp32/model_init.py | 2 +- benchmarks/common/base_benchmark_util.py | 16 ++ benchmarks/common/base_model_init.py | 32 +++- benchmarks/common/tensorflow/start.sh | 11 ++ .../draw/inference/fp32/model_init.py | 2 +- .../facenet/inference/fp32/model_init.py | 2 +- .../mtcc/inference/fp32/model_init.py | 2 +- .../densenet169/inference/fp32/model_init.py | 2 +- .../tensorflow/inception_resnet_v2/README.md | 30 ++-- .../inference/fp32/model_init.py | 2 +- .../inference/int8/model_init.py | 2 +- .../tensorflow/inceptionv3/README.md | 22 +-- .../inceptionv3/inference/fp32/model_init.py | 2 +- .../inceptionv3/inference/int8/model_init.py | 6 +- .../tensorflow/inceptionv4/README.md | 42 ++--- .../inference/inceptionv4_model_init.py | 2 +- .../mobilenet_v1/inference/int8/model_init.py | 2 +- .../resnet101/inference/fp32/model_init.py | 2 +- .../resnet101/inference/int8/model_init.py | 4 +- .../tensorflow/resnet50/README.md | 13 +- .../resnet50/inference/fp32/model_init.py | 2 +- .../resnet50/inference/int8/model_init.py | 4 +- .../maskrcnn/inference/fp32/model_init.py | 2 +- .../unet/inference/fp32/model_init.py | 2 +- .../lm-1b/inference/fp32/model_init.py | 2 +- .../gnmt/inference/fp32/model_init.py | 2 +- .../inference/fp32/model_init.py | 2 +- .../inference/fp32/model_init.py | 2 +- benchmarks/launch_benchmark.py | 2 + .../faster_rcnn/inference/fp32/model_init.py | 2 +- .../faster_rcnn/inference/int8/model_init.py | 2 +- .../tensorflow/rfcn/README.md | 16 +- .../rfcn/inference/fp32/model_init.py | 2 +- 
.../rfcn/inference/int8/model_init.py | 6 +- .../tensorflow/ssd-mobilenet/README.md | 12 +- .../inference/fp32/model_init.py | 2 +- .../ssd-mobilenet/inference/int8/config.json | 2 +- .../inference/int8/model_init.py | 2 +- .../ssd-resnet34/inference/fp32/model_init.py | 2 +- .../ncf/inference/fp32/model_init.py | 2 +- .../tensorflow/wide_deep_large_ds/README.md | 6 +- .../inference/fp32/model_init.py | 2 +- .../inference/int8/model_init.py | 2 +- docs/general/tensorflow/LaunchBenchmark.md | 10 ++ .../eval_image_classifier_inference.py | 4 +- .../tensorflow/test_run_tf_benchmarks.py | 9 +- .../unit/common/tensorflow/tf_model_args.txt | 158 +++++++++--------- tests/unit/common/test_base_model_init.py | 38 +++++ 48 files changed, 303 insertions(+), 194 deletions(-) diff --git a/benchmarks/adversarial_networks/tensorflow/dcgan/inference/fp32/model_init.py b/benchmarks/adversarial_networks/tensorflow/dcgan/inference/fp32/model_init.py index 8f4602c2c..2e2f88104 100644 --- a/benchmarks/adversarial_networks/tensorflow/dcgan/inference/fp32/model_init.py +++ b/benchmarks/adversarial_networks/tensorflow/dcgan/inference/fp32/model_init.py @@ -43,7 +43,7 @@ def __init__(self, args, custom_args=[], platform_util=None): benchmark_script = os.path.join( self.args.intelai_models, args.mode, args.precision, "inference_bench.py") - self.benchmark_command = self.get_numactl_command(args.socket_id) + \ + self.benchmark_command = self.get_command_prefix(args.socket_id) + \ self.python_exe + " " + benchmark_script set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) diff --git a/benchmarks/common/base_benchmark_util.py b/benchmarks/common/base_benchmark_util.py index 89df56cde..0768e8871 100644 --- a/benchmarks/common/base_benchmark_util.py +++ b/benchmarks/common/base_benchmark_util.py @@ -161,6 +161,22 @@ def _define_args(self): "with --accuracy-only and --mode=inference.", dest="output_results", action="store_true") + self._common_arg_parser.add_argument( + 
"--disable-tcmalloc", + help="Disables the use of TCMalloc for int8 benchmarking. TCMalloc is " + "currently not used for FP32 benchmarking, so using this flag with " + "FP32 models will have no effect.", + dest="disable_tcmalloc", action="store_true" + ) + + self._common_arg_parser.add_argument( + "--tcmalloc-large-alloc-report-threshold", + help="Sets the TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD environment variable to " + "the specified value. The environment variable sets the threshold (in bytes) " + "for when large memory allocation messages will be displayed.", + dest="tcmalloc_large_alloc_report_threshold", default=2147483648, type=int + ) + self._common_arg_parser.add_argument( "-v", "--verbose", help="Print verbose information.", dest="verbose", action="store_true") diff --git a/benchmarks/common/base_model_init.py b/benchmarks/common/base_model_init.py index 9a25ca92a..8e8d1abb2 100644 --- a/benchmarks/common/base_model_init.py +++ b/benchmarks/common/base_model_init.py @@ -18,6 +18,7 @@ # SPDX-License-Identifier: EPL-2.0 # +import glob import json import os @@ -62,15 +63,32 @@ def run_command(self, cmd): os.system(cmd) - def get_numactl_command(self, socket_id): + def get_command_prefix(self, socket_id, numactl=True): """ - Returns the numactl command with --cpunodebind and --membind set to the - specified socket_id. If socket_id is set to -1 (undefined) then an - empty string is returned. 
+ Returns the command prefix with: + - LD_PRELOAD for int8 models (if tcmalloc is not disabled) + - The numactl command with --cpunodebind and --membind set to the specified socket_id (if numactl=True) """ - return "" if socket_id == -1 else \ - "numactl --cpunodebind={0} --membind={0} ".format( - str(socket_id)) + command = "" + + if not self.args.disable_tcmalloc: + # Try to find the TCMalloc library file + matches = glob.glob("/usr/lib/libtcmalloc.so*") + + if len(matches) == 0: + matches = glob.glob("/usr/lib64/libtcmalloc.so*") + + if len(matches) > 0: + command += "LD_PRELOAD={} ".format(matches[0]) + else: + # Unable to find the TCMalloc library file + print("Warning: Unable to find the TCMalloc library file (libtcmalloc.so) in /usr/lib or /usr/lib64, " + "so the LD_PRELOAD environment variable will not be set.") + + if socket_id != -1 and numactl: + command += "numactl --cpunodebind={0} --membind={0} ".format(str(socket_id)) + + return command def add_args_to_command(self, command, arg_list): """ diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index d93183325..d1ec29216 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -45,6 +45,8 @@ echo " NUM_CORES: ${NUM_CORES}" echo " BENCHMARK_ONLY: ${BENCHMARK_ONLY}" echo " ACCURACY_ONLY: ${ACCURACY_ONLY}" echo " OUTPUT_RESULTS: ${OUTPUT_RESULTS}" +echo " DISABLE_TCMALLOC: ${DISABLE_TCMALLOC}" +echo " TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD: ${TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD}" echo " NOINSTALL: ${NOINSTALL}" echo " OUTPUT_DIR: ${OUTPUT_DIR}" @@ -62,6 +64,11 @@ if [[ ${NOINSTALL} != "True" ]]; then apt install -y libsm6 libxext6 pip install --upgrade pip pip install requests + + # install google-perftools for tcmalloc + if [[ ${DISABLE_TCMALLOC} != "True" ]]; then + apt-get install google-perftools -y + fi fi verbose_arg="" @@ -170,6 +177,10 @@ if [ ${DATA_NUM_INTRA_THREADS} != "None" ]; then CMD="${CMD} 
--data-num-intra-threads=${DATA_NUM_INTRA_THREADS}" fi +if [ ${DISABLE_TCMALLOC} == "True" ]; then + CMD="${CMD} --disable-tcmalloc" +fi + function install_protoc() { pushd "${MOUNT_EXTERNAL_MODELS_SOURCE}/research" diff --git a/benchmarks/content_creation/tensorflow/draw/inference/fp32/model_init.py b/benchmarks/content_creation/tensorflow/draw/inference/fp32/model_init.py index 08c145bca..e306ecd55 100644 --- a/benchmarks/content_creation/tensorflow/draw/inference/fp32/model_init.py +++ b/benchmarks/content_creation/tensorflow/draw/inference/fp32/model_init.py @@ -44,7 +44,7 @@ def __init__(self, args, custom_args=[], platform_util=None): # Create the command prefix with numactl and executing the script script_path = os.path.join(self.args.intelai_models, self.args.mode, self.args.precision, "draw_inf.py") - self.command_prefix = self.get_numactl_command(args.socket_id) + \ + self.command_prefix = self.get_command_prefix(args.socket_id) + \ " {} {} ".format(self.python_exe, script_path) # Add additional args to the command diff --git a/benchmarks/face_detection_and_alignment/tensorflow/facenet/inference/fp32/model_init.py b/benchmarks/face_detection_and_alignment/tensorflow/facenet/inference/fp32/model_init.py index bf4b8132c..e00bf70f7 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/facenet/inference/fp32/model_init.py +++ b/benchmarks/face_detection_and_alignment/tensorflow/facenet/inference/fp32/model_init.py @@ -30,7 +30,7 @@ class ModelInitializer(BaseModelInitializer): def __init__(self, args, custom_args=[], platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - self.cmd = self.get_numactl_command(self.args.socket_id) + \ + self.cmd = self.get_command_prefix(self.args.socket_id) + \ self.python_exe + " " # Set KMP env vars, if they haven't already been set diff --git a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/model_init.py 
b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/model_init.py index 4ef889b36..5d1983139 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/model_init.py +++ b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/inference/fp32/model_init.py @@ -42,7 +42,7 @@ def __init__(self, args, custom_args, platform_util=None): self.args.intelai_models, self.args.mode, self.args.precision, "one_image_test.py") self.command_prefix = \ - self.get_numactl_command(self.args.socket_id) + \ + self.get_command_prefix(self.args.socket_id) + \ "{} ".format(self.python_exe) + benchmark_script self.run_cmd = \ diff --git a/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/model_init.py index 19569b555..3e4a376af 100644 --- a/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/densenet169/inference/fp32/model_init.py @@ -34,7 +34,7 @@ class ModelInitializer(BaseModelInitializer): def __init__(self, args, custom_args=[], platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - self.cmd = self.get_numactl_command(self.args.socket_id) + "python " + self.cmd = self.get_command_prefix(self.args.socket_id) + "{} ".format(self.python_exe) # Set KMP env vars, if they haven't already been set config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md index e547377ca..7c9c246fc 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md @@ -139,30 +139,30 @@ Log location outside container: /benchmark_inception_resnet_v2 Example log tail when 
benchmarking for latency: ``` ... -Iteration 37: 0.046 sec -Iteration 38: 0.046 sec -Iteration 39: 0.046 sec -Iteration 40: 0.046 sec -Average time: 0.045 sec +Iteration 37: 0.043 sec +Iteration 38: 0.042 sec +Iteration 39: 0.043 sec +Iteration 40: 0.043 sec +Average time: 0.043 sec Batch size = 1 -Latency: 45.441 ms -Throughput: 22.007 images/sec +Latency: 42.793 ms +Throughput: 23.368 images/sec Ran inference with batch size 1 -Log location outside container: /benchmark_inception_resnet_v2_inference_int8_20190330_012557.log +Log location outside container: /benchmark_inception_resnet_v2_inference_int8_20190415_231020.log ``` Example log tail when benchmarking for throughput: ``` ... -Iteration 37: 0.975 sec -Iteration 38: 0.975 sec -Iteration 39: 0.987 sec -Iteration 40: 0.974 sec -Average time: 0.976 sec +Iteration 37: 0.932 sec +Iteration 38: 0.928 sec +Iteration 39: 0.927 sec +Iteration 40: 0.928 sec +Average time: 0.928 sec Batch size = 128 -Throughput: 131.178 images/sec +Throughput: 137.978 images/sec Ran inference with batch size 128 -Log location outside container: /benchmark_inception_resnet_v2_inference_int8_20190330_012719.log +Log location outside container: /benchmark_inception_resnet_v2_inference_int8_20190415_225215.log ``` diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/model_init.py index 641821520..13fd8a79f 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/fp32/model_init.py @@ -29,7 +29,7 @@ class ModelInitializer(BaseModelInitializer): def __init__(self, args, custom_args=[], platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - self.cmd = self.get_numactl_command(self.args.socket_id) + self.python_exe + " " + self.cmd = 
self.get_command_prefix(self.args.socket_id) + self.python_exe + " " # Set KMP env vars, if they haven't already been set config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/int8/model_init.py index 0d7dda4db..90ce7bcb2 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/inference/int8/model_init.py @@ -36,7 +36,7 @@ def __init__(self, args, custom_args=[], platform_util=None): config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") self.set_kmp_vars(config_file_path) - self.cmd = self.get_numactl_command(self.args.socket_id) + "{} ".format(self.python_exe) + self.cmd = self.get_command_prefix(self.args.socket_id) + "{} ".format(self.python_exe) # use default batch size if -1 if self.args.batch_size == -1: diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md index 7eb091edc..1da257669 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md @@ -197,23 +197,25 @@ Log location outside container: {--output-dir value}/benchmark_inceptionv3_infer Example log tail when benchmarking for latency: ``` ... 
-steps = 470, 53.7256017113 images/sec -steps = 480, 52.5430812016 images/sec -steps = 490, 52.9076139058 images/sec -steps = 500, 53.5021876395 images/sec +steps = 470, 134.912798739 images/sec +steps = 480, 132.379245045 images/sec +steps = 490, 133.977640069 images/sec +steps = 500, 132.083262478 images/sec +Average throughput for batch size 1: 133.440858806 images/sec Ran inference with batch size 1 -Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_int8_20190223_194002.log +Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_int8_20190415_220455.log ``` Example log tail when benchmarking for throughput: ``` ... -steps = 470, 370.435654276 images/sec -steps = 480, 369.710160177 images/sec -steps = 490, 369.083388904 images/sec -steps = 500, 370.287978128 images/sec +steps = 470, 369.151656047 images/sec +steps = 480, 373.174541014 images/sec +steps = 490, 372.402638382 images/sec +steps = 500, 371.836748659 images/sec +Average throughput for batch size 128: 371.269087408 images/sec Ran inference with batch size 128 -Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_int8_20190223_194314.log +Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_int8_20190416_162155.log ``` ## FP32 Inference Instructions diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/model_init.py index 53c2643bd..f550765f4 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/model_init.py @@ -70,7 +70,7 @@ def __init__(self, args, custom_args=[], platform_util=None): self.args.intelai_models, self.args.precision, "eval_image_classifier_inference.py") - self.benchmark_command = self.get_numactl_command(args.socket_id) + \ + 
self.benchmark_command = self.get_command_prefix(args.socket_id) + \ self.python_exe + " " + benchmark_script num_cores = self.platform_util.num_cores_per_socket if self.args.num_cores == -1 \ diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/model_init.py index bd4794638..645f2f92e 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/inference/int8/model_init.py @@ -74,7 +74,7 @@ def run_benchmark(self): "data_num_inter_threads", "data_num_intra_threads", "warmup_steps", "steps"] - cmd_prefix = self.get_numactl_command(self.args.socket_id) + \ + cmd_prefix = self.get_command_prefix(self.args.socket_id) + \ self.python_exe + " " + benchmark_script cmd = self.add_args_to_command(cmd_prefix, script_args_list) # add num_cores @@ -94,7 +94,7 @@ def run_accuracy(self): "batch_size", "num_inter_threads", "num_intra_threads"] - cmd_prefix = self.get_numactl_command(self.args.socket_id) + \ + cmd_prefix = self.get_command_prefix(self.args.socket_id) + \ self.python_exe + " " + accuracy_script cmd = self.add_args_to_command(cmd_prefix, script_args_list) self.run_command(cmd) @@ -106,7 +106,7 @@ def run_calibration(self): "input_graph", "data_location", "batch_size", "num_inter_threads", "num_intra_threads"] - cmd_prefix = self.get_numactl_command(self.args.socket_id) + \ + cmd_prefix = self.get_command_prefix(self.args.socket_id) + \ self.python_exe + " " + calibration_script cmd = self.add_args_to_command(cmd_prefix, script_args_list) self.run_command(cmd) diff --git a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md index 1f472509b..13fb7c060 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md @@ 
-112,31 +112,31 @@ other precisions are coming later. Example log tail when benchmarking for throughput: ``` - [Running warmup steps...] - steps = 10, 185.108768528 images/sec - [Running benchmark steps...] - steps = 10, 184.482999017 images/sec - steps = 20, 184.561572444 images/sec - steps = 30, 184.620504126 images/sec - steps = 40, 183.900309054 images/sec - steps = 50, 184.110358713 images/sec - Ran inference with batch size 240 - Log location outside container: /benchmark_inceptionv4_inference_int8_20190306_215858.log + [Running warmup steps...] + steps = 10, 184.497605972 images/sec + [Running benchmark steps...] + steps = 10, 184.664702184 images/sec + steps = 20, 184.938455688 images/sec + steps = 30, 184.454197634 images/sec + steps = 40, 184.491891402 images/sec + steps = 50, 184.390001575 images/sec + Ran inference with batch size 240 + Log location outside container: /benchmark_inceptionv4_inference_int8_20190415_233517.log ``` Example log tail when benchmarking for latency: ``` - [Running warmup steps...] - steps = 10, 30.8738415788 images/sec - [Running benchmark steps...] - steps = 10, 31.8633787623 images/sec - steps = 20, 31.1129375635 images/sec - steps = 30, 31.2716048462 images/sec - steps = 40, 31.9682931663 images/sec - steps = 50, 31.6665962009 images/sec - Latency: 31.936 ms - Ran inference with batch size 1 - Log location outside container: /benchmark_inceptionv4_inference_int8_20190306_215702.log + [Running warmup steps...] + steps = 10, 32.6095380262 images/sec + [Running benchmark steps...] 
+ steps = 10, 32.9024373024 images/sec + steps = 20, 32.5328989723 images/sec + steps = 30, 32.5988932413 images/sec + steps = 40, 31.3991914957 images/sec + steps = 50, 32.7053998207 images/sec + Latency: 30.598 ms + Ran inference with batch size 1 + Log location outside container: /benchmark_inceptionv4_inference_int8_20190415_232441.log ``` ## FP32 Inference Instructions diff --git a/benchmarks/image_recognition/tensorflow/inceptionv4/inference/inceptionv4_model_init.py b/benchmarks/image_recognition/tensorflow/inceptionv4/inference/inceptionv4_model_init.py index d4294a179..74da197fd 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv4/inference/inceptionv4_model_init.py +++ b/benchmarks/image_recognition/tensorflow/inceptionv4/inference/inceptionv4_model_init.py @@ -73,7 +73,7 @@ def parse_args(self): def add_command_prefix(self, script_path): """ Uses the specified script path and adds on the command prefix """ - return self.get_numactl_command(self.args.socket_id) + self.python_exe + " " + \ + return self.get_command_prefix(self.args.socket_id) + self.python_exe + " " + \ script_path def run_benchmark(self): diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py index 6f22fd12a..c693b055c 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/inference/int8/model_init.py @@ -34,7 +34,7 @@ class ModelInitializer(BaseModelInitializer): def __init__(self, args, custom_args=[], platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - self.cmd = self.get_numactl_command(self.args.socket_id) + "python " + self.cmd = self.get_command_prefix(self.args.socket_id) + "python " # Set KMP env vars, if they haven't already been set config_file_path = 
os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") diff --git a/benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/model_init.py index 43f862159..98962a670 100644 --- a/benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/resnet101/inference/fp32/model_init.py @@ -70,7 +70,7 @@ def __init__(self, args, custom_args=[], platform_util=None): self.args.intelai_models, self.args.mode, "eval_image_classifier_inference.py") - self.benchmark_command = self.get_numactl_command(args.socket_id) + \ + self.benchmark_command = self.get_command_prefix(args.socket_id) + \ self.python_exe + " " + benchmark_script self.benchmark_command = \ diff --git a/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py index 4bd21a12e..a53cf6884 100644 --- a/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py @@ -78,7 +78,7 @@ def run_benchmark_or_accuracy(self): self.args.intelai_models, self.args.mode, "eval_image_classifier_inference.py") - cmd = self.get_numactl_command(self.args.socket_id) + self.python_exe + " " + cmd + cmd = self.get_command_prefix(self.args.socket_id) + self.python_exe + " " + cmd cmd += " --input-graph=" + self.args.input_graph + \ " --num-inter-threads=" + str(self.args.num_inter_threads) + \ @@ -107,7 +107,7 @@ def run_calibration(self): "input_graph", "data_location", "batch_size", "num_inter_threads", "num_intra_threads"] - cmd_prefix = self.get_numactl_command(self.args.socket_id) + \ + cmd_prefix = self.get_command_prefix(self.args.socket_id) + \ self.python_exe + " " + calibration_script cmd = self.add_args_to_command(cmd_prefix, script_args_list) 
self.run_command(cmd) diff --git a/benchmarks/image_recognition/tensorflow/resnet50/README.md b/benchmarks/image_recognition/tensorflow/resnet50/README.md index 0b73a4e56..a34a52139 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50/README.md @@ -102,12 +102,15 @@ The tail of the log output when the benchmarking completes should look something like this: ``` ... -steps = 470, 460.113806562 images/sec -steps = 480, 460.073982602 images/sec -steps = 490, 463.289831148 images/sec -steps = 500, 463.521427264 images/sec +Iteration 497: 0.253495 sec +Iteration 498: 0.253033 sec +Iteration 499: 0.258083 sec +Iteration 500: 0.254541 sec +Average time: 0.254572 sec +Batch size = 128 +Throughput: 502.805 images/sec Ran inference with batch size 128 -Log location outside container: {--output-dir value}/benchmark_resnet50_inference_int8_20190223_180546.log +Log location outside container: {--output-dir value}/benchmark_resnet50_inference_int8_20190416_172735.log ``` Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands diff --git a/benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/model_init.py index 4c3dfbd1d..88520cbdd 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/model_init.py +++ b/benchmarks/image_recognition/tensorflow/resnet50/inference/fp32/model_init.py @@ -71,7 +71,7 @@ def __init__(self, args, custom_args=[], platform_util=None): self.args.intelai_models, self.args.mode, "eval_image_classifier_inference.py") - self.benchmark_command = self.get_numactl_command(args.socket_id) + \ + self.benchmark_command = self.get_command_prefix(args.socket_id) + \ self.python_exe + " " + benchmark_script num_cores = self.platform_util.num_cores_per_socket if self.args.num_cores == -1 \ diff --git 
a/benchmarks/image_recognition/tensorflow/resnet50/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/resnet50/inference/int8/model_init.py index 75e9db07c..41571564c 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/resnet50/inference/int8/model_init.py @@ -74,7 +74,7 @@ def run_benchmark_or_accuracy(self): self.args.intelai_models, self.args.mode, "eval_image_classifier_inference.py") - cmd = self.get_numactl_command(self.args.socket_id) + self.python_exe + " " + cmd + cmd = self.get_command_prefix(self.args.socket_id) + self.python_exe + " " + cmd cmd += " --input-graph=" + self.args.input_graph + \ " --num-inter-threads=" + str(self.args.num_inter_threads) + \ @@ -106,7 +106,7 @@ def run_calibration(self): "input_graph", "data_location", "batch_size", "num_inter_threads", "num_intra_threads"] - cmd_prefix = self.get_numactl_command(self.args.socket_id) + \ + cmd_prefix = self.get_command_prefix(self.args.socket_id) + \ self.python_exe + " " + calibration_script cmd = self.add_args_to_command(cmd_prefix, script_args_list) self.run_command(cmd) diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/model_init.py b/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/model_init.py index 43f9cdacc..35412be2f 100644 --- a/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/model_init.py +++ b/benchmarks/image_segmentation/tensorflow/maskrcnn/inference/fp32/model_init.py @@ -42,7 +42,7 @@ def __init__(self, args, custom_args=[], platform_util=None): benchmark_script = os.path.join( self.args.intelai_models, "coco.py") - self.benchmark_command = self.get_numactl_command(args.socket_id) + \ + self.benchmark_command = self.get_command_prefix(args.socket_id) + \ self.python_exe + " " + benchmark_script + " evaluate " set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) diff --git 
a/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/model_init.py b/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/model_init.py index d4998afae..3cdcf1701 100644 --- a/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/model_init.py +++ b/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/model_init.py @@ -51,7 +51,7 @@ def __init__(self, args, custom_args=[], platform_util=None): "unet_infer.py") # Create the command prefix using numactl - self.command_prefix = self.get_numactl_command(self.args.socket_id) +\ + self.command_prefix = self.get_command_prefix(self.args.socket_id) +\ "{} {}".format(self.python_exe, script_path) # Add batch size arg diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py b/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py index 6a2b7244f..535f42416 100644 --- a/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py +++ b/benchmarks/language_modeling/tensorflow/lm-1b/inference/fp32/model_init.py @@ -31,7 +31,7 @@ class ModelInitializer(BaseModelInitializer): def __init__(self, args, custom_args, platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - self.cmd = self.get_numactl_command(self.args.socket_id) + self.cmd = self.get_command_prefix(self.args.socket_id) self.set_num_inter_intra_threads() diff --git a/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/model_init.py b/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/model_init.py index a23403eb0..6f46f2c80 100644 --- a/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/model_init.py +++ b/benchmarks/language_translation/tensorflow/gnmt/inference/fp32/model_init.py @@ -30,7 +30,7 @@ class ModelInitializer(BaseModelInitializer): def __init__(self, args, custom_args=[], platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - self.cmd = 
self.get_numactl_command(self.args.socket_id) + self.cmd = self.get_command_prefix(self.args.socket_id) if self.args.socket_id != -1 and self.args.num_cores != -1: self.cmd += "--physcpubind=0-" + \ diff --git a/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/model_init.py b/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/model_init.py index 4e1519e03..b4fd1bc30 100644 --- a/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/model_init.py +++ b/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/model_init.py @@ -32,7 +32,7 @@ class ModelInitializer(BaseModelInitializer): def __init__(self, args, custom_args, platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - self.cmd = self.get_numactl_command(self.args.socket_id) + self.cmd = self.get_command_prefix(self.args.socket_id) self.bleu_params = "" self.set_num_inter_intra_threads() diff --git a/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py index 00f8b9f3f..85dae1e68 100644 --- a/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py +++ b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py @@ -31,7 +31,7 @@ class ModelInitializer(BaseModelInitializer): def __init__(self, args, custom_args, platform_util=None): super(ModelInitializer, self).__init__(args, custom_args, platform_util) - self.cmd = self.get_numactl_command(self.args.socket_id) + self.cmd = self.get_command_prefix(self.args.socket_id) self.bleu_params = "" self.set_num_inter_intra_threads() diff --git a/benchmarks/launch_benchmark.py b/benchmarks/launch_benchmark.py index 5dd7fbca1..7515936eb 100644 --- a/benchmarks/launch_benchmark.py +++ 
b/benchmarks/launch_benchmark.py @@ -175,6 +175,8 @@ def get_env_vars(self, benchmark_scripts, use_case, intelai_models): "BENCHMARK_ONLY": args.benchmark_only, "ACCURACY_ONLY": args.accuracy_only, "OUTPUT_RESULTS": args.output_results, + "DISABLE_TCMALLOC": args.disable_tcmalloc, + "TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD": args.tcmalloc_large_alloc_report_threshold, "DOCKER": str(args.docker_image is not None), "PYTHON_EXE": sys.executable if not args.docker_image else "python" } diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/model_init.py index a605cc8e3..c30f39ada 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/model_init.py +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/fp32/model_init.py @@ -65,7 +65,7 @@ def __init__(self, args, custom_args, platform_util=None): self.args.intelai_models, self.args.mode, self.args.precision, "eval.py") self.command_prefix = \ - self.get_numactl_command(self.args.socket_id) + self.python_exe + " " + \ + self.get_command_prefix(self.args.socket_id) + self.python_exe + " " + \ benchmark_script config_file_path = os.path.join(self.args.checkpoint, diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/model_init.py index 705ef72c1..37eaf2722 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/model_init.py +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/inference/int8/model_init.py @@ -83,7 +83,7 @@ def parse_args(self): def run_perf_command(self): set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) self.parse_args() - command = self.get_numactl_command(self.args.socket_id) + command = self.get_command_prefix(self.args.socket_id) command += " {} ".format(self.python_exe) + self.perf_script_path command += " -g " + 
self.args.input_graph if self.custom_args: diff --git a/benchmarks/object_detection/tensorflow/rfcn/README.md b/benchmarks/object_detection/tensorflow/rfcn/README.md index 3b3a64b9d..10a0342ce 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/README.md +++ b/benchmarks/object_detection/tensorflow/rfcn/README.md @@ -174,16 +174,16 @@ to get additional debug output or change the default output location. Below is a sample log file tail when running benchmarking for throughput and latency: ``` -Step 0: 10.6923000813 seconds -Step 10: 0.168856859207 seconds +Step 0: 11.4450089931 seconds +Step 10: 0.25656080246 seconds ... -Step 460: 0.181148052216 seconds -Step 470: 0.202737092972 seconds -Step 480: 0.117042064667 seconds -Step 490: 0.103501081467 seconds -Avg. Duration per Step:0.169812122345 +Step 460: 0.256786823273 seconds +Step 470: 0.267828941345 seconds +Step 480: 0.141321897507 seconds +Step 490: 0.127830982208 seconds +Avg. Duration per Step:0.195356227875 Ran inference with batch size -1 -Log location outside container: {--output-dir}/benchmark_rfcn_inference_int8_20190227_191959.log +Log location outside container: {--output-dir}/benchmark_rfcn_inference_int8_20190416_182445.log ``` And here is a sample log file tail when running for accuracy: diff --git a/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/model_init.py index a4ab51dfa..031c0f2ca 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/model_init.py +++ b/benchmarks/object_detection/tensorflow/rfcn/inference/fp32/model_init.py @@ -55,7 +55,7 @@ def __init__(self, args, custom_args, platform_util): "research") def run_benchmark(self): - command_prefix = self.get_numactl_command(self.args.socket_id) + \ + command_prefix = self.get_command_prefix(self.args.socket_id) + \ self.python_exe + " " + self.benchmark_script # set num_inter_threads and num_intra_threads diff --git 
a/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py index d6cb2cc97..4f2a29ab4 100755 --- a/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py +++ b/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py @@ -54,6 +54,9 @@ def __init__(self, args, custom_args=[], platform_util=None): self.parse_args() + # Get the command prefix, but numactl is added later in run_perf_command() + self.command.append(self.get_command_prefix(self.args.socket_id, numactl=False)) + # Set KMP env vars, if they haven't already been set config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") self.set_kmp_vars(config_file_path) @@ -158,7 +161,8 @@ def run_perf_command(self): def run_accuracy_command(self): # already validated by parent - self.command = "FROZEN_GRAPH=" + self.args.input_graph + self.command = self.get_command_prefix(self.args.socket_id, numactl=False) + self.command += "FROZEN_GRAPH=" + self.args.input_graph if self.args.data_location and os.path.exists( self.args.data_location): diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md index cee1a3848..2d129384f 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md @@ -154,13 +154,13 @@ Below is a sample log file tail when running benchmarking for throughput and latency: ``` -Step 4970: 0.0340421199799 seconds -Step 4980: 0.0429329872131 seconds -Step 4990: 0.0358219146729 seconds -Avg. Duration per Step:0.0364457404137 -Avg. Duration per Step:0.0365921088491 +Step 4970: 0.0305020809174 seconds +Step 4980: 0.0294089317322 seconds +Step 4990: 0.0301029682159 seconds +Avg. Duration per Step:0.0300041775227 +Avg.
Duration per Step:0.0301246762276 Ran inference with batch size 1 -Log location outside container: /benchmark_ssd-mobilenet_inference_int8_20181203_232524.log +Log location outside container: /benchmark_ssd-mobilenet_inference_int8_20190417_175418.log ``` And here is a sample log file tail when running for accuracy: diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/model_init.py index 585d3ed0e..927f73048 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/model_init.py +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/model_init.py @@ -68,7 +68,7 @@ def __init__(self, args, custom_args, platform_util): self.args.precision, "infer_detections.py") # get command with numactl - self.run_cmd = self.get_numactl_command( + self.run_cmd = self.get_command_prefix( self.args.socket_id) + "{} {}".format(self.python_exe, benchmark_script) output_tf_record_path = os.path.join(os.path.dirname( diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/config.json b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/config.json index 6f1228ba7..273b45b40 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/config.json +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/config.json @@ -1,7 +1,7 @@ { "optimization_parameters": { "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", - "KMP_BLOCKTIME": 0, + "KMP_BLOCKTIME": 1, "KMP_SETTINGS": 1 } } diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py index 57114447a..28522ada4 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py +++ 
b/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/int8/model_init.py @@ -52,7 +52,7 @@ def __init__(self, args, custom_args=[], platform_util=None): benchmark_script = os.path.join( self.args.intelai_models, self.args.mode, self.args.precision, "run_frozen_graph_ssdmob.py") - self.command_prefix = self.get_numactl_command(self.args.socket_id) + \ + self.command_prefix = self.get_command_prefix(self.args.socket_id) + \ "{} {}".format(self.python_exe, benchmark_script) set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py index 1ad534ed9..20bfcccf5 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py @@ -57,7 +57,7 @@ def __init__(self, args, custom_args, platform_util): benchmark_script = os.path.join(self.model_dir, "infer_detections.py") # get command with numactl - self.run_cmd = self.get_numactl_command(self.args.socket_id) + self.run_cmd = self.get_command_prefix(self.args.socket_id) self.run_cmd += "{0} {1}".format(self.python_exe, benchmark_script) self.run_cmd += " --input-graph {0}".format(self.args.input_graph) self.run_cmd += " --batch-size {0}".format(args.batch_size) diff --git a/benchmarks/recommendation/tensorflow/ncf/inference/fp32/model_init.py b/benchmarks/recommendation/tensorflow/ncf/inference/fp32/model_init.py index 1704839cb..960c2523a 100644 --- a/benchmarks/recommendation/tensorflow/ncf/inference/fp32/model_init.py +++ b/benchmarks/recommendation/tensorflow/ncf/inference/fp32/model_init.py @@ -50,7 +50,7 @@ def __init__(self, args, custom_args=[], platform_util=None): self.args.intelai_models, self.args.mode, self.args.precision, "ncf_main.py") - self.benchmark_command = self.get_numactl_command(args.socket_id) + \ + 
self.benchmark_command = self.get_command_prefix(args.socket_id) + \ self.python_exe + " " + benchmark_script set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md index 89fb2b244..ede163b61 100755 --- a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md @@ -72,7 +72,7 @@ Benchmarking instructions and scripts for model training coming later. ``` cd /home//models/benchmarks - python launch_benchmark.py + python launch_benchmark.py \ --model-name wide_deep_large_ds \ --precision int8 \ --mode inference \ @@ -92,7 +92,7 @@ Benchmarking instructions and scripts for model training coming later. ``` cd /home//models/benchmarks - python launch_benchmark.py + python launch_benchmark.py \ --model-name wide_deep_large_ds \ --precision int8 \ --mode inference \ @@ -109,7 +109,7 @@ Benchmarking instructions and scripts for model training coming later. 
``` cd /home//models/benchmarks - python launch_benchmark.py + python launch_benchmark.py \ --model-name wide_deep_large_ds \ --precision int8 \ --mode inference \ diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py index 6655dce85..6293b3d0c 100755 --- a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py @@ -62,7 +62,7 @@ def run_benchmark(self): script_args_list = ["input_graph", "num_parallel_batches", "batch_size", "num_inter_threads", "num_intra_threads", "accuracy_only", "data_location"] - cmd_prefix = self.get_numactl_command(self.args.socket_id) + \ + cmd_prefix = self.get_command_prefix(self.args.socket_id) + \ self.python_exe + " " + benchmark_script cmd = self.add_args_to_command(cmd_prefix, script_args_list) self.run_command(cmd) diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/model_init.py b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/model_init.py index 9fdef4537..c6a3b25fd 100755 --- a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/model_init.py +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/int8/model_init.py @@ -62,7 +62,7 @@ def run_benchmark(self): script_args_list = ["input_graph", "num_parallel_batches", "batch_size", "num_inter_threads", "num_intra_threads", "accuracy_only", "data_location"] - cmd_prefix = self.get_numactl_command(self.args.socket_id) + \ + cmd_prefix = self.get_command_prefix(self.args.socket_id) + \ self.python_exe + " " + benchmark_script cmd = self.add_args_to_command(cmd_prefix, script_args_list) self.run_command(cmd) diff --git a/docs/general/tensorflow/LaunchBenchmark.md b/docs/general/tensorflow/LaunchBenchmark.md index 8544c7320..ccab76c9c 100644 --- 
a/docs/general/tensorflow/LaunchBenchmark.md +++ b/docs/general/tensorflow/LaunchBenchmark.md @@ -101,6 +101,16 @@ optional arguments: conjunction with --accuracy-only and --mode=inference. --output-dir OUTPUT_DIR Folder to dump output into. + --disable-tcmalloc Disables the use of TCMalloc for int8 benchmarking. + TCMalloc is currently not used for FP32 benchmarking, + so using this flag with FP32 models will have no + effect. + --tcmalloc-large-alloc-report-threshold TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD + Sets the TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD + environment variable to the specified value. The + environment variable sets the threshold (in bytes) for + when large memory allocation messages will be + displayed. -g INPUT_GRAPH, --in-graph INPUT_GRAPH Full path to the input graph --volume CUSTOM_VOLUMES diff --git a/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py b/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py index 198509a23..791c1b761 100644 --- a/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py +++ b/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py @@ -165,12 +165,12 @@ def run(self): input_tensor = infer_graph.get_tensor_by_name('input:0') output_tensor = infer_graph.get_tensor_by_name('predict:0') - data_sess = tf.Session(graph=data_graph, config=data_config) + data_sess = tf.Session(graph=data_graph, config=data_config) infer_sess = tf.Session(graph=infer_graph, config=infer_config) num_processed_images = 0 num_remaining_images = dataset.num_examples_per_epoch(subset=subset) - num_processed_images \ - if self.args.data_location else datasets.IMAGENET_NUM_VAL_IMAGES + if self.args.data_location else (self.args.batch_size * self.args.steps) if (not self.args.accuracy_only): iteration = 0 diff --git a/tests/unit/common/tensorflow/test_run_tf_benchmarks.py 
b/tests/unit/common/tensorflow/test_run_tf_benchmarks.py index b5407f00e..dba3e6ca6 100644 --- a/tests/unit/common/tensorflow/test_run_tf_benchmarks.py +++ b/tests/unit/common/tensorflow/test_run_tf_benchmarks.py @@ -73,13 +73,15 @@ def clear_kmp_env_vars(): @patch("os.stat") @patch("os.chdir") @patch("os.remove") +@patch("glob.glob") @patch("common.platform_util.os") @patch("common.platform_util.system_platform") @patch("common.platform_util.subprocess") @patch("common.base_model_init.BaseModelInitializer.run_command") -def test_run_benchmark(mock_run_command, mock_subprocess, mock_platform, - mock_os, mock_remove, mock_chdir, mock_stat, mock_path_exists, mock_is_file, mock_is_dir, - mock_listdir, mock_rmtree, mock_mkdir, test_args, expected_cmd): +def test_run_benchmark(mock_run_command, mock_subprocess, mock_platform, mock_os, + mock_glob, mock_remove, mock_chdir, mock_stat, mock_path_exists, + mock_is_file, mock_is_dir, mock_listdir, mock_rmtree, mock_mkdir, + test_args, expected_cmd): """ Runs through executing the specified run_tf_benchmarks.py command from the test_args and verifying that the model_init file calls run_command with @@ -92,6 +94,7 @@ def test_run_benchmark(mock_run_command, mock_subprocess, mock_platform, mock_stat.return_value = MagicMock(st_nlink=0) parse_model_args_file() mock_listdir.return_value = True + mock_glob.return_value = ["/usr/lib/libtcmalloc.so.4.2.6"] clear_kmp_env_vars() platform_config.set_mock_system_type(mock_platform) platform_config.set_mock_os_access(mock_os) diff --git a/tests/unit/common/tensorflow/tf_model_args.txt b/tests/unit/common/tensorflow/tf_model_args.txt index 21f680eda..55137c9e3 100644 --- a/tests/unit/common/tensorflow/tf_model_args.txt +++ b/tests/unit/common/tensorflow/tf_model_args.txt @@ -1,90 +1,92 @@ -run_tf_benchmark.py --framework tensorflow --use-case recommendation --precision fp32 --mode inference --model-name wide_deep --batch-size 1024 --data-location /dataset --checkpoint /checkpoints 
--intelai-models . --verbose, OMP_NUM_THREADS=1 numactl --cpunodebind=0 --membind=0 python inference/fp32/wide_deep_inference.py --data_dir=/dataset --model_dir=/checkpoints --batch_size=1024 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb --data-location=/dataset, numactl --cpunodebind=0 
--membind=0 python /workspace/intelai_models/inference/fp32/accuracy.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb --data_location=/dataset -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 100 --in-graph /final_int8_inceptionv3.pb --intelai-models . --accuracy-only --verbose,python ./int8/accuracy.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/final_int8_inceptionv3.pb -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . 
--benchmark-only --socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inceptionv3 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/inception_frozen_max_min.pb --steps=200 --warmup-steps=20,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inceptionv3 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/inception_frozen_max_min.pb --steps=200 --warmup-steps=20,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 100 --accuracy-only --data-location /dataset --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--verbose,python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=fp32 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=240 --num_inter_threads=2 
--input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=1 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data-location=/dataset,python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data_location=/dataset --input_height=299 --input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb 
--inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=1 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=128 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data_location=/dataset --input_height=299 --input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py 
--input-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --inter-op-parallelism-threads=1 --intra-op-parallelism-threads=28 --batch-size=1 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --inter-op-parallelism-threads=1 --intra-op-parallelism-threads=28 --batch-size=128 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --data-location=/dataset --calibration-only,python /workspace/intelai_models/int8/generate_calibration_data.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/in_graph/resnet50_int8_pretrained_model.pb --data_location=/dataset -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name resnet50 --batch-size 100 --data-location /dataset --in-graph /final_int8_resnet50.pb --intelai-models . 
--accuracy-only --verbose,python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name resnet50 --batch-size 128 --in-graph /final_int8_resnet50.pb --intelai-models . --benchmark-only --verbose,python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=128 --warmup-steps=10 --steps=50 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --steps=200 --warmup-steps=20,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 1 --in-graph /freezed_resnet50.pb --intelai-models . 
--socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 128 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 1 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size 100 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/freezed_resnet50.pb --accuracy-only --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=100 --data-location=/dataset --accuracy-only --num-cores=28 --warmup-steps=10 --steps=50 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference 
--benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --output-results --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50_fp32_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --results-file-path /workspace/benchmarks/common/tensorflow/logs/resnet50_fp32_inference_results*.txt +run_tf_benchmark.py --framework tensorflow --use-case recommendation --precision fp32 --mode inference --model-name wide_deep --batch-size 1024 --data-location /dataset --checkpoint /checkpoints --intelai-models . --verbose,OMP_NUM_THREADS=1 numactl --cpunodebind=0 --membind=0 python inference/fp32/wide_deep_inference.py --data_dir=/dataset --model_dir=/checkpoints --batch_size=1024 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference 
--benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb --data-location=/dataset, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/accuracy.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb --data_location=/dataset +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 100 --in-graph /final_int8_inceptionv3.pb --intelai-models . --accuracy-only --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./int8/accuracy.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/final_int8_inceptionv3.pb +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /final_int8_inceptionv3.pb --intelai-models . 
--benchmark-only --socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --disable-tcmalloc,numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inceptionv3 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/inception_frozen_max_min.pb --steps=200 --warmup-steps=20,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 
--num_cores=28 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inceptionv3 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/inception_frozen_max_min.pb --steps=200 --warmup-steps=20,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --disable-tcmalloc --verbose,numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 100 --accuracy-only --data-location /dataset --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=fp32 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb 
--data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=1 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data_location=/dataset --input_height=299 --input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose 
--model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=1 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=128 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data_location=/dataset --input_height=299 --input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 
--precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --inter-op-parallelism-threads=1 --intra-op-parallelism-threads=28 --batch-size=1 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --inter-op-parallelism-threads=1 --intra-op-parallelism-threads=28 --batch-size=128 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --data-location=/dataset --calibration-only,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/int8/generate_calibration_data.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/in_graph/resnet50_int8_pretrained_model.pb --data_location=/dataset 
+run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name resnet50 --batch-size 100 --data-location /dataset --in-graph /final_int8_resnet50.pb --intelai-models . --accuracy-only --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name resnet50 --batch-size 128 --in-graph /final_int8_resnet50.pb --intelai-models . --benchmark-only --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=128 --warmup-steps=10 --steps=50 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --steps=200 --warmup-steps=20,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 1 --in-graph /freezed_resnet50.pb --intelai-models . 
--socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 128 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 1 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size 100 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/freezed_resnet50.pb --accuracy-only --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=100 --data-location=/dataset --accuracy-only --num-cores=28 
--warmup-steps=10 --steps=50 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --output-results --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50_fp32_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --results-file-path /workspace/benchmarks/common/tensorflow/logs/resnet50_fp32_inference_results*.txt run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name squeezenet --batch-size 64 --checkpoint /checkpoints --intelai-models . --socket-id 0 --verbose,taskset -c 0-27 python ./fp32/train_squeezenet.py --data_location None --batch_size 64 --num_inter_threads 1 --num_intra_threads 28 --model_dir /checkpoints --inference-only --verbose run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name squeezenet --batch-size 1 --checkpoint /checkpoints --intelai-models . --socket-id 0 --verbose,taskset -c 0-27 python ./fp32/train_squeezenet.py --data_location None --batch_size 1 --num_inter_threads 1 --num_intra_threads 28 --model_dir /checkpoints --inference-only --verbose -run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . 
--socket-id 0 --verbose --config_file=pipeline.config,numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 1 --num_intra_threads 28 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval -run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --config_file=pipeline.config --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 4 --num_intra_threads 16 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval +run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --config_file=pipeline.config,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 1 --num_intra_threads 28 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval +run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . 
--socket-id 0 --verbose --config_file=pipeline.config --num-inter-threads 4 --num-intra-threads 16,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 4 --num_intra_threads 16 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset --in-graph=/in_graph/frozen_inference_graph.pb,sh /workspace/intelai_models/inference/fp32/coco_accuracy.sh /in_graph/frozen_inference_graph.pb /dataset/coco_val.record /workspace/models run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --data-location=/dataset, sh /workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/ssdmobilenet_int8_pretrained_model.pb /dataset -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size=1 --socket-id 0 --data-location=/dataset --verbose --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --benchmark-only --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb,numactl --cpunodebind=0 --membind=0 python 
/workspace/intelai_models/inference/int8/run_frozen_graph_ssdmob.py -g /in_graph/ssdmobilenet_int8_pretrained_model.pb -n 5000 -d /dataset -x --num-inter-threads 2 --num-intra-threads 28 -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py --input_tfrecord_paths=/dataset --output_tfrecord_path=/SSD-mobilenet-out.tfrecord --inference_graph=/in_graph/frozen_inference_graph.pb --discard_image_pixels=True --num_inter_threads=2 --num_intra_threads=28 +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size=1 --socket-id 0 --data-location=/dataset --verbose --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --benchmark-only --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/run_frozen_graph_ssdmob.py -g /in_graph/ssdmobilenet_int8_pretrained_model.pb -n 5000 -d /dataset -x --num-inter-threads 2 --num-intra-threads 28 +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb 
--data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py --input_tfrecord_paths=/dataset --output_tfrecord_path=/SSD-mobilenet-out.tfrecord --inference_graph=/in_graph/frozen_inference_graph.pb --discard_image_pixels=True --num_inter_threads=2 --num_intra_threads=28 run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --accuracy-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --benchmark-dir=/workspace/benchmarks --data-location=/dataset,sh /workspace/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/ssdmobilenet_accuracy.sh /in_graph/frozen_inference_graph.pb /dataset -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --accuracy-only --data-location /dataset -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs 
--benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 -run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name rfcn --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --config_file=rfcn_pipeline.config,numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --inter_op 1 --intra_op 28 --omp 28 --pipeline_config_path /checkpoints/rfcn_pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/models/rfcn/eval --logtostderr --blocktime=0 --run_once=True +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --accuracy-only --data-location /dataset +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose 
--model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 +run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name rfcn --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --config_file=rfcn_pipeline.config,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --inter_op 1 --intra_op 28 --omp 28 --pipeline_config_path /checkpoints/rfcn_pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/models/rfcn/eval --logtostderr --blocktime=0 --run_once=True run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset --accuracy-only --split=accuracy_message,FROZEN_GRAPH=/in_graph/frozen_inference_graph.pb TF_RECORD_FILE=/dataset SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/fp32/coco_mAP.sh -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --benchmark-only --number_of_steps=500,python /workspace/intelai_models/inference/int8/run_rfcn_inference.py -m /workspace/models -g 
/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb -x 500 -d /dataset --num-inter-threads 2 --num-intra-threads 56 -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --accuracy-only --split=accuracy_message,FROZEN_GRAPH=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb TF_RECORD_FILE=/dataset SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/int8/coco_mAP.sh +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --benchmark-only --number_of_steps=500,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb -x 500 -d /dataset --num-inter-threads 2 --num-intra-threads 56 +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --accuracy-only --split=accuracy_message,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 FROZEN_GRAPH=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb TF_RECORD_FILE=/dataset SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/int8/coco_mAP.sh run_tf_benchmark.py --framework tensorflow --use-case 
text_to_speech --precision fp32 --mode inference --model-name wavenet --num-cores 1 --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --checkpoint_name=model.ckpt-99 --sample=8510,numactl --physcpubind=0-0 --membind=0 python generate.py /checkpoints/model.ckpt-99 --num_inter_threads=1 --num_intra_threads=1 --sample=8510 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --accuracy-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb --data-location=/dataset,python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=2 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=56 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb,numactl --cpunodebind=0 --membind=0 python 
/workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_int8_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-intra-threads=28 --num-inter-threads=1 --input-graph=/in_graph/resnet101_int8_model.pb --warmup-steps=40 --steps=100 -"run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de","numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer 
--hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=1 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" -"run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de","numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=32 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" -run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=1 --file=/dataset/newstest2014.en 
--file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt -run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=64 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=64 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data-location=/dataset --input_height=224 --input_width=224, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/accuracy.py --input_height=224 --input_width=224 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data_location=/dataset --input_layer=input -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 
--output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=240 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py 
--batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --accuracy-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=2 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=56 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-inter-threads=1 
--input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_int8_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-intra-threads=28 --num-inter-threads=1 --input-graph=/in_graph/resnet101_int8_model.pb --warmup-steps=40 --steps=100 +"run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de","LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=1 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" +"run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models 
--num-cores=-1 --batch-size=32 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de","LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=32 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" +run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=1 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt +run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=64 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de 
--vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=64 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data-location=/dataset --input_height=224 --input_width=224,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/accuracy.py --input_height=224 --input_width=224 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data_location=/dataset --input_layer=input +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 
--warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=240 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 1 -python common/tensorflow/run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=lm-1b --precision=fp32 --mode=inference 
--benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models,numactl --cpunodebind=0 --membind=0 python /workspace/models/benchmark.py -b=1 -I=100 --inter=1 --intra=28 +python common/tensorflow/run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=lm-1b --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/benchmark.py -b=1 -I=100 --inter=1 --intra=28 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 100 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints,numactl 
--cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 100 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --in-graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --data-location=/dataset,python /workspace/intelai_models/inference/fp32/accuracy.py --batch_size=100 --num_inter_threads=2 --input_graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --num_intra_threads=56 --data_location=/dataset -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --accuracy_only -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 
--inter_op_parallelism_threads=1 --batch_size=256 --inference_only --benchmark_only -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=1 --inference_only --benchmark_only -run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/coco.py evaluate --dataset=/dataset --num_inter_threads 1 --num_intra_threads 28 --nw 5 --nb 50 --model=coco --infbs 1 -run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/coco.py evaluate --dataset=/dataset --num_inter_threads 4 --num_intra_threads 16 --nw 5 --nb 50 --model=coco --infbs 1 +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --accuracy-only --verbose --checkpoint=/checkpoints 
--data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --accuracy_only +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --benchmark_only +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=1 --inference_only --benchmark_only +run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/coco.py 
evaluate --dataset=/dataset --num_inter_threads 1 --num_intra_threads 28 --nw 5 --nb 50 --model=coco --infbs 1 +run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/coco.py evaluate --dataset=/dataset --num_inter_threads 4 --num_intra_threads 16 --nw 5 --nb 50 --model=coco --infbs 1 run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset,/workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/pretrained_int8_faster_rcnn_model.pb /dataset /workspace/models -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset, python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -d /dataset --num-inter-threads 2 --num-intra-threads 56 -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 
--benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset --number-of-steps=500, python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -n 500 -d /dataset --num-inter-threads 2 --num-intra-threads 56 -run_tf_benchmark.py --framework=tensorflow --use-case=adversarial_networks --model-name=dcgan --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/inference_bench.py -ckpt /checkpoints -dl /dataset --num_inter_threads 1 --num_intra_threads 28 -nw 100 -nb 500 --bs 100 --kmp_blocktime 1 --kmp_settings 1 -run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 1 --num_intra_threads 28 --bs 1 --dl /dataset --nw 100 --nb 200 -run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 1 --num_intra_threads 28 --bs 100 --dl /dataset --nw 100 --nb 200 -run_tf_benchmark.py --framework=tensorflow 
--use-case=language_translation --model-name=gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --infer_mode=beam_search,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/nmt.py --src=de --tgt=en --hparams_path=/workspace/intelai_models/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json --out_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_prefix=/dataset/vocab.bpe.32000 --ckpt=/checkpoints/translate.ckpt --infer_batch_size=1 --inference_input_file=/dataset/newstest2015.tok.bpe.32000.de --inference_output_file=/workspace/benchmarks/common/tensorflow/logs/output_infer --inference_ref_file=/dataset/newstest2015.tok.bpe.32000.en --num_inter_threads=1 --num_intra_threads=28 --infer_mode=beam_search -run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --infer_mode=beam_search,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/nmt.py --src=de --tgt=en --hparams_path=/workspace/intelai_models/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json --out_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_prefix=/dataset/vocab.bpe.32000 --ckpt=/checkpoints/translate.ckpt --infer_batch_size=32 --inference_input_file=/dataset/newstest2015.tok.bpe.32000.de --inference_output_file=/workspace/benchmarks/common/tensorflow/logs/output_infer 
--inference_ref_file=/dataset/newstest2015.tok.bpe.32000.en --num_inter_threads=1 --num_intra_threads=28 --infer_mode=beam_search -run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 4 --num_intra_threads 16 --bs 100 --dl /dataset --nw 100 --nb 200 -run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=unet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --checkpoint_name=model.ckpt,numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/unet_infer.py -bs 1 -cp /checkpoints/model.ckpt --num_inter_threads 1 --num_intra_threads 28 -nw 80 -nb 400 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset 
--num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=1 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=512 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset 
--num-parallel-batches=14,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=1 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=512 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=mtcc --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks 
--intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/one_image_test.py --num_inter_threads 1 --num_intra_threads 28 -ckpt /checkpoints -dl /dataset -run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=1 --num_intra_threads=28 --lfw_batch_size=1 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=200 --max_steps=1000 -run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000 -run_tf_benchmark.py 
--framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000 +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -d /dataset --num-inter-threads 2 --num-intra-threads 56 +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset --number-of-steps=500,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -n 500 -d /dataset --num-inter-threads 2 --num-intra-threads 56 +run_tf_benchmark.py 
--framework=tensorflow --use-case=adversarial_networks --model-name=dcgan --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/inference_bench.py -ckpt /checkpoints -dl /dataset --num_inter_threads 1 --num_intra_threads 28 -nw 100 -nb 500 --bs 100 --kmp_blocktime 1 --kmp_settings 1 +run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 1 --num_intra_threads 28 --bs 1 --dl /dataset --nw 100 --nb 200 +run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 1 --num_intra_threads 28 --bs 100 --dl /dataset --nw 100 --nb 200 +run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 
--batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --infer_mode=beam_search,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/nmt.py --src=de --tgt=en --hparams_path=/workspace/intelai_models/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json --out_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_prefix=/dataset/vocab.bpe.32000 --ckpt=/checkpoints/translate.ckpt --infer_batch_size=1 --inference_input_file=/dataset/newstest2015.tok.bpe.32000.de --inference_output_file=/workspace/benchmarks/common/tensorflow/logs/output_infer --inference_ref_file=/dataset/newstest2015.tok.bpe.32000.en --num_inter_threads=1 --num_intra_threads=28 --infer_mode=beam_search +run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --infer_mode=beam_search,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/nmt.py --src=de --tgt=en --hparams_path=/workspace/intelai_models/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json --out_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_prefix=/dataset/vocab.bpe.32000 --ckpt=/checkpoints/translate.ckpt --infer_batch_size=32 --inference_input_file=/dataset/newstest2015.tok.bpe.32000.de --inference_output_file=/workspace/benchmarks/common/tensorflow/logs/output_infer --inference_ref_file=/dataset/newstest2015.tok.bpe.32000.en --num_inter_threads=1 --num_intra_threads=28 --infer_mode=beam_search +run_tf_benchmark.py 
--framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 4 --num_intra_threads 16 --bs 100 --dl /dataset --nw 100 --nb 200 +run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=unet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --checkpoint_name=model.ckpt,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/unet_infer.py -bs 1 -cp /checkpoints/model.ckpt --num_inter_threads 1 --num_intra_threads 28 -nw 80 -nb 400 +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 
+run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=1 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=512 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose 
--in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=1 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=512 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 
+run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=mtcc --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/one_image_test.py --num_inter_threads 1 --num_intra_threads 28 -ckpt /checkpoints -dl /dataset +run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=1 --num_intra_threads=28 --lfw_batch_size=1 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=200 --max_steps=1000 +run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python 
/workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000 +run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000 diff --git a/tests/unit/common/test_base_model_init.py b/tests/unit/common/test_base_model_init.py index 979a6ac4c..2e147ee62 100644 --- a/tests/unit/common/test_base_model_init.py +++ b/tests/unit/common/test_base_model_init.py @@ -56,6 +56,11 @@ def mock_json(patch): return patch('json') +@pytest.fixture +def mock_glob(patch): + return patch('glob.glob') + + # Example args and output strings for testing mocks test_model_name = "resnet50" test_framework = "tensorflow" @@ -170,3 +175,36 @@ def test_set_kmp_vars_config_json_exists(mock_json): file_descriptor, config_file_path = tempfile.mkstemp(suffix=".json") base_model_init.set_kmp_vars(config_file_path) + + +@pytest.mark.parametrize('precision', ['int8', 'fp32']) +def test_command_prefix_tcmalloc(precision, mock_glob): + """ Models should include LD_PRELOAD in the command prefix, as long as tcmalloc is not 
disabled""" + platform_util = MagicMock() + args = MagicMock(verbose=True, model_name=test_model_name) + test_tcmalloc_lib = "/usr/lib/libtcmalloc.so.4.2.6" + mock_glob.return_value = [test_tcmalloc_lib] + os.environ["PYTHON_EXE"] = "python" + args.socket_id = 0 + args.precision = precision + + # If tcmalloc is not disabled, we should have LD_PRELOAD in the prefix + args.disable_tcmalloc = False + base_model_init = BaseModelInitializer(args, [], platform_util) + command_prefix = base_model_init.get_command_prefix(args.socket_id) + assert "LD_PRELOAD={}".format(test_tcmalloc_lib) in command_prefix + assert "numactl --cpunodebind=0 --membind=0" in command_prefix + + # If tcmalloc is disabled, LD_PRELOAD should not be in the prefix + args.disable_tcmalloc = True + base_model_init = BaseModelInitializer(args, [], platform_util) + command_prefix = base_model_init.get_command_prefix(args.socket_id) + assert "LD_PRELOAD={}".format(test_tcmalloc_lib) not in command_prefix + assert "numactl --cpunodebind=0 --membind=0" in command_prefix + + # If numactl is set to false, we should not have numactl in the prefix + args.disable_tcmalloc = False + base_model_init = BaseModelInitializer(args, [], platform_util) + command_prefix = base_model_init.get_command_prefix(args.socket_id, numactl=False) + assert "LD_PRELOAD={}".format(test_tcmalloc_lib) in command_prefix + assert "numactl" not in command_prefix From 1659cdb4f3caa9615f55252684359ba004b5dc4a Mon Sep 17 00:00:00 2001 From: Wafaa Taie Date: Mon, 22 Apr 2019 18:56:10 -0700 Subject: [PATCH 21/62] Add SSD-VGG16 COCO int8/fp32 inference benchmarks (#286) * add ssd_vgg16 coco based benchmarks * configure the env vars, clean up and update start.sh and readme. * cleanup and update readme. * update readme and other files to highlight this model works only with py3. * add and use the anchor_manipulator.py customized file. * apply changes to install cocoapi, and update readme with SKX based benchmark log snippet.
* use a cocoapi that works with py3, modify to get the expected performance, add unit tests. * add changes based on code review * fix the model name to use ssd_vgg16 instead of ssd-vgg16 to fix python import. * fix unit tests, update readme. * update log snippet in readme. * update unit tests after supporting memory alloc. * changes for Karthik's code review. * update the int8 log snippet based on CLX. * update the fp32 log snippet based on CLX. --- benchmarks/README.md | 1 + benchmarks/common/tensorflow/start.sh | 28 ++ .../object_detection/tensorflow/__init__.py | 2 +- .../tensorflow/ssd_vgg16/README.md | 367 ++++++++++++++++++ .../tensorflow/ssd_vgg16/__init__.py | 19 + .../ssd_vgg16/inference/__init__.py | 19 + .../ssd_vgg16/inference/config.json | 6 + .../ssd_vgg16/inference/fp32/__init__.py | 19 + .../ssd_vgg16/inference/fp32/model_init.py | 28 ++ .../ssd_vgg16/inference/int8/__init__.py | 19 + .../ssd_vgg16/inference/int8/model_init.py | 28 ++ .../inference/ssd_vgg16_model_init.py | 107 +++++ .../tensorflow/ssd_vgg16/__init__.py | 19 + .../ssd_vgg16/inference/__init__.py | 19 + .../ssd_vgg16/inference/anchor_manipulator.py | 353 +++++++++++++++++ .../ssd_vgg16/inference/eval_ssd.py | 316 +++++++++++++++ .../ssd_vgg16/inference/validate_ssd_vgg16.py | 111 ++++++ .../unit/common/tensorflow/tf_model_args.txt | 4 + 18 files changed, 1464 insertions(+), 1 deletion(-) create mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/README.md create mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/__init__.py create mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/inference/__init__.py create mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/inference/config.json create mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/__init__.py create mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/model_init.py create mode 100644
benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/__init__.py create mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/model_init.py create mode 100644 benchmarks/object_detection/tensorflow/ssd_vgg16/inference/ssd_vgg16_model_init.py create mode 100644 models/object_detection/tensorflow/ssd_vgg16/__init__.py create mode 100644 models/object_detection/tensorflow/ssd_vgg16/inference/__init__.py create mode 100644 models/object_detection/tensorflow/ssd_vgg16/inference/anchor_manipulator.py create mode 100644 models/object_detection/tensorflow/ssd_vgg16/inference/eval_ssd.py create mode 100644 models/object_detection/tensorflow/ssd_vgg16/inference/validate_ssd_vgg16.py diff --git a/benchmarks/README.md b/benchmarks/README.md index 4f5a83172..c8959af5a 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -36,6 +36,7 @@ dependencies to be installed: | Object Detection | TensorFlow | [R-FCN](https://arxiv.org/pdf/1605.06409.pdf) | Inference | [Int8](object_detection/tensorflow/rfcn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/rfcn/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [Faster R-CNN](https://arxiv.org/pdf/1506.01497.pdf) | Inference | [Int8](object_detection/tensorflow/faster_rcnn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/faster_rcnn/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [SSD-MobileNet](https://arxiv.org/pdf/1704.04861.pdf) | Inference | [Int8](object_detection/tensorflow/ssd-mobilenet/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-mobilenet/README.md#fp32-inference-instructions) | +| Object Detection | TensorFlow | SSD-VGG16 | Inference | [Int8](object_detection/tensorflow/ssd_vgg16/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd_vgg16/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | 
[SSD-ResNet34](https://arxiv.org/pdf/1512.02325.pdf) | Inference | [FP32](object_detection/tensorflow/ssd-resnet34/README.md#fp32-inference-instructions) | | Recommendation | TensorFlow | [NCF](https://arxiv.org/pdf/1708.05031.pdf) | Inference | [FP32](recommendation/tensorflow/ncf/README.md#fp32-inference-instructions) | | Recommendation | TensorFlow | [Wide & Deep Large Dataset](https://arxiv.org/pdf/1606.07792.pdf) | Inference | [Int8](recommendation/tensorflow/wide_deep_large_ds/README.md#int8-inference-instructions) [FP32](recommendation/tensorflow/wide_deep_large_ds/README.md#fp32-inference-instructions) | diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index d1ec29216..ab17c9bc6 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -642,6 +642,32 @@ function ssd-resnet34() { fi } +# SSD-VGG16 model +function ssd_vgg16() { + + if [ ${NOINSTALL} != "True" ]; then + pip install opencv-python Cython + + if [ ${ACCURACY_ONLY} == "True" ]; then + # get the python cocoapi + get_cocoapi ${MOUNT_EXTERNAL_MODELS_SOURCE}/coco ${MOUNT_INTELAI_MODELS_SOURCE}/inference + fi + fi + + cp ${MOUNT_INTELAI_MODELS_SOURCE}/__init__.py ${MOUNT_EXTERNAL_MODELS_SOURCE}/dataset + cp ${MOUNT_INTELAI_MODELS_SOURCE}/__init__.py ${MOUNT_EXTERNAL_MODELS_SOURCE}/preprocessing + cp ${MOUNT_INTELAI_MODELS_SOURCE}/__init__.py ${MOUNT_EXTERNAL_MODELS_SOURCE}/utility + export PYTHONPATH=${PYTHONPATH}:${MOUNT_EXTERNAL_MODELS_SOURCE} + + if [ ${PRECISION} == "int8" ] || [ ${PRECISION} == "fp32" ]; then + CMD="${CMD} $(add_steps_args)" + PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model + else + echo "PRECISION=${PRECISION} is not supported for ${MODEL_NAME}" + exit 1 + fi +} + # UNet model function unet() { if [ ${PRECISION} == "fp32" ]; then @@ -868,6 +894,8 @@ elif [ ${MODEL_NAME} == "ssd-mobilenet" ]; then ssd_mobilenet elif [ ${MODEL_NAME} == "ssd-resnet34" ]; then ssd-resnet34 +elif [ ${MODEL_NAME} == 
"ssd_vgg16" ]; then + ssd_vgg16 elif [ ${MODEL_NAME} == "unet" ]; then unet elif [ ${MODEL_NAME} == "transformer_language" ]; then diff --git a/benchmarks/object_detection/tensorflow/__init__.py b/benchmarks/object_detection/tensorflow/__init__.py index cf793ec6a..d9c4123de 100644 --- a/benchmarks/object_detection/tensorflow/__init__.py +++ b/benchmarks/object_detection/tensorflow/__init__.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2018 Intel Corporation +# Copyright (c) 2019 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md new file mode 100644 index 000000000..47233e7e2 --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md @@ -0,0 +1,367 @@ +# SSD-VGG16 + +This document has instructions for how to run SSD-VGG16 for the +following modes/precisions: +* [Int8 inference](#int8-inference-instructions) +* [FP32 inference](#fp32-inference-instructions) + +Benchmarking instructions and scripts for model training and inference +other precisions are coming later. + +## Int8 Inference Instructions + +1. Clone the [original model](https://github.com/HiKapok/SSD.TensorFlow) repository: +``` +$ git clone https://github.com/HiKapok/SSD.TensorFlow.git +$ cd SSD.TensorFlow +$ git checkout 2d8b0cb9b2e70281bf9dce438ff17ffa5e59075c +``` + +2. Download the 2017 validation +[COCO dataset](http://cocodataset.org/#home) and annotations: +This is required if you would like to run the accuracy test, +or the throughput and latency benchmark with real data. + +The [TensorFlow models](https://github.com/tensorflow/models) repo will be used for +converting the coco dataset to the TF records format. +``` +$ mkdir val +$ cd val +$ wget http://images.cocodataset.org/zips/val2017.zip +$ unzip val2017.zip +$ cd .. 
+``` + +Continue with the instructions below to generate the +TF record file. +``` +$ mkdir annotations +$ cd annotations +$ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip +$ unzip annotations_trainval2017.zip +$ cd .. +``` + +Since we are only using the validation dataset in this example, we will +create an empty directory and empty annotations json file to pass as the +train and test directories in the next step. +``` +$ mkdir empty_dir + +$ cd annotations +$ echo "{ \"images\": {}, \"categories\": {}}" > empty.json +$ cd .. +``` + +3. Now that you have the raw COCO dataset, we need to convert it to the +TF records format in order to use it with the inference script. We will +do this by running the `create_coco_tf_record.py` file in the TensorFlow +models repo. + +Follow the steps below to navigate to the proper directory and point the +script to the raw COCO dataset files that you have downloaded in step 2. +The `--output_dir` is the location where the TF record files will be +located after the script has completed. + +``` +# We are going to use an older version of the conversion script, so check out this git commit +$ cd models +$ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 + +$ cd research/object_detection/dataset_tools/ +$ python create_coco_tf_record.py --logtostderr \ + --train_image_dir="/home/<user>/coco/empty_dir" \ + --val_image_dir="/home/<user>/coco/val/val2017" \ + --test_image_dir="/home/<user>/coco/empty_dir" \ + --train_annotations_file="/home/<user>/coco/annotations/empty.json" \ + --val_annotations_file="/home/<user>/coco/annotations/instances_val2017.json" \ + --testdev_annotations_file="/home/<user>/coco/annotations/empty.json" \ + --output_dir="/home/<user>/coco/output" + +$ ll /home/myuser/coco/output +total 1598276 +-rw-rw-r--. 1 0 Nov 2 21:46 coco_testdev.record +-rw-rw-r--. 1 0 Nov 2 21:46 coco_train.record +-rw-rw-r--. 1 818336740 Nov 2 21:46 coco_val.record +``` + +4.
Download the pretrained model: + +``` +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/ssdvgg16_int8_pretrained_model.pb +``` + +5. Clone the [intelai/models](https://github.com/intelai/models) repo +and then run the benchmarking scripts for either benchmarking throughput +and latency or accuracy. +``` +$ git clone git@github.com:IntelAI/models.git +$ cd models/benchmarks +``` + +* Run benchmarking for throughput and latency where the `--model-source-dir` is the model source directory from step 1, +and the `--in-graph` is the pretrained model graph from step 4. +If you specify the `--data-location`, which is the path to the tf record file that you generated in step 3, +the benchmark will run with real data; otherwise dummy data will be used: +``` +python launch_benchmark.py \ + --model-name ssd_vgg16 \ + --mode inference \ + --precision int8 \ + --framework tensorflow \ + --docker-image intelaipg/intel-optimized-tensorflow:nightly-master-devel-mkl-py3 \ + --model-source-dir /home/<user>/SSD.TensorFlow \ + --data-location /home/<user>/coco/output \ + --in-graph /home/<user>/ssdvgg16_int8_pretrained_model.pb \ + --batch-size 1 \ + --socket-id 0 \ + --num-inter-threads 11 \ + --num-intra-threads 21 \ + --data-num-inter-threads 21 \ + --data-num-intra-threads 28 \ + -- warmup-steps=100 steps=500 +``` + +* For the accuracy test: + + * Clone the customized [cocoapi repo](https://github.com/waleedka/coco) in +the model directory `SSD.TensorFlow` from step 1. + ``` + $ git clone https://github.com/waleedka/coco.git + + ``` + * The `--data-location` is required, which is the path to the tf record file that you generated in step 3. + * Copy the annotation file `instances_val2017.json` (from step 2) to the dataset directory `/home/<user>/coco/output`.
+ * Use the `--accuracy-only` flag: +``` +python launch_benchmark.py \ + --model-name ssd_vgg16 \ + --mode inference \ + --precision int8 \ + --framework tensorflow \ + --docker-image intelaipg/intel-optimized-tensorflow:nightly-master-devel-mkl-py3 \ + --model-source-dir /home/<user>/SSD.TensorFlow \ + --data-location /home/<user>/coco/output \ + --in-graph /home/<user>/ssdvgg16_int8_pretrained_model.pb \ + --accuracy-only \ + --batch-size 1 +``` + +>Notes: +>* For the throughput and latency benchmark, we recommend the provided values for the arguments: `--num-inter-threads=11`, `--num-intra-threads=21`, `--data-num-inter-threads=21`, + `--data-num-intra-threads=28` for optimized performance on a `28-core Cascade Lake (CLX)` machine. + +>* The SSD-VGG16 model accuracy test works only with `Python3`-based docker images. + +>* The `--verbose` or `--output-dir` flag can be added to any of the above commands +to get additional debug output or change the default output location. + +6. The log file is saved to the directory given by `--output-dir`.
+ +Below is a sample log file tail when running benchmarking for throughput +and latency, the following results are based on CLX 28-cores with hyper-threading enabled: + +``` +Batch size = 1 +Throughput: 30.382 images/sec +Latency: 32.915 ms +Ran inference with batch size 1 +Log location outside container: {--output-dir value}/benchmark_ssd_vgg16_inference_int8_20190417_231832.log +``` + +And here is a sample log file tail when running for accuracy: + +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.231 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.386 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.243 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.058 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.265 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.391 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.224 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.330 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.355 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.091 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.420 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.558 +``` + +## FP32 Inference Instructions + +1. Clone the [original model](https://github.com/HiKapok/SSD.TensorFlow) repository: +``` +$ git clone https://github.com/HiKapok/SSD.TensorFlow.git +$ cd SSD.TensorFlow +$ git checkout 2d8b0cb9b2e70281bf9dce438ff17ffa5e59075c +``` + +2. Download the 2017 validation +[COCO dataset](http://cocodataset.org/#home) and annotations: + +This is required if you would like to run the accuracy test, +or the throughput and latency benchmark with real data. 
+ +The [TensorFlow models](https://github.com/tensorflow/models) repo will be used for +converting the coco dataset to the TF records format. +``` +$ mkdir val +$ cd val +$ wget http://images.cocodataset.org/zips/val2017.zip +$ unzip val2017.zip +$ cd .. +``` + +Continue the instructions below to generate the +TF record file. +``` +$ mkdir annotations +$ cd annotations +$ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip +$ unzip annotations_trainval2017.zip +$ cd .. +``` + +Since we are only using the validation dataset in this example, we will +create an empty directory and empty annotations json file to pass as the +train and test directories in the next step. +``` +$ mkdir empty_dir + +$ cd annotations +$ echo "{ \"images\": {}, \"categories\": {}}" > empty.json +$ cd .. +``` + +3. Now that you have the raw COCO dataset, we need to convert it to the +TF records format in order to use it with the inference script. We will +do this by running the `create_coco_tf_record.py` file in the TensorFlow +models repo. + +Follow the steps below to navigate to the proper directory and point the +script to the raw COCO dataset files that you have downloaded in step 2. +The `--output_dir` is the location where the TF record files will be +located after the script has completed. 
+ +``` +# We are going to use an older version of the conversion script to checkout the git commit +$ cd models +$ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 + +$ cd research/object_detection/dataset_tools/ +$ python create_coco_tf_record.py --logtostderr \ + --train_image_dir="/home//coco/empty_dir" \ + --val_image_dir="/home//coco/val/val2017" \ + --test_image_dir="/home//coco/empty_dir" \ + --train_annotations_file="/home//coco/annotations/empty.json" \ + --val_annotations_file="/home//coco/annotations/instances_val2017.json" \ + --testdev_annotations_file="/home//coco/annotations/empty.json" \ + --output_dir="/home//coco/output" + +$ ll /home/myuser/coco/output +total 1598276 +-rw-rw-r--. 1 0 Nov 2 21:46 coco_testdev.record +-rw-rw-r--. 1 0 Nov 2 21:46 coco_train.record +-rw-rw-r--. 1 818336740 Nov 2 21:46 coco_val.record +``` + +4. Download the pretrained model: +``` +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/ssdvgg16_fp32_pretrained_model.pb +``` + +5. Clone the [intelai/models](https://github.com/intelai/models) repo +and then run the benchmarking scripts for either benchmarking throughput +and latency or accuracy. 
+``` +$ git clone git@github.com:IntelAI/models.git +$ cd models/benchmarks +``` + +* Run benchmarking for throughput and latency where the `--model-source-dir` is the model source directory from step 1, +and the `--in-graph` is the pretrained model graph from step 4. +If you specify the `--data-location`, which is the path to the tf record file that you generated in step 3, +the benchmark will run with real data; otherwise dummy data will be used: +``` +$ cd /home/<user>/models/benchmarks + +$ python launch_benchmark.py \ + --data-location /home/<user>/coco/output \ + --in-graph /home/<user>/ssdvgg16_fp32_pretrained_model.pb \ + --model-source-dir /home/<user>/SSD.TensorFlow \ + --model-name ssd_vgg16 \ + --framework tensorflow \ + --precision fp32 \ + --mode inference \ + --docker-image intelaipg/intel-optimized-tensorflow:nightly-master-devel-mkl-py3 \ + --batch-size 1 \ + --socket-id 0 \ + --num-inter-threads 11 \ + --num-intra-threads 21 \ + --data-num-inter-threads 21 \ + --data-num-intra-threads 28 \ + -- warmup-steps=100 steps=500 +``` + +* For the accuracy test: + + * Clone the customized [cocoapi repo](https://github.com/waleedka/coco) in +the model directory `SSD.TensorFlow` from step 1. + ``` + $ git clone https://github.com/waleedka/coco.git + + ``` + * The `--data-location` is required, which is the path to the tf record file that you generated in step 3. + * Copy the annotation file `instances_val2017.json` (from step 2) to the dataset directory `/home/<user>/coco/output`.
+ * Use the `--accuracy-only` flag: +``` +python launch_benchmark.py \ + --model-name ssd_vgg16 \ + --mode inference \ + --precision fp32 \ + --framework tensorflow \ + --docker-image intelaipg/intel-optimized-tensorflow:nightly-master-devel-mkl-py3 \ + --model-source-dir /home/<user>/SSD.TensorFlow \ + --data-location /home/<user>/coco/output \ + --in-graph /home/<user>/ssdvgg16_fp32_pretrained_model.pb \ + --accuracy-only \ + --batch-size 1 +``` + +>Notes: +>* For the throughput and latency benchmark, we recommend the provided values for the arguments: `--num-inter-threads=11`, `--num-intra-threads=21`, `--data-num-inter-threads=21`, + `--data-num-intra-threads=28` for optimized performance on a `28-core Cascade Lake (CLX)` machine. + +>* The SSD-VGG16 model accuracy test works only with `Python3`-based docker images. + +>* The `--verbose` or `--output-dir` flag can be added to any of the above commands +to get additional debug output or change the default output location. + +6. The log file is saved to the directory given by `--output-dir`.
+ +Below is a sample log file tail when running throughput and latency benchmarking, +the following results are based on CLX 28-cores with hyper-threading enabled: + +``` +Batch size = 1 +Throughput: 15.662 images/sec +Latency: 63.848 ms +Ran inference with batch size 1 +Log location outside container: {--output-dir value}/benchmark_ssd_vgg16_inference_fp32_20190417_232130.log +``` + +Below is a sample log file tail when testing accuracy: + +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.236 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.391 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.248 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.058 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.264 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.399 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.227 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.334 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.358 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.091 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.423 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564 +``` diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/__init__.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/__init__.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/config.json b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/config.json new file mode 100644 index 000000000..14d129748 --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/config.json @@ -0,0 +1,6 @@ +{ + "optimization_parameters": { + "KMP_SETTINGS": 1, + "TF_ENABLE_WINOGRAD_NONFUSED": 1 + } +} diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/__init__.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/model_init.py new file mode 100644 index 000000000..5698700f4 --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/fp32/model_init.py @@ -0,0 +1,28 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +from object_detection.tensorflow.ssd_vgg16.inference.ssd_vgg16_model_init import SSDVGG16ModelInitializer + + +class ModelInitializer(SSDVGG16ModelInitializer): + """Model initializer for SSD-VGG16 FP32 inference""" + + def __init__(self, args, custom_args=[], platform_util=None): + super(ModelInitializer, self).__init__(args, custom_args, platform_util) diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/__init__.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/model_init.py new file mode 100644 index 000000000..01d1822ba --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/int8/model_init.py @@ -0,0 +1,28 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +from object_detection.tensorflow.ssd_vgg16.inference.ssd_vgg16_model_init import SSDVGG16ModelInitializer + + +class ModelInitializer(SSDVGG16ModelInitializer): + """Model initializer for SSD-VGG16 Int8 inference""" + + def __init__(self, args, custom_args=[], platform_util=None): + super(ModelInitializer, self).__init__(args, custom_args, platform_util) diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/ssd_vgg16_model_init.py b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/ssd_vgg16_model_init.py new file mode 100644 index 000000000..c54994170 --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/inference/ssd_vgg16_model_init.py @@ -0,0 +1,107 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import argparse + +from common.base_model_init import BaseModelInitializer, set_env_var + + +class SSDVGG16ModelInitializer(BaseModelInitializer): + """Common model initializer for SSD-VGG16 inference""" + + def run_inference_sanity_checks(self, args, custom_args): + if not args.input_graph: + sys.exit("Please provide a path to the frozen graph directory" + " via the '--in-graph' flag.") + if not args.data_location and self.args.accuracy_only: + sys.exit("For accuracy test, please provide a path to the data directory via the " + "'--data-location' flag.") + if args.batch_size != -1 and args.batch_size != 1: + sys.exit("SSD-VGG16 inference supports 'batch-size=1' " + + "only, please modify via the '--batch-size' flag.") + + def __init__(self, args, custom_args, platform_util): + super(SSDVGG16ModelInitializer, self).__init__(args, custom_args, platform_util) + + self.parse_custom_args() + self.run_inference_sanity_checks(self.args, self.custom_args) + + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) + + self.set_num_inter_intra_threads(num_inter_threads=self.args.num_inter_threads, + num_intra_threads=self.args.num_intra_threads) + + omp_num_threads = str(int(platform_util.num_cores_per_socket / 2))\ + if self.args.precision == "int8" else
platform_util.num_cores_per_socket + + set_env_var("OMP_NUM_THREADS", omp_num_threads + if self.args.num_cores == -1 else self.args.num_cores) + + script_path = os.path.join( + self.args.intelai_models, self.args.mode, "eval_ssd.py") + + self.run_cmd = self.get_command_prefix( + self.args.socket_id) + "{} {}".format(self.python_exe, script_path) + + self.run_cmd += " --input-graph={} " \ + " --num-inter-threads={} --num-intra-threads={} ". \ + format(self.args.input_graph, self.args.num_inter_threads, + self.args.num_intra_threads) + + if self.args.data_num_inter_threads: + self.run_cmd += " --data-num-inter-threads={} ".format( + self.args.data_num_inter_threads) + + if self.args.data_num_intra_threads: + self.run_cmd += " --data-num-intra-threads={} ".format( + self.args.data_num_intra_threads) + + if self.args.benchmark_only: + self.run_cmd += " --warmup-steps={} --steps={} ". \ + format(self.args.warmup_steps, self.args.steps) + + # if the data location directory is not empty, then include the arg + if self.args.data_location and os.listdir(self.args.data_location): + self.run_cmd += " --data-location={} ".format(self.args.data_location) + + if self.args.accuracy_only: + self.run_cmd += "--accuracy-only " + + def parse_custom_args(self): + if self.custom_args is not None: # an empty list is still parsed so the warmup-steps/steps defaults get applied + parser = argparse.ArgumentParser() + parser.add_argument("--warmup-steps", type=int, default=10, + help="number of warmup steps") + parser.add_argument("--steps", type=int, default=50, + help="number of steps") + + self.args = parser.parse_args(self.custom_args, + namespace=self.args) + + def run(self): + self.run_command(self.run_cmd) diff --git a/models/object_detection/tensorflow/ssd_vgg16/__init__.py b/models/object_detection/tensorflow/ssd_vgg16/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/models/object_detection/tensorflow/ssd_vgg16/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/models/object_detection/tensorflow/ssd_vgg16/inference/__init__.py b/models/object_detection/tensorflow/ssd_vgg16/inference/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/models/object_detection/tensorflow/ssd_vgg16/inference/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/models/object_detection/tensorflow/ssd_vgg16/inference/anchor_manipulator.py b/models/object_detection/tensorflow/ssd_vgg16/inference/anchor_manipulator.py new file mode 100644 index 000000000..f52acdc08 --- /dev/null +++ b/models/object_detection/tensorflow/ssd_vgg16/inference/anchor_manipulator.py @@ -0,0 +1,353 @@ +# Copyright 2018 Changan Wang + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# +import math + +import tensorflow as tf +import numpy as np + +from tensorflow.contrib.image.python.ops import image_ops + +def areas(gt_bboxes): + with tf.name_scope('bboxes_areas', values=[gt_bboxes]): + ymin, xmin, ymax, xmax = tf.split(gt_bboxes, 4, axis=1) + return (xmax - xmin) * (ymax - ymin) + +def intersection(gt_bboxes, default_bboxes): + with tf.name_scope('bboxes_intersection', values=[gt_bboxes, default_bboxes]): + # num_anchors x 1 + ymin, xmin, ymax, xmax = tf.split(gt_bboxes, 4, axis=1) + # 1 x num_anchors + gt_ymin, gt_xmin, gt_ymax, gt_xmax = [tf.transpose(b, perm=[1, 0]) for b in tf.split(default_bboxes, 4, axis=1)] + # broadcast here to generate the full matrix + int_ymin = tf.maximum(ymin, gt_ymin) + int_xmin = tf.maximum(xmin, gt_xmin) + int_ymax = tf.minimum(ymax, gt_ymax) + int_xmax = tf.minimum(xmax, gt_xmax) + h = tf.maximum(int_ymax - int_ymin, 0.) + w = tf.maximum(int_xmax - int_xmin, 0.) + + return h * w +def iou_matrix(gt_bboxes, default_bboxes): + with tf.name_scope('iou_matrix', values = [gt_bboxes, default_bboxes]): + inter_vol = intersection(gt_bboxes, default_bboxes) + # broadcast + union_vol = areas(gt_bboxes) + tf.transpose(areas(default_bboxes), perm=[1, 0]) - inter_vol + + return tf.where(tf.equal(union_vol, 0.0), + tf.zeros_like(inter_vol), tf.truediv(inter_vol, union_vol)) + +def do_dual_max_match(overlap_matrix, low_thres, high_thres, ignore_between=True, gt_max_first=True): + ''' + overlap_matrix: num_gt * num_anchors + ''' + with tf.name_scope('dual_max_match', values=[overlap_matrix]): + # first match from anchors' side + anchors_to_gt = tf.argmax(overlap_matrix, axis=0) + # the matching degree + match_values = tf.reduce_max(overlap_matrix, axis=0) + + #positive_mask = tf.greater(match_values, high_thres) + less_mask = tf.less(match_values, low_thres) + between_mask = tf.logical_and(tf.less(match_values, high_thres), tf.greater_equal(match_values, low_thres)) + negative_mask = 
less_mask if ignore_between else between_mask + ignore_mask = between_mask if ignore_between else less_mask + # fill all negative positions with -1, all ignore positions is -2 + match_indices = tf.where(negative_mask, -1 * tf.ones_like(anchors_to_gt), anchors_to_gt) + match_indices = tf.where(ignore_mask, -2 * tf.ones_like(match_indices), match_indices) + + # negtive values has no effect in tf.one_hot, that means all zeros along that axis + # so all positive match positions in anchors_to_gt_mask is 1, all others are 0 + anchors_to_gt_mask = tf.one_hot(tf.clip_by_value(match_indices, -1, tf.cast(tf.shape(overlap_matrix)[0], tf.int64)), + tf.shape(overlap_matrix)[0], on_value=1, off_value=0, axis=0, dtype=tf.int32) + # match from ground truth's side + gt_to_anchors = tf.argmax(overlap_matrix, axis=1) + + if gt_max_first: + # the max match from ground truth's side has higher priority + left_gt_to_anchors_mask = tf.one_hot(gt_to_anchors, tf.shape(overlap_matrix)[1], on_value=1, off_value=0, axis=1, dtype=tf.int32) + else: + # the max match from anchors' side has higher priority + # use match result from ground truth's side only when the the matching degree from anchors' side is lower than position threshold + left_gt_to_anchors_mask = tf.cast(tf.logical_and(tf.reduce_max(anchors_to_gt_mask, axis=1, keep_dims=True) < 1, + tf.one_hot(gt_to_anchors, tf.shape(overlap_matrix)[1], + on_value=True, off_value=False, axis=1, dtype=tf.bool) + ), tf.int64) + # can not use left_gt_to_anchors_mask here, because there are many ground truthes match to one anchor, we should pick the highest one even when we are merging matching from ground truth side + left_gt_to_anchors_scores = overlap_matrix * tf.to_float(left_gt_to_anchors_mask) + # merge matching results from ground truth's side with the original matching results from anchors' side + # then select all the overlap score of those matching pairs + selected_scores = tf.gather_nd(overlap_matrix, 
tf.stack([tf.where(tf.reduce_max(left_gt_to_anchors_mask, axis=0) > 0, + tf.argmax(left_gt_to_anchors_scores, axis=0), + anchors_to_gt), + tf.range(tf.cast(tf.shape(overlap_matrix)[1], tf.int64))], axis=1)) + # return the matching results for both foreground anchors and background anchors, also with overlap scores + return tf.where(tf.reduce_max(left_gt_to_anchors_mask, axis=0) > 0, + tf.argmax(left_gt_to_anchors_scores, axis=0), + match_indices), selected_scores + +# def save_anchors(bboxes, labels, anchors_point): +# if not hasattr(save_image_with_bbox, "counter"): +# save_image_with_bbox.counter = 0 # it doesn't exist yet, so initialize it +# save_image_with_bbox.counter += 1 + +# np.save('./debug/bboxes_{}.npy'.format(save_image_with_bbox.counter), np.copy(bboxes)) +# np.save('./debug/labels_{}.npy'.format(save_image_with_bbox.counter), np.copy(labels)) +# np.save('./debug/anchors_{}.npy'.format(save_image_with_bbox.counter), np.copy(anchors_point)) +# return save_image_with_bbox.counter + +class AnchorEncoder(object): + def __init__(self, allowed_borders, positive_threshold, ignore_threshold, prior_scaling, clip=False): + super(AnchorEncoder, self).__init__() + self._all_anchors = None + self._allowed_borders = allowed_borders + self._positive_threshold = positive_threshold + self._ignore_threshold = ignore_threshold + self._prior_scaling = prior_scaling + self._clip = clip + + def center2point(self, center_y, center_x, height, width): + return center_y - height / 2., center_x - width / 2., center_y + height / 2., center_x + width / 2., + + def point2center(self, ymin, xmin, ymax, xmax): + height, width = (ymax - ymin), (xmax - xmin) + return ymin + height / 2., xmin + width / 2., height, width + + def encode_all_anchors(self, labels, bboxes, all_anchors, all_num_anchors_depth, all_num_anchors_spatial, debug=False): + # y, x, h, w are all in range [0, 1] relative to the original image size + # shape info: + # y_on_image, x_on_image: layers_shapes[0] * 
layers_shapes[1] + # h_on_image, w_on_image: num_anchors + assert (len(all_num_anchors_depth)==len(all_num_anchors_spatial)) and (len(all_num_anchors_depth)==len(all_anchors)), 'inconsist num layers for anchors.' + with tf.name_scope('encode_all_anchors'): + num_layers = len(all_num_anchors_depth) + list_anchors_ymin = [] + list_anchors_xmin = [] + list_anchors_ymax = [] + list_anchors_xmax = [] + tiled_allowed_borders = [] + for ind, anchor in enumerate(all_anchors): + anchors_ymin_, anchors_xmin_, anchors_ymax_, anchors_xmax_ = self.center2point(anchor[0], anchor[1], anchor[2], anchor[3]) + + list_anchors_ymin.append(tf.reshape(anchors_ymin_, [-1])) + list_anchors_xmin.append(tf.reshape(anchors_xmin_, [-1])) + list_anchors_ymax.append(tf.reshape(anchors_ymax_, [-1])) + list_anchors_xmax.append(tf.reshape(anchors_xmax_, [-1])) + + tiled_allowed_borders.extend([self._allowed_borders[ind]] * all_num_anchors_depth[ind] * all_num_anchors_spatial[ind]) + + anchors_ymin = tf.concat(list_anchors_ymin, 0, name='concat_ymin') + anchors_xmin = tf.concat(list_anchors_xmin, 0, name='concat_xmin') + anchors_ymax = tf.concat(list_anchors_ymax, 0, name='concat_ymax') + anchors_xmax = tf.concat(list_anchors_xmax, 0, name='concat_xmax') + + if self._clip: + anchors_ymin = tf.clip_by_value(anchors_ymin, 0., 1.) + anchors_xmin = tf.clip_by_value(anchors_xmin, 0., 1.) + anchors_ymax = tf.clip_by_value(anchors_ymax, 0., 1.) + anchors_xmax = tf.clip_by_value(anchors_xmax, 0., 1.) + + anchor_allowed_borders = tf.stack(tiled_allowed_borders, 0, name='concat_allowed_borders') + + inside_mask = tf.logical_and(tf.logical_and(anchors_ymin > -anchor_allowed_borders * 1., + anchors_xmin > -anchor_allowed_borders * 1.), + tf.logical_and(anchors_ymax < (1. + anchor_allowed_borders * 1.), + anchors_xmax < (1. 
+ anchor_allowed_borders * 1.))) + + anchors_point = tf.stack([anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax], axis=-1) + + # save_anchors_op = tf.py_func(save_anchors, + # [bboxes, + # labels, + # anchors_point], + # tf.int64, stateful=True) + + # with tf.control_dependencies([save_anchors_op]): + overlap_matrix = iou_matrix(bboxes, anchors_point) * tf.cast(tf.expand_dims(inside_mask, 0), tf.float32) + matched_gt, gt_scores = do_dual_max_match(overlap_matrix, self._ignore_threshold, self._positive_threshold) + # get all positive matching positions + matched_gt_mask = matched_gt > -1 + matched_indices = tf.clip_by_value(matched_gt, 0, tf.int64.max) + # the labels here maybe chaos at those non-positive positions + gt_labels = tf.gather(labels, matched_indices) + # filter the invalid labels + gt_labels = gt_labels * tf.cast(matched_gt_mask, tf.int64) + # set those ignored positions to -1 + gt_labels = gt_labels + (-1 * tf.cast(matched_gt < -1, tf.int64)) + + gt_ymin, gt_xmin, gt_ymax, gt_xmax = tf.unstack(tf.gather(bboxes, matched_indices), 4, axis=-1) + + # transform to center / size. + gt_cy, gt_cx, gt_h, gt_w = self.point2center(gt_ymin, gt_xmin, gt_ymax, gt_xmax) + anchor_cy, anchor_cx, anchor_h, anchor_w = self.point2center(anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax) + # encode features. 
+ # the prior_scaling (in fact is 5 and 10) is use for balance the regression loss of center and with(or height) + gt_cy = (gt_cy - anchor_cy) / anchor_h / self._prior_scaling[0] + gt_cx = (gt_cx - anchor_cx) / anchor_w / self._prior_scaling[1] + gt_h = tf.log(gt_h / anchor_h) / self._prior_scaling[2] + gt_w = tf.log(gt_w / anchor_w) / self._prior_scaling[3] + # now gt_localizations is our regression object, but also maybe chaos at those non-positive positions + if debug: + gt_targets = tf.stack([anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax], axis=-1) + else: + gt_targets = tf.stack([gt_cy, gt_cx, gt_h, gt_w], axis=-1) + # set all targets of non-positive positions to 0 + gt_targets = tf.expand_dims(tf.cast(matched_gt_mask, tf.float32), -1) * gt_targets + self._all_anchors = (anchor_cy, anchor_cx, anchor_h, anchor_w) + return gt_targets, gt_labels, gt_scores + + # return a list, of which each is: + # shape: [feature_h, feature_w, num_anchors, 4] + # order: ymin, xmin, ymax, xmax + def decode_all_anchors(self, pred_location, num_anchors_per_layer): + assert self._all_anchors is not None, 'no anchors to decode.' + with tf.name_scope('decode_all_anchors', values=[pred_location]): + anchor_cy, anchor_cx, anchor_h, anchor_w = self._all_anchors + + pred_h = tf.exp(pred_location[:, -2] * self._prior_scaling[2]) * anchor_h + pred_w = tf.exp(pred_location[:, -1] * self._prior_scaling[3]) * anchor_w + pred_cy = pred_location[:, 0] * self._prior_scaling[0] * anchor_h + anchor_cy + pred_cx = pred_location[:, 1] * self._prior_scaling[1] * anchor_w + anchor_cx + + return tf.split(tf.stack(self.center2point(pred_cy, pred_cx, pred_h, pred_w), axis=-1), num_anchors_per_layer, axis=0) + + def ext_decode_all_anchors(self, pred_location, all_anchors, all_num_anchors_depth, all_num_anchors_spatial): + assert (len(all_num_anchors_depth)==len(all_num_anchors_spatial)) and (len(all_num_anchors_depth)==len(all_anchors)), 'inconsist num layers for anchors.' 
+ with tf.name_scope('ext_decode_all_anchors', values=[pred_location]): + num_anchors_per_layer = [] + for ind in range(len(all_anchors)): + num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind]) + + num_layers = len(all_num_anchors_depth) + list_anchors_ymin = [] + list_anchors_xmin = [] + list_anchors_ymax = [] + list_anchors_xmax = [] + tiled_allowed_borders = [] + for ind, anchor in enumerate(all_anchors): + anchors_ymin_, anchors_xmin_, anchors_ymax_, anchors_xmax_ = self.center2point(anchor[0], anchor[1], anchor[2], anchor[3]) + + list_anchors_ymin.append(tf.reshape(anchors_ymin_, [-1])) + list_anchors_xmin.append(tf.reshape(anchors_xmin_, [-1])) + list_anchors_ymax.append(tf.reshape(anchors_ymax_, [-1])) + list_anchors_xmax.append(tf.reshape(anchors_xmax_, [-1])) + + anchors_ymin = tf.concat(list_anchors_ymin, 0, name='concat_ymin') + anchors_xmin = tf.concat(list_anchors_xmin, 0, name='concat_xmin') + anchors_ymax = tf.concat(list_anchors_ymax, 0, name='concat_ymax') + anchors_xmax = tf.concat(list_anchors_xmax, 0, name='concat_xmax') + + anchor_cy, anchor_cx, anchor_h, anchor_w = self.point2center(anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax) + + pred_h = tf.exp(pred_location[:,-2] * self._prior_scaling[2]) * anchor_h + pred_w = tf.exp(pred_location[:, -1] * self._prior_scaling[3]) * anchor_w + pred_cy = pred_location[:, 0] * self._prior_scaling[0] * anchor_h + anchor_cy + pred_cx = pred_location[:, 1] * self._prior_scaling[1] * anchor_w + anchor_cx + + return tf.split(tf.stack(self.center2point(pred_cy, pred_cx, pred_h, pred_w), axis=-1), num_anchors_per_layer, axis=0) + +class AnchorCreator(object): + def __init__(self, img_shape, layers_shapes, anchor_scales, extra_anchor_scales, anchor_ratios, layer_steps): + super(AnchorCreator, self).__init__() + # img_shape -> (height, width) + self._img_shape = img_shape + self._layers_shapes = layers_shapes + self._anchor_scales = anchor_scales + self._extra_anchor_scales 
= extra_anchor_scales + self._anchor_ratios = anchor_ratios + self._layer_steps = layer_steps + self._anchor_offset = [0.5] * len(self._layers_shapes) + + def get_layer_anchors(self, layer_shape, anchor_scale, extra_anchor_scale, anchor_ratio, layer_step, offset = 0.5): + ''' assume layer_shape[0] = 6, layer_shape[1] = 5 + x_on_layer = [[0, 1, 2, 3, 4], + [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4], + [0, 1, 2, 3, 4]] + y_on_layer = [[0, 0, 0, 0, 0], + [1, 1, 1, 1, 1], + [2, 2, 2, 2, 2], + [3, 3, 3, 3, 3], + [4, 4, 4, 4, 4], + [5, 5, 5, 5, 5]] + ''' + with tf.name_scope('get_layer_anchors'): + x_on_layer, y_on_layer = tf.meshgrid(tf.range(layer_shape[1]), tf.range(layer_shape[0])) + + y_on_image = (tf.cast(y_on_layer, tf.float32) + offset) * layer_step / self._img_shape[0] + x_on_image = (tf.cast(x_on_layer, tf.float32) + offset) * layer_step / self._img_shape[1] + + num_anchors_along_depth = len(anchor_scale) * len(anchor_ratio) + len(extra_anchor_scale) + num_anchors_along_spatial = layer_shape[1] * layer_shape[0] + + list_h_on_image = [] + list_w_on_image = [] + + global_index = 0 + # for square anchors + for _, scale in enumerate(extra_anchor_scale): + list_h_on_image.append(scale) + list_w_on_image.append(scale) + global_index += 1 + # for other aspect ratio anchors + for scale_index, scale in enumerate(anchor_scale): + for ratio_index, ratio in enumerate(anchor_ratio): + list_h_on_image.append(scale / math.sqrt(ratio)) + list_w_on_image.append(scale * math.sqrt(ratio)) + global_index += 1 + # shape info: + # y_on_image, x_on_image: layers_shapes[0] * layers_shapes[1] + # h_on_image, w_on_image: num_anchors_along_depth + return tf.expand_dims(y_on_image, axis=-1), tf.expand_dims(x_on_image, axis=-1), \ + tf.constant(list_h_on_image, dtype=tf.float32), \ + tf.constant(list_w_on_image, dtype=tf.float32), num_anchors_along_depth, num_anchors_along_spatial + + def get_all_anchors(self): + all_anchors = [] + all_num_anchors_depth = [] + 
all_num_anchors_spatial = [] + for layer_index, layer_shape in enumerate(self._layers_shapes): + anchors_this_layer = self.get_layer_anchors(layer_shape, + self._anchor_scales[layer_index], + self._extra_anchor_scales[layer_index], + self._anchor_ratios[layer_index], + self._layer_steps[layer_index], + self._anchor_offset[layer_index]) + all_anchors.append(anchors_this_layer[:-2]) + all_num_anchors_depth.append(anchors_this_layer[-2]) + all_num_anchors_spatial.append(anchors_this_layer[-1]) + return all_anchors, all_num_anchors_depth, all_num_anchors_spatial + diff --git a/models/object_detection/tensorflow/ssd_vgg16/inference/eval_ssd.py b/models/object_detection/tensorflow/ssd_vgg16/inference/eval_ssd.py new file mode 100644 index 000000000..fdbb4a44d --- /dev/null +++ b/models/object_detection/tensorflow/ssd_vgg16/inference/eval_ssd.py @@ -0,0 +1,316 @@ +# Copyright 2018 Changan Wang + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import numpy as np +import time +from argparse import ArgumentParser +import sys +from google.protobuf import text_format +import tensorflow as tf + +from dataset import dataset_common +from preprocessing import ssd_preprocessing +import anchor_manipulator + +SSD_VGG16_IMAGE_SIZE = 300 +NUM_CLASSES = 81 +NEGATIVE_RATIO = 1.0 +SELECT_THRESHOLD = 0.1 +MATCH_THRESHOLD = 0.5 +NEG_THRESHOLD = 0.5 +DATA_FORMAT = 'channels_last' +NUM_READERS = 10 +NUM_PREPROCESSING_THREADS = 28 + + +def input_fn(dataset_pattern='val-*', batch_size=1, data_location=None): + out_shape = [SSD_VGG16_IMAGE_SIZE] * 2 + anchor_creator = anchor_manipulator.AnchorCreator(out_shape, + layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), + (1, 1)], + anchor_scales=[(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), + (0.9,)], + extra_anchor_scales=[(0.1414,), (0.2739,), (0.4541,), (0.6315,), + (0.8078,), (0.9836,)], + anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333), + (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), + (1., 2., .5), (1., 2., .5)], + layer_steps=[8, 16, 32, 64, 100, 300]) + all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors() + + num_anchors_per_layer = [] + for ind in range(len(all_anchors)): + num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind]) + + anchor_encoder_decoder = 
anchor_manipulator.AnchorEncoder(allowed_borders=[1.0] * 6, + positive_threshold=MATCH_THRESHOLD, + ignore_threshold=NEG_THRESHOLD, + prior_scaling=[0.1, 0.1, 0.2, 0.2]) + + image_preprocessing_fn = lambda image_, labels_, bboxes_: ssd_preprocessing.preprocess_image(image_, labels_, + bboxes_, out_shape, + is_training=False, + data_format=DATA_FORMAT, + output_rgb=False) + anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(glabels_, gbboxes_, + all_anchors, + all_num_anchors_depth, + all_num_anchors_spatial) + + image, filename, shape, loc_targets, cls_targets, match_scores = \ + dataset_common.slim_get_batch(NUM_CLASSES, + batch_size, + 'val', + os.path.join( + data_location, + dataset_pattern), + NUM_READERS, + NUM_PREPROCESSING_THREADS, + image_preprocessing_fn, + anchor_encoder_fn, + num_epochs=1, + is_training=False) + return image, filename, shape + + +class EvaluateSSDModel(): + def __init__(self): + + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-b', "--batch-size", + help="Specify the batch size. 
If this " \ + "parameter is not specified or is -1, the " \ + "largest ideal batch size for the model will " \ + "be used.", + dest="batch_size", type=int, default=1) + + arg_parser.add_argument('-e', "--num-inter-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + + arg_parser.add_argument('-a', "--num-intra-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + + arg_parser.add_argument('--data-num-inter-threads', dest='data_num_inter_threads', + help='number threads across operators', + type=int, default=21) + + arg_parser.add_argument('--data-num-intra-threads', dest='data_num_intra_threads', + help='number threads for data layer operator', + type=int, default=28) + + arg_parser.add_argument('--kmp-blocktime', dest='kmp_blocktime', + help='number of kmp blocktime', + type=int, default=1) + + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data. 
' + 'If this parameter is not specified, ' + 'the benchmark will use random/dummy data.', + dest="data_location", default=None) + + arg_parser.add_argument('-r', "--accuracy-only", + help='For accuracy measurement only.', + dest='accuracy_only', action='store_true') + + arg_parser.add_argument("--warmup-steps", type=int, default=10, + help="number of warmup steps") + + arg_parser.add_argument("--steps", type=int, default=50, + help="number of steps") + + self.args = arg_parser.parse_args() + + os.environ["KMP_BLOCKTIME"] = str(self.args.kmp_blocktime) + + def eval(self): + + data_config = tf.ConfigProto() + data_config.inter_op_parallelism_threads = self.args.data_num_inter_threads + data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads + data_config.use_per_session_threads = 1 + + infer_config = tf.ConfigProto() + infer_config.inter_op_parallelism_threads = self.args.num_inter_threads # self.args.num_inter_threads + infer_config.intra_op_parallelism_threads = self.args.num_intra_threads # self.args.num_intra_threads + infer_config.use_per_session_threads = 1 + + data_graph = tf.Graph() + with data_graph.as_default(): + if self.args.data_location: # real data + image, filename, shape = \ + input_fn(dataset_pattern='val-*', batch_size=self.args.batch_size, data_location=self.args.data_location) + else: # dummy data + input_shape = [self.args.batch_size, SSD_VGG16_IMAGE_SIZE, SSD_VGG16_IMAGE_SIZE, 3] + image = tf.random.uniform(input_shape, -123.68, 151.06, dtype=tf.float32, name='synthetic_images') + + infer_graph = tf.Graph() + model_file = self.args.input_graph + with infer_graph.as_default(): + graph_def = tf.GraphDef() + file_ext = os.path.splitext(model_file)[1] + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + tf.import_graph_def(graph_def, name='') + + # Define input and output Tensors for inference graph + output_names = ["ExpandDims"] 
+ for i in range(1, 160): + output_names.append("ExpandDims_" + str(i)) + + input_operation = infer_graph.get_operation_by_name("input") + output_operations = [] + for name in output_names: + output_operations.append(infer_graph.get_operation_by_name(name).outputs[0]) + + infer_sess = tf.Session(graph=infer_graph, config=infer_config) + + if not self.args.accuracy_only: # benchmark + step = 0 + total_steps = self.args.warmup_steps + self.args.steps + + total_images = 0 + total_duration = 0 + + if not self.args.data_location: # inference with dummy data + print("Inference with dummy data") + data_sess = tf.Session(graph=data_graph, config=data_config) + + while step < total_steps: + step += 1 + image_np = data_sess.run(image) + start_time = time.time() + + infer_sess.run(output_operations, {input_operation.outputs[0]: image_np}) + duration = time.time() - start_time + + if step > self.args.warmup_steps: + total_duration += duration + total_images += self.args.batch_size + print('Iteration %d: %.6f sec' % (step, duration)) + sys.stdout.flush() + + else: # benchmark with real data + print("Inference with real data") + with data_graph.as_default(): + with tf.train.MonitoredTrainingSession(config=data_config) as data_sess: + while not data_sess.should_stop() and step < total_steps: + step += 1 + start_time = time.time() + image_np, _, _ = data_sess.run([image, filename, shape]) + infer_sess.run(output_operations, {input_operation.outputs[0]: image_np}) + duration = time.time() - start_time + + if step > self.args.warmup_steps: + total_duration += duration + total_images += self.args.batch_size + print('Iteration %d: %.6f sec' % (step, duration)) + sys.stdout.flush() + + print('Batch size = %d' % self.args.batch_size) + print('Throughput: %.3f images/sec' % (total_images / total_duration)) + if (self.args.batch_size == 1): + latency = (total_duration / total_images) * 1000 + print('Latency: %.3f ms' % (latency)) + + else: # accuracy only + results = [] + filenames = [] + 
shapes = [] + total_processed_images = 0 + with data_graph.as_default(): + with tf.train.MonitoredTrainingSession(config=data_config) as data_sess: + while not data_sess.should_stop(): + image_np, filename_np, shape_np = data_sess.run([image, filename, shape]) + total_processed_images += self.args.batch_size + predict = infer_sess.run(output_operations, {input_operation.outputs[0]: image_np}) + if (total_processed_images % 30 == 0): + print("Predicting results for {} images...".format(total_processed_images)) + sys.stdout.flush() + results.append(predict) + filenames.append(filename_np[0]) + shapes.append(shape_np[0]) + + log_dir = os.path.join('./', 'logs') + # if it doesn't exist, create. + if not os.path.exists(log_dir): + os.makedirs(log_dir) + for class_ind in range(1, NUM_CLASSES): + with open(os.path.join(log_dir, 'results_{}.txt'.format(class_ind)), 'wt') as f: + for image_ind, pred in enumerate(results): + shape = shapes[image_ind] + filename = filenames[image_ind] + # parsing prediction results and calculate bbox + scores = pred[(class_ind * 2) - 2][0] + bboxes = pred[(class_ind * 2) - 1][0] + bboxes[:, 0] = (bboxes[:, 0] * shape[0]).astype(np.int32, copy=False) + 1 + bboxes[:, 1] = (bboxes[:, 1] * shape[1]).astype(np.int32, copy=False) + 1 + bboxes[:, 2] = (bboxes[:, 2] * shape[0]).astype(np.int32, copy=False) + 1 + bboxes[:, 3] = (bboxes[:, 3] * shape[1]).astype(np.int32, copy=False) + 1 + + valid_mask = np.logical_and((bboxes[:, 2] - bboxes[:, 0] > 0), + (bboxes[:, 3] - bboxes[:, 1] > 0)) + + for det_ind in range(valid_mask.shape[0]): + if not valid_mask[det_ind]: + continue + f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 
+ format(filename.decode('utf8')[:-4], scores[det_ind], + bboxes[det_ind, 1], bboxes[det_ind, 0], + bboxes[det_ind, 3], bboxes[det_ind, 2])) + + coco_eval = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "validate_ssd_vgg16.py") + cmd_prefix = "python " + coco_eval + cmd_prefix += " --detections_path ./logs" + cmd_prefix += " --annotations_file {}/instances_val2017.json".format(self.args.data_location) + cmd = cmd_prefix + os.system(cmd) + +if __name__ == "__main__": + obj = EvaluateSSDModel() + obj.eval() diff --git a/models/object_detection/tensorflow/ssd_vgg16/inference/validate_ssd_vgg16.py b/models/object_detection/tensorflow/ssd_vgg16/inference/validate_ssd_vgg16.py new file mode 100644 index 000000000..c580fc022 --- /dev/null +++ b/models/object_detection/tensorflow/ssd_vgg16/inference/validate_ssd_vgg16.py @@ -0,0 +1,111 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# +import argparse +import os +import json +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + + +def convert_detection(label, detection): + + ID_INDEX = 0 + SCORE_INDEX = 1 + XMIN_INDEX = 2 + YMIN_INDEX = 3 + XMAX_INDEX = 4 + YMAX_INDEX = 5 + LABEL_MAP = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, + 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, + 22: 23, 23: 24, 24: 25, 25: 27, 26: 28, 27: 31, 28: 32, 29: 33, 30: 34, 31: 35, + 32: 36, 33: 37, 34: 38, 35: 39, 36: 40, 37: 41, 38: 42, 39: 43, 40: 44, 41: 46, + 42: 47, 43: 48, 44: 49, 45: 50, 46: 51, 47: 52, 48: 53, 49: 54, 50: 55, 51: 56, + 52: 57, 53: 58, 54: 59, 55: 60, 56: 61, 57: 62, 58: 63, 59: 64, 60: 65, 61: 67, + 62: 70, 63: 72, 64: 73, 65: 74, 66: 75, 67: 76, 68: 77, 69: 78, 70: 79, 71: 80, + 72: 81, 73: 82, 74: 84, 75: 85, 76: 86, 77: 87, 78: 88, 79: 89, 80: 90} + + # Extract image ID and bounding box score from detection + image_id = int(detection[ID_INDEX]) + score = float(detection[SCORE_INDEX]) + + # Convert bounding box coordinates [xmin, ymin, xmax, ymax] to [x, y, width, height] + x = float(detection[XMIN_INDEX]) + y = float(detection[YMIN_INDEX]) + width = float(detection[XMAX_INDEX]) - x + height = float(detection[YMAX_INDEX]) - y + bbox = [x, y, width, height] + + return {'category_id': LABEL_MAP[label], 'image_id': image_id, 'score': score, 'bbox': bbox} + + +def generate_results_file(detections_path, results_filename): + + DETECTIONS_EXTENSION = '.txt' + + # Retrieve detections filenames + filenames = [filename for filename in os.listdir(detections_path) if filename.endswith(DETECTIONS_EXTENSION)] + + results = [] + for filename in filenames: + # Read detections from current file + with open(os.path.join(detections_path, filename), 'r') as detections_file: + lines = detections_file.readlines() + + # Convert detections from current file + label = 
int(os.path.splitext(filename)[0].split('_')[1]) + for line in lines: + results.append(convert_detection(label, line.strip().split())) + + # Write results to file + with open(os.path.join(detections_path, results_filename), 'w') as results_file: + json.dump(results, results_file) + + +def main(): + + RESULTS_FILENAME = 'results.json' + ANNOTATION_TYPE = 'bbox' + + parser = argparse.ArgumentParser() + parser.add_argument('--detections_path', type = str, required = True, help = 'path to the input detected bounding box files') + parser.add_argument('--annotations_file', type = str, required = True, help = 'name of the input validation annotations file') + + args = parser.parse_args() + + # Generate COCO results file + print('Generating COCO results...') + generate_results_file(args.detections_path, RESULTS_FILENAME) + + # Create COCO instance + cocoGt = COCO(args.annotations_file) + + # Load COCO results + cocoDt = cocoGt.loadRes(os.path.join(args.detections_path, RESULTS_FILENAME)) + + # Evaluate results + cocoEval = COCOeval(cocoGt, cocoDt, ANNOTATION_TYPE) + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + +if __name__ == '__main__': + + main() \ No newline at end of file diff --git a/tests/unit/common/tensorflow/tf_model_args.txt b/tests/unit/common/tensorflow/tf_model_args.txt index 55137c9e3..32d9b51e3 100644 --- a/tests/unit/common/tensorflow/tf_model_args.txt +++ b/tests/unit/common/tensorflow/tf_model_args.txt @@ -90,3 +90,7 @@ run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignme run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, 
LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=1 --num_intra_threads=28 --lfw_batch_size=1 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=200 --max_steps=1000 run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000 run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt 
--warmup_steps=40 --max_steps=1000 +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --steps=500 --warmup-steps=100,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --warmup-steps=100 --steps=500 --data-location=/dataset +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --data-location=/dataset --accuracy-only +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only 
--verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_fp32_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --data-location=/dataset --accuracy-only +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --steps=500 --warmup-steps=100,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --warmup-steps=100 --steps=500 --data-location=/dataset \ No newline at end of file From f4fd7a2e3d8abb5fa08e797f8e4b5056f94b8a64 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Thu, 25 Apr 2019 09:54:35 -0700 Subject: [PATCH 22/62] Make TCMalloc enabled for int8 by default, but disabled for other precisions (#290) * Make TCMalloc enabled for int8 by default, but disabled for other precisions. 
* Code cleanup * update start script * Updating doc to add more info on TCMalloc --- benchmarks/common/base_benchmark_util.py | 16 ++- benchmarks/common/base_model_init.py | 7 ++ benchmarks/common/tensorflow/start.sh | 4 +- benchmarks/launch_benchmark.py | 4 + docs/general/tensorflow/LaunchBenchmark.md | 15 ++- .../unit/common/tensorflow/tf_model_args.txt | 109 +++++++++--------- tests/unit/common/test_base_model_init.py | 40 ++++++- tests/unit/test_launch_benchmark.py | 12 ++ 8 files changed, 139 insertions(+), 68 deletions(-) diff --git a/benchmarks/common/base_benchmark_util.py b/benchmarks/common/base_benchmark_util.py index 0768e8871..e4c92639d 100644 --- a/benchmarks/common/base_benchmark_util.py +++ b/benchmarks/common/base_benchmark_util.py @@ -161,12 +161,20 @@ def _define_args(self): "with --accuracy-only and --mode=inference.", dest="output_results", action="store_true") + # Note this can't be a normal boolean flag, because we need to know when the user + # does not explicitly set the arg value so that we can apply the appropriate + # default value, depending on the the precision. self._common_arg_parser.add_argument( "--disable-tcmalloc", - help="Disables the use of TCMalloc for int8 benchmarking. TCMalloc is " - "currently not used for FP32 benchmarking, so using this flag with " - "FP32 models will have no effect.", - dest="disable_tcmalloc", action="store_true" + help="When TCMalloc is enabled, the google-perftools are installed (if running " + "using docker) and the LD_PRELOAD environment variable is set to point to " + "the TCMalloc library file. The TCMalloc memory allocator produces better " + "performance results with smaller batch sizes. This flag disables the use of " + "TCMalloc when set to True. For int8 benchmarking, TCMalloc is enabled by " + "default (--disable-tcmalloc=False). 
For other precisions, the flag is " + "--disable-tcmalloc=True by default.", + dest="disable_tcmalloc", choices=["True", "False"], + default=None ) self._common_arg_parser.add_argument( diff --git a/benchmarks/common/base_model_init.py b/benchmarks/common/base_model_init.py index 8e8d1abb2..4a334ca65 100644 --- a/benchmarks/common/base_model_init.py +++ b/benchmarks/common/base_model_init.py @@ -44,6 +44,13 @@ def __init__(self, args, custom_args=[], platform_util=None): self.custom_args = custom_args self.platform_util = platform_util + # Set default values for TCMalloc and convert string value to a boolean + if self.args.disable_tcmalloc is None: + # Set to False for int8 and True for other precisions + self.args.disable_tcmalloc = self.args.precision != "int8" + elif isinstance(self.args.disable_tcmalloc, str): + self.args.disable_tcmalloc = self.args.disable_tcmalloc == "True" + # Ensure that we are using the proper version of python to run the benchmarking script self.python_exe = os.environ["PYTHON_EXE"] diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index ab17c9bc6..26d25af86 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -177,8 +177,8 @@ if [ ${DATA_NUM_INTRA_THREADS} != "None" ]; then CMD="${CMD} --data-num-intra-threads=${DATA_NUM_INTRA_THREADS}" fi -if [ ${DISABLE_TCMALLOC} == "True" ]; then - CMD="${CMD} --disable-tcmalloc" +if [ ${DISABLE_TCMALLOC} != "None" ]; then + CMD="${CMD} --disable-tcmalloc=${DISABLE_TCMALLOC}" fi function install_protoc() { diff --git a/benchmarks/launch_benchmark.py b/benchmarks/launch_benchmark.py index 7515936eb..32c0f68ae 100644 --- a/benchmarks/launch_benchmark.py +++ b/benchmarks/launch_benchmark.py @@ -93,6 +93,10 @@ def validate_args(self): if not self.args.benchmark_only and not self.args.accuracy_only: self.args.benchmark_only = True + # default disable_tcmalloc=False for int8 and disable_tcmalloc=True for other precisions 
+ if not self.args.disable_tcmalloc: + self.args.disable_tcmalloc = str(self.args.precision != "int8") + if self.args.custom_volumes and not self.args.docker_image: raise ValueError("Volume mounts can only be used when running in a docker container " "(a --docker-image must be specified when using --volume).") diff --git a/docs/general/tensorflow/LaunchBenchmark.md b/docs/general/tensorflow/LaunchBenchmark.md index ccab76c9c..017b303f1 100644 --- a/docs/general/tensorflow/LaunchBenchmark.md +++ b/docs/general/tensorflow/LaunchBenchmark.md @@ -101,10 +101,17 @@ optional arguments: conjunction with --accuracy-only and --mode=inference. --output-dir OUTPUT_DIR Folder to dump output into. - --disable-tcmalloc Disables the use of TCMalloc for int8 benchmarking. - TCMalloc is currently not used for FP32 benchmarking, - so using this flag with FP32 models will have no - effect. + --disable-tcmalloc {True,False} + When TCMalloc is enabled, the google-perftools are + installed (if running using docker) and the LD_PRELOAD + environment variable is set to point to the TCMalloc + library file. The TCMalloc memory allocator produces + better performance results with smaller batch sizes. + This flag disables the use of TCMalloc when set to + True. For int8 benchmarking, TCMalloc is enabled by + default (--disable-tcmalloc=False). For other + precisions, the flag is --disable-tcmalloc=True by + default. --tcmalloc-large-alloc-report-threshold TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD Sets the TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD environment variable to the specified value. 
The diff --git a/tests/unit/common/tensorflow/tf_model_args.txt b/tests/unit/common/tensorflow/tf_model_args.txt index 32d9b51e3..386d1185c 100644 --- a/tests/unit/common/tensorflow/tf_model_args.txt +++ b/tests/unit/common/tensorflow/tf_model_args.txt @@ -1,25 +1,24 @@ run_tf_benchmark.py --framework tensorflow --use-case recommendation --precision fp32 --mode inference --model-name wide_deep --batch-size 1024 --data-location /dataset --checkpoint /checkpoints --intelai-models . --verbose,OMP_NUM_THREADS=1 numactl --cpunodebind=0 --membind=0 python inference/fp32/wide_deep_inference.py --data_dir=/dataset --model_dir=/checkpoints --batch_size=1024 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 
--input_graph=/in_graph/densenet169_fp32_pretrained_model.pb -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb --data-location=/dataset, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/accuracy.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb --data_location=/dataset +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb, numactl --cpunodebind=0 --membind=0 python 
/workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb --data-location=/dataset, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/accuracy.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb --data_location=/dataset run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 100 --in-graph /final_int8_inceptionv3.pb --intelai-models . --accuracy-only --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./int8/accuracy.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/final_int8_inceptionv3.pb run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . 
--benchmark-only --socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --disable-tcmalloc,numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --disable-tcmalloc=True,numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inceptionv3 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/inception_frozen_max_min.pb --steps=200 --warmup-steps=20,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28 run_tf_benchmark.py 
--framework=tensorflow --use-case=image_recognition --model-name=inceptionv3 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/inception_frozen_max_min.pb --steps=200 --warmup-steps=20,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --disable-tcmalloc --verbose,numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 100 --accuracy-only --data-location /dataset --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 100 --accuracy-only --data-location /dataset --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--verbose,python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=fp32 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=fp32 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python 
/workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=1 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb 
--data_location=/dataset --input_height=299 --input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=1 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=128 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose 
--in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data-location=/dataset,python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data_location=/dataset --input_height=299 --input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=1 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=128 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models 
--intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data_location=/dataset --input_height=299 --input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --inter-op-parallelism-threads=1 --intra-op-parallelism-threads=28 --batch-size=1 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --inter-op-parallelism-threads=1 --intra-op-parallelism-threads=28 --batch-size=128 @@ -27,70 +26,70 @@ run_tf_benchmark.py --framework=tensorflow 
--use-case=image_recognition --model- run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name resnet50 --batch-size 100 --data-location /dataset --in-graph /final_int8_resnet50.pb --intelai-models . --accuracy-only --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name resnet50 --batch-size 128 --in-graph /final_int8_resnet50.pb --intelai-models . --benchmark-only --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=128 --warmup-steps=10 --steps=50 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --steps=200 --warmup-steps=20,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 1 --in-graph /freezed_resnet50.pb --intelai-models . 
--socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 128 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 1 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size 100 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/freezed_resnet50.pb --accuracy-only --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=100 --data-location=/dataset --accuracy-only --num-cores=28 
--warmup-steps=10 --steps=50 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --output-results --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50_fp32_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --results-file-path /workspace/benchmarks/common/tensorflow/logs/resnet50_fp32_inference_results*.txt +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 1 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 128 --in-graph /freezed_resnet50.pb --intelai-models . 
--socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 1 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size 100 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/freezed_resnet50.pb --accuracy-only --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=100 --data-location=/dataset --accuracy-only --num-cores=28 --warmup-steps=10 --steps=50 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --output-results --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50_fp32_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py 
--input-graph=/in_graph/resnet50_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --results-file-path /workspace/benchmarks/common/tensorflow/logs/resnet50_fp32_inference_results*.txt run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name squeezenet --batch-size 64 --checkpoint /checkpoints --intelai-models . --socket-id 0 --verbose,taskset -c 0-27 python ./fp32/train_squeezenet.py --data_location None --batch_size 64 --num_inter_threads 1 --num_intra_threads 28 --model_dir /checkpoints --inference-only --verbose run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name squeezenet --batch-size 1 --checkpoint /checkpoints --intelai-models . --socket-id 0 --verbose,taskset -c 0-27 python ./fp32/train_squeezenet.py --data_location None --batch_size 1 --num_inter_threads 1 --num_intra_threads 28 --model_dir /checkpoints --inference-only --verbose -run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --config_file=pipeline.config,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 1 --num_intra_threads 28 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval -run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . 
--socket-id 0 --verbose --config_file=pipeline.config --num-inter-threads 4 --num-intra-threads 16,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 4 --num_intra_threads 16 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval +run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --config_file=pipeline.config,numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 1 --num_intra_threads 28 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval +run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . 
--socket-id 0 --verbose --config_file=pipeline.config --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 4 --num_intra_threads 16 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset --in-graph=/in_graph/frozen_inference_graph.pb,sh /workspace/intelai_models/inference/fp32/coco_accuracy.sh /in_graph/frozen_inference_graph.pb /dataset/coco_val.record /workspace/models run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --data-location=/dataset, sh /workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/ssdmobilenet_int8_pretrained_model.pb /dataset run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size=1 --socket-id 0 --data-location=/dataset --verbose --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --benchmark-only --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python 
/workspace/intelai_models/inference/int8/run_frozen_graph_ssdmob.py -g /in_graph/ssdmobilenet_int8_pretrained_model.pb -n 5000 -d /dataset -x --num-inter-threads 2 --num-intra-threads 28 -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py --input_tfrecord_paths=/dataset --output_tfrecord_path=/SSD-mobilenet-out.tfrecord --inference_graph=/in_graph/frozen_inference_graph.pb --discard_image_pixels=True --num_inter_threads=2 --num_intra_threads=28 +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py --input_tfrecord_paths=/dataset --output_tfrecord_path=/SSD-mobilenet-out.tfrecord --inference_graph=/in_graph/frozen_inference_graph.pb --discard_image_pixels=True --num_inter_threads=2 --num_intra_threads=28 run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --accuracy-only 
--verbose --in-graph=/in_graph/frozen_inference_graph.pb --benchmark-dir=/workspace/benchmarks --data-location=/dataset,sh /workspace/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/ssdmobilenet_accuracy.sh /in_graph/frozen_inference_graph.pb /dataset -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --accuracy-only --data-location /dataset -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 -run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name rfcn --checkpoint /checkpoints --intelai-models . --model-source-dir . 
--socket-id 0 --verbose --config_file=rfcn_pipeline.config,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --inter_op 1 --intra_op 28 --omp 28 --pipeline_config_path /checkpoints/rfcn_pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/models/rfcn/eval --logtostderr --blocktime=0 --run_once=True +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --accuracy-only --data-location /dataset +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 +run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name rfcn --checkpoint /checkpoints --intelai-models . --model-source-dir . 
--socket-id 0 --verbose --config_file=rfcn_pipeline.config,numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --inter_op 1 --intra_op 28 --omp 28 --pipeline_config_path /checkpoints/rfcn_pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/models/rfcn/eval --logtostderr --blocktime=0 --run_once=True run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset --accuracy-only --split=accuracy_message,FROZEN_GRAPH=/in_graph/frozen_inference_graph.pb TF_RECORD_FILE=/dataset SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/fp32/coco_mAP.sh run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --benchmark-only --number_of_steps=500,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb -x 500 -d /dataset --num-inter-threads 2 --num-intra-threads 56 run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --accuracy-only --split=accuracy_message,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 
FROZEN_GRAPH=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb TF_RECORD_FILE=/dataset SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/int8/coco_mAP.sh run_tf_benchmark.py --framework tensorflow --use-case text_to_speech --precision fp32 --mode inference --model-name wavenet --num-cores 1 --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --checkpoint_name=model.ckpt-99 --sample=8510,numactl --physcpubind=0-0 --membind=0 python generate.py /checkpoints/model.ckpt-99 --num_inter_threads=1 --num_intra_threads=1 --sample=8510 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --accuracy-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=2 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=56 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework=tensorflow 
--use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference 
--model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --accuracy-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb --data-location=/dataset,python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=2 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=56 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 +run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_int8_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl 
--cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-intra-threads=28 --num-inter-threads=1 --input-graph=/in_graph/resnet101_int8_model.pb --warmup-steps=40 --steps=100 -"run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de","LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=1 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" -"run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de","LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu 
--decode_hparams=beam_size=4,alpha=0.6,batch_size=32 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" -run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=1 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt -run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=64 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=64 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt +"run_tf_benchmark.py --framework=tensorflow 
--use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de","numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=1 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" +"run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de","numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=32 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" +run_tf_benchmark.py --framework=tensorflow --use-case=language_translation 
--model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=1 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt +run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=64 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=64 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data-location=/dataset --input_height=224 
--input_width=224,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/accuracy.py --input_height=224 --input_width=224 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data_location=/dataset --input_layer=input run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 
--input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=240 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 1 -python common/tensorflow/run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=lm-1b --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/benchmark.py -b=1 -I=100 --inter=1 --intra=28 +python common/tensorflow/run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=lm-1b --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models,numactl --cpunodebind=0 --membind=0 python /workspace/models/benchmark.py -b=1 -I=100 --inter=1 --intra=28 run_tf_benchmark.py 
--framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 100 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 100 run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --in-graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --data-location=/dataset,python /workspace/intelai_models/inference/fp32/accuracy.py --batch_size=100 --num_inter_threads=2 --input_graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --num_intra_threads=56 --data_location=/dataset -run_tf_benchmark.py 
--framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --accuracy_only -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --benchmark_only -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=1 --inference_only --benchmark_only -run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference 
--model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/coco.py evaluate --dataset=/dataset --num_inter_threads 1 --num_intra_threads 28 --nw 5 --nb 50 --model=coco --infbs 1 -run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/coco.py evaluate --dataset=/dataset --num_inter_threads 4 --num_intra_threads 16 --nw 5 --nb 50 --model=coco --infbs 1 -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset,/workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/pretrained_int8_faster_rcnn_model.pb /dataset /workspace/models +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 
--inter_op_parallelism_threads=1 --batch_size=256 --inference_only --accuracy_only +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --benchmark_only +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=1 --inference_only --benchmark_only +run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/coco.py evaluate --dataset=/dataset --num_inter_threads 1 --num_intra_threads 28 --nw 5 --nb 50 --model=coco --infbs 1 +run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 
--benchmark-only --verbose --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/coco.py evaluate --dataset=/dataset --num_inter_threads 4 --num_intra_threads 16 --nw 5 --nb 50 --model=coco --infbs 1 +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 /workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/pretrained_int8_faster_rcnn_model.pb /dataset /workspace/models run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -d /dataset --num-inter-threads 2 --num-intra-threads 56 run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset --number-of-steps=500,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -n 500 -d /dataset --num-inter-threads 2 
--num-intra-threads 56 -run_tf_benchmark.py --framework=tensorflow --use-case=adversarial_networks --model-name=dcgan --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/inference_bench.py -ckpt /checkpoints -dl /dataset --num_inter_threads 1 --num_intra_threads 28 -nw 100 -nb 500 --bs 100 --kmp_blocktime 1 --kmp_settings 1 -run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 1 --num_intra_threads 28 --bs 1 --dl /dataset --nw 100 --nb 200 -run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 1 --num_intra_threads 28 --bs 100 --dl /dataset --nw 100 --nb 200 -run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks 
--intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --infer_mode=beam_search,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/nmt.py --src=de --tgt=en --hparams_path=/workspace/intelai_models/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json --out_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_prefix=/dataset/vocab.bpe.32000 --ckpt=/checkpoints/translate.ckpt --infer_batch_size=1 --inference_input_file=/dataset/newstest2015.tok.bpe.32000.de --inference_output_file=/workspace/benchmarks/common/tensorflow/logs/output_infer --inference_ref_file=/dataset/newstest2015.tok.bpe.32000.en --num_inter_threads=1 --num_intra_threads=28 --infer_mode=beam_search -run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --infer_mode=beam_search,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/nmt.py --src=de --tgt=en --hparams_path=/workspace/intelai_models/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json --out_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_prefix=/dataset/vocab.bpe.32000 --ckpt=/checkpoints/translate.ckpt --infer_batch_size=32 --inference_input_file=/dataset/newstest2015.tok.bpe.32000.de --inference_output_file=/workspace/benchmarks/common/tensorflow/logs/output_infer --inference_ref_file=/dataset/newstest2015.tok.bpe.32000.en --num_inter_threads=1 --num_intra_threads=28 
--infer_mode=beam_search -run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 4 --num_intra_threads 16 --bs 100 --dl /dataset --nw 100 --nb 200 -run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=unet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --checkpoint_name=model.ckpt,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/unet_infer.py -bs 1 -cp /checkpoints/model.ckpt --num_inter_threads 1 --num_intra_threads 28 -nw 80 -nb 400 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb 
--data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=1 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=512 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 +run_tf_benchmark.py --framework=tensorflow --use-case=adversarial_networks --model-name=dcgan --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 
--membind=0 python /workspace/intelai_models/inference/fp32/inference_bench.py -ckpt /checkpoints -dl /dataset --num_inter_threads 1 --num_intra_threads 28 -nw 100 -nb 500 --bs 100 --kmp_blocktime 1 --kmp_settings 1 +run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 1 --num_intra_threads 28 --bs 1 --dl /dataset --nw 100 --nb 200 +run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 1 --num_intra_threads 28 --bs 100 --dl /dataset --nw 100 --nb 200 +run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --infer_mode=beam_search,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/nmt.py --src=de --tgt=en --hparams_path=/workspace/intelai_models/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json --out_dir=/workspace/benchmarks/common/tensorflow/logs 
--vocab_prefix=/dataset/vocab.bpe.32000 --ckpt=/checkpoints/translate.ckpt --infer_batch_size=1 --inference_input_file=/dataset/newstest2015.tok.bpe.32000.de --inference_output_file=/workspace/benchmarks/common/tensorflow/logs/output_infer --inference_ref_file=/dataset/newstest2015.tok.bpe.32000.en --num_inter_threads=1 --num_intra_threads=28 --infer_mode=beam_search +run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --infer_mode=beam_search,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/nmt.py --src=de --tgt=en --hparams_path=/workspace/intelai_models/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json --out_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_prefix=/dataset/vocab.bpe.32000 --ckpt=/checkpoints/translate.ckpt --infer_batch_size=32 --inference_input_file=/dataset/newstest2015.tok.bpe.32000.de --inference_output_file=/workspace/benchmarks/common/tensorflow/logs/output_infer --inference_ref_file=/dataset/newstest2015.tok.bpe.32000.en --num_inter_threads=1 --num_intra_threads=28 --infer_mode=beam_search +run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 4 --num_intra_threads 16 --bs 100 --dl /dataset 
--nw 100 --nb 200 +run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=unet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --checkpoint_name=model.ckpt,numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/unet_infer.py -bs 1 -cp /checkpoints/model.ckpt --num_inter_threads 1 --num_intra_threads 28 -nw 80 -nb 400 +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=1 
--input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 +run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=512 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only 
--verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=1 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=512 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=mtcc --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/one_image_test.py --num_inter_threads 1 --num_intra_threads 28 -ckpt /checkpoints -dl /dataset -run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 
--mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=1 --num_intra_threads=28 --lfw_batch_size=1 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=200 --max_steps=1000 -run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000 -run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, 
LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000 +run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=mtcc --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/one_image_test.py --num_inter_threads 1 --num_intra_threads 28 -ckpt /checkpoints -dl /dataset +run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=1 --num_intra_threads=28 --lfw_batch_size=1 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=200 --max_steps=1000 +run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models 
--num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000 +run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000 run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --steps=500 --warmup-steps=100,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 
--membind=0 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --warmup-steps=100 --steps=500 --data-location=/dataset run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_fp32_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs 
--benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --steps=500 --warmup-steps=100,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --warmup-steps=100 --steps=500 --data-location=/dataset \ No newline at end of file +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_fp32_pretrained_model.pb --data-location=/dataset,python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --data-location=/dataset --accuracy-only +run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --steps=500 --warmup-steps=100,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_ssd.py 
--input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --warmup-steps=100 --steps=500 --data-location=/dataset \ No newline at end of file diff --git a/tests/unit/common/test_base_model_init.py b/tests/unit/common/test_base_model_init.py index 2e147ee62..7a376fb35 100644 --- a/tests/unit/common/test_base_model_init.py +++ b/tests/unit/common/test_base_model_init.py @@ -177,9 +177,10 @@ def test_set_kmp_vars_config_json_exists(mock_json): base_model_init.set_kmp_vars(config_file_path) -@pytest.mark.parametrize('precision', ['int8', 'fp32']) -def test_command_prefix_tcmalloc(precision, mock_glob): - """ Models should include LD_PRELOAD in the command prefix, as long as tcmalloc is not disabled""" +@pytest.mark.parametrize('precision', ['int8']) +def test_command_prefix_tcmalloc_int8(precision, mock_glob): + """ For Int8 models, TCMalloc should be enabled by default and models should include + LD_PRELOAD in the command prefix, unless disable_tcmalloc=True is set """ platform_util = MagicMock() args = MagicMock(verbose=True, model_name=test_model_name) test_tcmalloc_lib = "/usr/lib/libtcmalloc.so.4.2.6" @@ -208,3 +209,36 @@ def test_command_prefix_tcmalloc(precision, mock_glob): command_prefix = base_model_init.get_command_prefix(args.socket_id, numactl=False) assert "LD_PRELOAD={}".format(test_tcmalloc_lib) in command_prefix assert "numactl" not in command_prefix + + +@pytest.mark.parametrize('precision', ['fp32']) +def test_command_prefix_tcmalloc_fp32(precision, mock_glob): + """ FP32 models should have TC Malloc disabled by default, but models should + include LD_PRELOAD in the command prefix if disable_tcmalloc=False is explicitly set.
""" + platform_util = MagicMock() + args = MagicMock(verbose=True, model_name=test_model_name) + test_tcmalloc_lib = "/usr/lib/libtcmalloc.so.4.2.6" + mock_glob.return_value = [test_tcmalloc_lib] + os.environ["PYTHON_EXE"] = "python" + args.socket_id = 0 + args.precision = precision + + # By default, TCMalloc should not be used + base_model_init = BaseModelInitializer(args, [], platform_util) + command_prefix = base_model_init.get_command_prefix(args.socket_id) + assert "LD_PRELOAD={}".format(test_tcmalloc_lib) not in command_prefix + assert "numactl --cpunodebind=0 --membind=0" in command_prefix + + # If disable_tcmalloc is explicitly set to False, LD_PRELOAD should be in the prefix + args.disable_tcmalloc = False + base_model_init = BaseModelInitializer(args, [], platform_util) + command_prefix = base_model_init.get_command_prefix(args.socket_id) + assert "LD_PRELOAD={}".format(test_tcmalloc_lib) in command_prefix + assert "numactl --cpunodebind=0 --membind=0" in command_prefix + + # If tcmalloc is disabled and numactl is set to false, neither should be in the prefix + args.disable_tcmalloc = True + base_model_init = BaseModelInitializer(args, [], platform_util) + command_prefix = base_model_init.get_command_prefix(args.socket_id, numactl=False) + assert "LD_PRELOAD={}".format(test_tcmalloc_lib) not in command_prefix + assert "numactl" not in command_prefix diff --git a/tests/unit/test_launch_benchmark.py b/tests/unit/test_launch_benchmark.py index 03b96f697..32a9eaec1 100644 --- a/tests/unit/test_launch_benchmark.py +++ b/tests/unit/test_launch_benchmark.py @@ -258,3 +258,15 @@ def test_launch_benchmark_custom_volume(launch_benchmark, mock_popen): docker_run_cmd = " ".join(args[0]) for custom_volume in custom_volumes: assert "--volume {}".format(custom_volume) in docker_run_cmd + + +@pytest.mark.parametrize("precision,expected_disable_tcmalloc", [["int8", "False"], + ["fp32", "True"]]) +def test_disable_tcmalloc(launch_benchmark, mock_popen, precision, expected_disable_tcmalloc): +
launch_benchmark.args.precision = precision + launch_benchmark.main() + assert mock_popen.called + args, _ = mock_popen.call_args + # convert the run command args to a string and then check for the DISABLE_TCMALLOC env var + docker_run_cmd = " ".join(args[0]) + assert "--env DISABLE_TCMALLOC=".format(expected_disable_tcmalloc) in docker_run_cmd From c21b9ed54fa0b2af7c61ec1f95ae0adb65606781 Mon Sep 17 00:00:00 2001 From: Wafaa Taie Date: Fri, 26 Apr 2019 09:44:48 -0700 Subject: [PATCH 23/62] add the required dependencies for coco dataset conversion to tf records, and the instructions to install. (#292) --- benchmarks/object_detection/tensorflow/ssd_vgg16/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md index 47233e7e2..514abe6a7 100644 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md @@ -24,6 +24,7 @@ or the throughput and latency benchmark with real data. The [TensorFlow models](https://github.com/tensorflow/models) repo will be used for converting the coco dataset to the TF records format. +Follow [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#dependencies) to install the required dependencies (`cocoapi` and `Protobuf 3.0.0`). ``` $ mkdir val $ cd val @@ -65,6 +66,7 @@ located after the script has completed. ``` # We are going to use an older version of the conversion script to checkout the git commit +$ git clone https://github.com/tensorflow/models.git $ cd models $ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 @@ -203,6 +205,7 @@ or the throughput and latency benchmark with real data. The [TensorFlow models](https://github.com/tensorflow/models) repo will be used for converting the coco dataset to the TF records format.
+Follow [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#dependencies) to install the required dependencies (`cocoapi` and `Protobuf 3.0.0`). ``` $ mkdir val $ cd val @@ -244,6 +247,7 @@ located after the script has completed. ``` # We are going to use an older version of the conversion script to checkout the git commit +$ git clone https://github.com/tensorflow/models.git $ cd models $ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 From 9f6387db3609a731b2df071f0f206ce187de34b4 Mon Sep 17 00:00:00 2001 From: Wafaa Taie Date: Fri, 26 Apr 2019 10:19:22 -0700 Subject: [PATCH 24/62] update object detection models readme for dataset converion. (#293) --- .../object_detection/tensorflow/faster_rcnn/README.md | 1 + benchmarks/object_detection/tensorflow/rfcn/README.md | 6 ++++-- .../object_detection/tensorflow/ssd-mobilenet/README.md | 2 ++ .../object_detection/tensorflow/ssd-resnet34/README.md | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md index 162acdf07..cad38b7de 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md @@ -78,6 +78,7 @@ TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. +Follow [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#dependencies) to install the required dependencies (`cocoapi` and `Protobuf 3.0.0`). Follow the steps below to navigate to the proper directory and point the script to the raw COCO dataset files that you have downloaded in step 2. 
The `--output_dir` is the location where the TF record files will be diff --git a/benchmarks/object_detection/tensorflow/rfcn/README.md b/benchmarks/object_detection/tensorflow/rfcn/README.md index 10a0342ce..ec9fad2e6 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/README.md +++ b/benchmarks/object_detection/tensorflow/rfcn/README.md @@ -44,7 +44,7 @@ sed -i.bak 95s/input_config/input_config[0]/ offline_eval_map_corloc.py ``` -2. Download the 2017 validation +2. Download the 2017 validation [COCO dataset](http://cocodataset.org/#home) and annotations: ``` @@ -78,6 +78,7 @@ TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. +Follow [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#dependencies) to install the required dependencies (`cocoapi` and `Protobuf 3.0.0`). Follow the steps below to navigate to the proper directory and point the script to the raw COCO dataset files that you have downloaded in step 2. The `--output_dir` is the location where the TF record files will be @@ -222,7 +223,7 @@ $ git clone https://github.com/cocodataset/cocoapi.git The TensorFlow models repo will be used for running inference as well as converting the coco dataset to the TF records format. -2. Download the 2017 validation +2. Download the 2017 validation [COCO dataset](http://cocodataset.org/#home) and annotations: ``` @@ -256,6 +257,7 @@ TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. +Follow [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#dependencies) to install the required dependencies (`cocoapi` and `Protobuf 3.0.0`). 
Follow the steps below to navigate to the proper directory and point the script to the raw COCO dataset files that you have downloaded in step 2. The `--output_dir` is the location where the TF record files will be diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md index 2d129384f..6425640e5 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md @@ -61,6 +61,7 @@ TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. +Follow [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#dependencies) to install the required dependencies (`cocoapi` and `Protobuf 3.0.0`). Follow the steps below to navigate to the proper directory and point the script to the raw COCO dataset files that you have downloaded in step 2. The `--output_dir` is the location where the TF record files will be @@ -241,6 +242,7 @@ TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. +Follow [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#dependencies) to install the required dependencies (`cocoapi` and `Protobuf 3.0.0`). Follow the steps below to navigate to the proper directory and point the script to the raw COCO dataset files that you have downloaded in step 2. 
The `--output_dir` is the location where the TF record files will be diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md index 0a6915bac..f4e419f79 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md @@ -61,6 +61,7 @@ TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. +Follow [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#dependencies) to install the required dependencies (`cocoapi` and `Protobuf 3.0.0`). Follow the steps below to navigate to the proper directory and point the script to the raw COCO dataset files that you have downloaded in step 2. The `--output_dir` is the location where the TF record files will be From 3bffb2ae6c3882861eed080da013c453277d7ce1 Mon Sep 17 00:00:00 2001 From: Melanie Buehler Date: Mon, 29 Apr 2019 09:57:20 -0700 Subject: [PATCH 25/62] Update Int8 docs to reflect use of tcmalloc (#291) --- .../tensorflow/inception_resnet_v2/README.md | 7 ++++++- .../image_recognition/tensorflow/inceptionv3/README.md | 5 +++++ .../image_recognition/tensorflow/inceptionv4/README.md | 5 +++++ .../image_recognition/tensorflow/mobilenet_v1/README.md | 5 +++++ .../image_recognition/tensorflow/resnet101/README.md | 5 +++++ benchmarks/image_recognition/tensorflow/resnet50/README.md | 5 +++++ .../object_detection/tensorflow/faster_rcnn/README.md | 5 +++++ benchmarks/object_detection/tensorflow/rfcn/README.md | 5 +++++ .../object_detection/tensorflow/ssd-mobilenet/README.md | 5 +++++ benchmarks/object_detection/tensorflow/ssd_vgg16/README.md | 5 +++++ .../recommendation/tensorflow/wide_deep_large_ds/README.md | 5 +++++ 11 files changed, 56 insertions(+), 1 deletion(-) diff --git 
a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md index 7c9c246fc..4b0543e56 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md @@ -7,6 +7,11 @@ following modes/precisions: ## Int8 Inference Instructions +These instructions use the TCMalloc memory allocator, which produces +better performance results for Int8 precision models with smaller batch sizes. +If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` +when calling `launch_benchmark.py` and the script will run without TCMalloc. + 1. Clone this [intelai/models](https://github.com/IntelAI/models) repository: @@ -69,7 +74,7 @@ are required to run Inception ResNet V2 Int8. Inception ResNet V2 can be run for accuracy, latency benchmarking, or throughput benchmarking. Use one of the following examples below, depending on -your use case. +your use case. For accuracy (using your `--data-location`, `--accuracy-only` and `--batch-size 100`): diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md index 1da257669..3e8cf2f0b 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md @@ -10,6 +10,11 @@ other precisions are coming later. ## Int8 Inference Instructions +These instructions use the TCMalloc memory allocator, which produces +better performance results for Int8 precision models with smaller batch sizes. +If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` +when calling `launch_benchmark.py` and the script will run without TCMalloc. + 1. 
Clone this [intelai/models](https://github.com/IntelAI/models) repository: diff --git a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md index 13fb7c060..edb391d84 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md @@ -10,6 +10,11 @@ other precisions are coming later. ## Int8 Inference Instructions +These instructions use the TCMalloc memory allocator, which produces +better performance results for Int8 precision models with smaller batch sizes. +If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` +when calling `launch_benchmark.py` and the script will run without TCMalloc. + 1. Clone this [intelai/models](https://github.com/IntelAI/models) repository: ``` diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index a32138d86..bc84ba6c2 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -11,6 +11,11 @@ later. ## Int8 Inference Instructions +These instructions use the TCMalloc memory allocator, which produces +better performance results for Int8 precision models with smaller batch sizes. +If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` +when calling `launch_benchmark.py` and the script will run without TCMalloc. + 1. Download ImageNet dataset. This step is required only for running accuracy, for running benchmark we do not need to provide dataset. 
diff --git a/benchmarks/image_recognition/tensorflow/resnet101/README.md b/benchmarks/image_recognition/tensorflow/resnet101/README.md index 7343f472a..4bb6a8ded 100644 --- a/benchmarks/image_recognition/tensorflow/resnet101/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet101/README.md @@ -7,6 +7,11 @@ following modes/precisions: ## Int8 Inference Instructions +These instructions use the TCMalloc memory allocator, which produces +better performance results for Int8 precision models with smaller batch sizes. +If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` +when calling `launch_benchmark.py` and the script will run without TCMalloc. + 1. Clone this [intelai/models](https://github.com/IntelAI/models) repository: diff --git a/benchmarks/image_recognition/tensorflow/resnet50/README.md b/benchmarks/image_recognition/tensorflow/resnet50/README.md index a34a52139..5a666c6dd 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50/README.md @@ -10,6 +10,11 @@ precisions. ## Int8 Inference Instructions +These instructions use the TCMalloc memory allocator, which produces +better performance results for Int8 precision models with smaller batch sizes. +If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` +when calling `launch_benchmark.py` and the script will run without TCMalloc. + 1. Download the full ImageNet dataset and convert to the TF records format. 
* Clone the tensorflow/models repository: diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md index cad38b7de..e69fba728 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md @@ -213,6 +213,11 @@ Log location outside container: {--output-dir value}/benchmark_faster_rcnn_infer ## Int8 Inference Instructions +These instructions use the TCMalloc memory allocator, which produces +better performance results for Int8 precision models with smaller batch sizes. +If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` +when calling `launch_benchmark.py` and the script will run without TCMalloc. + 1. Please follow step 1, 2 and 3 of Faster R-CNN FP32 instructions written above. 2. Download the pre-trained model. diff --git a/benchmarks/object_detection/tensorflow/rfcn/README.md b/benchmarks/object_detection/tensorflow/rfcn/README.md index ec9fad2e6..f42ab9313 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/README.md +++ b/benchmarks/object_detection/tensorflow/rfcn/README.md @@ -10,6 +10,11 @@ other precisions are coming later. ## Int8 Inference Instructions +These instructions use the TCMalloc memory allocator, which produces +better performance results for Int8 precision models with smaller batch sizes. +If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` +when calling `launch_benchmark.py` and the script will run without TCMalloc. + 1. 
Clone the [tensorflow/models](https://github.com/tensorflow/models) and [cocodataset/cocoapi](https://github.com/cocodataset/cocoapi) repositories: ``` diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md index 6425640e5..33ac1d237 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md @@ -10,6 +10,11 @@ other precisions are coming later. ## Int8 Inference Instructions +These instructions use the TCMalloc memory allocator, which produces +better performance results for Int8 precision models with smaller batch sizes. +If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` +when calling `launch_benchmark.py` and the script will run without TCMalloc. + 1. Clone the [tensorflow/models](https://github.com/tensorflow/models) repository at the specified SHA and clone the [cocoapi repo](git clone https://github.com/cocodataset/cocoapi.git) in diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md index 514abe6a7..653d6a3ce 100644 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md @@ -10,6 +10,11 @@ other precisions are coming later. ## Int8 Inference Instructions +These instructions use the TCMalloc memory allocator, which produces +better performance results for Int8 precision models with smaller batch sizes. +If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` +when calling `launch_benchmark.py` and the script will run without TCMalloc. + 1. 
Clone the [original model](https://github.com/HiKapok/SSD.TensorFlow) repository: ``` $ git clone https://github.com/HiKapok/SSD.TensorFlow.git diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md index ede163b61..41870f762 100755 --- a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md @@ -55,6 +55,11 @@ Benchmarking instructions and scripts for model training coming later. ## INT8 Inference Instructions +These instructions use the TCMalloc memory allocator, which produces +better performance results for Int8 precision models with smaller batch sizes. +If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` +when calling `launch_benchmark.py` and the script will run without TCMalloc. + 1. Download and extract the pre-trained model. ``` wget https://storage.googleapis.com/intel-optimized-tensorflow/models/wide_deep_int8_pretrained_model.pb From b98fc4b14c13e8ed771249c25bee96660b1e2063 Mon Sep 17 00:00:00 2001 From: Wafaa Taie Date: Mon, 29 Apr 2019 17:00:11 -0700 Subject: [PATCH 26/62] add a reference publication for the ssd_vgg16 doc. 
(#295) --- benchmarks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index c8959af5a..d0f2a15b5 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -36,7 +36,7 @@ dependencies to be installed: | Object Detection | TensorFlow | [R-FCN](https://arxiv.org/pdf/1605.06409.pdf) | Inference | [Int8](object_detection/tensorflow/rfcn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/rfcn/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [Faster R-CNN](https://arxiv.org/pdf/1506.01497.pdf) | Inference | [Int8](object_detection/tensorflow/faster_rcnn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/faster_rcnn/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [SSD-MobileNet](https://arxiv.org/pdf/1704.04861.pdf) | Inference | [Int8](object_detection/tensorflow/ssd-mobilenet/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-mobilenet/README.md#fp32-inference-instructions) | -| Object Detection | TensorFlow | SSD-VGG16 | Inference | [Int8](object_detection/tensorflow/ssd_vgg16/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd_vgg16/README.md#fp32-inference-instructions) | +| Object Detection | TensorFlow | [SSD-VGG16](https://arxiv.org/pdf/1512.02325.pdf) | Inference | [Int8](object_detection/tensorflow/ssd_vgg16/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd_vgg16/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [SSD-ResNet34](https://arxiv.org/pdf/1512.02325.pdf) | Inference | [FP32](object_detection/tensorflow/ssd-resnet34/README.md#fp32-inference-instructions) | | Recommendation | TensorFlow | [NCF](https://arxiv.org/pdf/1708.05031.pdf) | Inference | [FP32](recommendation/tensorflow/ncf/README.md#fp32-inference-instructions) | | Recommendation | TensorFlow | [Wide & Deep Large 
Dataset](https://arxiv.org/pdf/1606.07792.pdf) | Inference | [Int8](recommendation/tensorflow/wide_deep_large_ds/README.md#int8-inference-instructions) [FP32](recommendation/tensorflow/wide_deep_large_ds/README.md#fp32-inference-instructions) | From 6d068b7d09fd4df099f28c055a9ed5cb19faae03 Mon Sep 17 00:00:00 2001 From: Melanie Buehler Date: Tue, 30 Apr 2019 14:23:16 -0700 Subject: [PATCH 27/62] Fixes tutorial link and text (#296) --- docs/image_recognition/tensorflow/Tutorial.md | 2 +- docs/object_detection/tensorflow_serving/Tutorial.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/image_recognition/tensorflow/Tutorial.md b/docs/image_recognition/tensorflow/Tutorial.md index f31a49ff6..4fe43bb83 100644 --- a/docs/image_recognition/tensorflow/Tutorial.md +++ b/docs/image_recognition/tensorflow/Tutorial.md @@ -393,7 +393,7 @@ and to skip the run from reinstalling packages pass ```True``` to ```NOINSTALL`` NOINSTALL=True BATCH_SIZE=128 ./start.sh -All other flags will be defaulted to values passed in the first ```launch_benchmark.py``` that starts the container. [See here](google.com) to get the full list of flags. +All other flags will be defaulted to values passed in the first ```launch_benchmark.py``` that starts the container. [See here](/docs/general/tensorflow/LaunchBenchmark.md) to get the full list of flags. Example Output diff --git a/docs/object_detection/tensorflow_serving/Tutorial.md b/docs/object_detection/tensorflow_serving/Tutorial.md index 479a34aea..c464b2e3b 100644 --- a/docs/object_detection/tensorflow_serving/Tutorial.md +++ b/docs/object_detection/tensorflow_serving/Tutorial.md @@ -105,7 +105,7 @@ This tutorial assumes you have already: (rfcn_venv)$ cp rfcn_resnet101_fp32_coco/saved_model/saved_model.pb rfcn/1 ``` -4. **Discover the number of physical cores**: Compute *num_physical_cores* by executing the `lscpu` command and multiplying `Core(s) per socket` by `Socket(s)`. 
For example, for a machine with `Core(s) per socket: 28` and `Socket(s): 2`, `num_physical_cores = 28 * 2 = 56`. To compute *num_physical_cores* and *tf_session_parallelism* with bash commands: +4. **Discover the number of physical cores**: Compute *num_physical_cores* by executing the `lscpu` command and multiplying `Core(s) per socket` by `Socket(s)`. For example, for a machine with `Core(s) per socket: 28` and `Socket(s): 2`, `num_physical_cores = 28 * 2 = 56`. To compute *num_physical_cores* with bash commands: ``` (rfcn_venv)$ cores_per_socket=`lscpu | grep "Core(s) per socket" | cut -d':' -f2 | xargs` (rfcn_venv)$ num_sockets=`lscpu | grep "Socket(s)" | cut -d':' -f2 | xargs` From d2547e507d5963c4107a604485fe626a306b2679 Mon Sep 17 00:00:00 2001 From: Nathan Greeneltch Date: Wed, 1 May 2019 17:52:55 -0500 Subject: [PATCH 28/62] Adds TF Transformer-LT tutorial (#247) * submit for PR language translations tutorial * Removes .nfs files and adds model to main docs README * Deleted more .nfs files * Updated for legal, marketing, punctuation, and official model location * Improved code snippets and added transformer files to launch_benchmark guide * Fixed alignment in main doc README * Remove refs to RNN and LSTM and correct a sentence * Transformer_LT tutorial updates * Update Tutorial.md * Update Tutorial.md * Update Tutorial.md * Update Tutorial.md * Update Tutorial.md * Update Tutorial.md * Update Tutorial.md * Update Tutorial.md --- docs/README.md | 3 +- docs/general/tensorflow/LaunchBenchmark.md | 7 +- .../tensorflow/Tutorial.md | 266 ++++++++++++++++++ 3 files changed, 273 insertions(+), 3 deletions(-) create mode 100644 docs/language_translation/tensorflow/Tutorial.md diff --git a/docs/README.md b/docs/README.md index 7ade8475e..11e99bf97 100644 --- a/docs/README.md +++ b/docs/README.md @@ -12,7 +12,8 @@ ## Tutorials by Use Case * Inference with Intel® Optimization of Tensorflow: - * [Image Recognition](/docs/image_recognition/tensorflow/Tutorial.md)
(ResNet50, ResNet101, and InceptionV3) + * [Image Recognition](/docs/image_recognition/tensorflow/Tutorial.md) (ResNet50, ResNet101, and InceptionV3) + * [Language Translation](/docs/language_translation/tensorflow/Tutorial.md) (Transformer-LT) * [Recommendation Systems](/docs/recommendation/tensorflow/Tutorial.md) (Wide and Deep) * Inference with Intel® Optimization of Tensorflow Serving: * [Image Recognition](/docs/image_recognition/tensorflow_serving/Tutorial.md) (ResNet50 and InceptionV3) diff --git a/docs/general/tensorflow/LaunchBenchmark.md b/docs/general/tensorflow/LaunchBenchmark.md index 017b303f1..59b9eb68d 100644 --- a/docs/general/tensorflow/LaunchBenchmark.md +++ b/docs/general/tensorflow/LaunchBenchmark.md @@ -29,8 +29,11 @@ Below the general description is an [index of links](#model-scripts-for-tensorfl [inference](/models/image_recognition/tensorflow/resnet101/inference/eval_image_classifier_inference.py) | [preprocessing](/models/image_recognition/tensorflow/resnet101/inference/preprocessing.py) * InceptionV3: [init](/benchmarks/image_recognition/tensorflow/inceptionv3/inference/fp32/model_init.py) | - [inference](/models/image_recognition/tensorflow/inceptionv3/fp32/eval_image_classifier_inference.py) | - [preprocessing](/models/image_recognition/tensorflow/inceptionv3/fp32/preprocessing.py) + [inference](/models/image_recognition/tensorflow/inceptionv3/fp32/eval_image_classifier_inference.py) | + [preprocessing](/models/image_recognition/tensorflow/inceptionv3/fp32/preprocessing.py) +* Language Translation + * Transformer-LT: [init](/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py) | + [inference](/models/language_translation/tensorflow/transformer_lt_official/inference/fp32/infer_ab.py) * Recommendation Systems * Wide and Deep: [init](/benchmarks/recommendation/tensorflow/wide_deep_large_ds/inference/fp32/model_init.py) |
[inference](/models/recommendation/tensorflow/wide_deep_large_ds/inference/inference.py) | diff --git a/docs/language_translation/tensorflow/Tutorial.md b/docs/language_translation/tensorflow/Tutorial.md new file mode 100644 index 000000000..13f827a50 --- /dev/null +++ b/docs/language_translation/tensorflow/Tutorial.md @@ -0,0 +1,266 @@ +# Language Translation with Transformer-LT + + +## Goal +This tutorial will introduce CPU performance considerations of the deep learning Transformer-LT model for language translation and how to use Intel® Optimizations for TensorFlow to improve inference time on CPUs. +This tutorial will also provide code examples to use Intel Model Zoo's pretrained English to German model that can be copy/pasted for quick off-the-ground implementation on real data. + +## Background +Language Translation with deep learning is a computationally expensive endeavor. This tutorial will show you how to reduce the inference runtime of your Transformer-LT network, a popular topology solution to translation. +It is based on an encoder-decoder architecture with an added attention mechanism. The encoder is used to encode the original sentence to a meaningful fixed-length vector, and the decoder is responsible for extracting the context data from the vector. +The encoder and decoder process the inputs and outputs, which are in the form of a time sequence. + +In a traditional encoder/decoder model, each element in the context vector is treated equally. This is typically not the ideal solution. +For instance, when you translate the phrase “I travel by train” from English into Chinese, the word “I” has a greater influence than other words when producing its counterpart in Chinese. +Thus, the attention mechanism was introduced to differentiate contributions of each element in the source sequence to their counterpart in the destination sequence, through the use of a hidden matrix.
+This matrix contains weights of each element in the source sequence when producing elements in the destination sequence. + + +## Recommended Settings +In addition to TensorFlow optimizations that use the Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN) to utilize instruction sets appropriately, the runtime settings also significantly contribute to improved performance. +Tuning these options to optimize CPU workloads is vital to optimize performance of TensorFlow on Intel® processors. +Below is the set of run-time options tested empirically on Transformer-LT and recommended by Intel: + + +| Run-time options | Recommendations | +| ------------- | ------------- | +| Batch Size | 64. Regardless of the hardware | +| Hyperthreading | Enabled. Turn on in BIOS. Requires a restart. | +|intra_op_parallelism_threads |# physical cores | +|inter_op_parallelism_threads | 1 | +|NUMA Controls| --cpunodebind=0 --membind=0 | +|KMP_AFFINITY| KMP_AFFINITY=granularity=fine,verbose,compact,1,0| +|KMP_BLOCKTIME| 1 | +|OMP_NUM_THREADS |# physical cores| + +Note 1: Refer to this [link](https://software.intel.com/en-us/articles/maximize-tensorflow-performance-on-cpu-considerations-and-recommendations-for-inference) to learn more about the run time options. + +Note 2: You can remove `verbose` from the `KMP_AFFINITY` setting to avoid verbose output at runtime. + +Run the following commands to get your processor information: + +a. #physical cores per socket : `lscpu | grep "Core(s) per socket" | cut -d':' -f2 | xargs` + +b. #all physical cores: `lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l` + +Below is a code snippet you can incorporate into your existing TensorFlow application to set the best settings. +You can either set them in the CLI or in the Python script. Note that the inter_op_parallelism_threads and intra_op_parallelism_threads settings can only be set +in the Python script.
+ +```bash +export OMP_NUM_THREADS=<# physical cores> +export KMP_AFFINITY="granularity=fine,verbose,compact,1,0" +export KMP_BLOCKTIME=1 +export KMP_SETTINGS=1 +``` +(or) +``` +import os +os.environ["KMP_BLOCKTIME"] = "1" +os.environ["KMP_SETTINGS"] = "1" +os.environ["KMP_AFFINITY"]= "granularity=fine,verbose,compact,1,0" +os.environ["OMP_NUM_THREADS"]= <# physical cores> +config = tf.ConfigProto() +config.intra_op_parallelism_threads = <# physical cores> +config.inter_op_parallelism_threads = 1 +tf.Session(config=config) +``` + +## Hands-on Tutorial +This section shows how to measure inference performance on Intel's Model Zoo pretrained model (or your pretrained model) by setting the above-discussed run time flags. +### FP32 inference + +### Initial Setup + +1. The model source is based off a specific commit from the TensorFlow models repo. Follow the instructions below to clone an older commit into your home directory. + +``` +cd ~ +mkdir tensorflow-models +cd tensorflow-models +git clone https://github.com/tensorflow/models.git +cd models +git checkout 8367cf6dabe11adf7628541706b660821f397dce +``` + +2. Clone the IntelAI models repo into your home directory. Skip this step if you already have the IntelAI models repo cloned. + +```bash +cd ~ +git clone https://github.com/IntelAI/models.git +``` + +3. Skip to step 4 if you already have a pretrained model; otherwise, download the file `transformer_lt_official_fp32_pretrained_model.tar.gz` into your ~/transformer_LT_german location. +``` +mkdir ~/transformer_LT_german +cd ~/transformer_LT_german +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/transformer_lt_official_fp32_pretrained_model.tar.gz +tar -xzvf transformer_lt_official_fp32_pretrained_model.tar.gz +``` + +4.
After extraction, you should see the following folders and files in the `transformer_lt_official_fp32_pretrained_model` directory: +``` +$ ls -l transformer_lt_official_fp32_pretrained_model/* + +transformer_lt_official_fp32_pretrained_model/data: +total 1064 +-rw-r--r--. 1 359898 Feb 20 16:05 newstest2014.en +-rw-r--r--. 1 399406 Feb 20 16:05 newstest2014.de +-rw-r--r--. 1 324025 Mar 15 17:31 vocab.txt + +transformer_lt_official_fp32_pretrained_model/graph: +total 241540 +-rwx------. 1 247333269 Mar 15 17:29 fp32_graphdef.pb + +``` +`newstest2014.en`: Input file with English text
+`newstest2014.de`: German translation of the input file for measuring accuracy
+`vocab.txt`: A dictionary of vocabulary
+`fp32_graphdef.pb`: Pretrained model + +Or, if you have your own model/data, ensure that the folder structure follows the structure depicted below to run the pretrained model in Intel Model Zoo. + +``` +├─ transformer_LT_german +│ ├── transformer_pretrained_model +│ ├── data +│ │ ├── newstest2014.en (Input file) +│ │ ├── newstest2014.de (Reference file, this is optional) +│ │ └── vocab.txt +│ └── graph +│ └── pretrained_model.pb +``` +5. Install [Docker](https://docs.docker.com/v17.09/engine/installation/) since the tutorial runs in a Docker container. + +### Run inference + +1. Pull the relevant Intel-optimized TensorFlow Docker image. + [Click here](https://software.intel.com/en-us/articles/intel-optimization-for-tensorflow-installation-guide) to find all the available Docker images. +```bash +docker pull docker.io/intelaipg/intel-optimized-tensorflow:latest +``` +2. cd to the inference script directory in the local IntelAI repo +```bash +cd ~/models/benchmarks +``` +3. Run the Python script ``` launch_benchmark.py``` with the pretrained model. +The ```launch_benchmark.py``` script can be treated as an entry point to conveniently perform out-of-box high performance +inference on pretrained models of popular topologies. +The script will automatically set the recommended run-time options for supported topologies, +but if you choose to set your own options, refer to the full list of available flags and a detailed +explanation of the ```launch_benchmark.py``` script [here](/docs/general/tensorflow/LaunchBenchmark.md). + This step will automatically launch a new container on every run and terminate. Go to [Step 4](#step_4) to interactively run the script on the container. + +Substitute the `--model-source-dir` value with the location where you cloned the +[tensorflow/models](https://github.com/tensorflow/models.git) repo + + +``` +~/tensorflow-models/models +``` +3.1.
*Real Time inference* (using `--socket-id 0` and `--batch-size 1` for latency) + +If you wish to calculate the [BLEU](https://en.wikipedia.org/wiki/BLEU) metric to find out the machine-translation quality, pass the reference translation file (`newstest2014.de`) with the `reference` flag. +The `newstest2014.en` file must have only one sentence per line. + + +console in: +```bash +python launch_benchmark.py \ + --model-name transformer_lt_official \ + --precision fp32 \ + --mode inference \ + --framework tensorflow \ + --batch-size 1 \ + --socket-id 0 \ + --docker-image intelaipg/intel-optimized-tensorflow:latest \ + --model-source-dir ~/tensorflow-models/models \ + --in-graph ~/transformer_LT_german/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \ + --data-location ~/transformer_LT_german/transformer_lt_official_fp32_pretrained_model/data \ + -- file=newstest2014.en \ + vocab_file=vocab.txt \ + file_out=translate.txt \ + reference=newstest2014.de +``` + +The translated German text will be in the file `translate.txt` located at `~/models/benchmarks/common/tensorflow/logs` + +3.2. *Max Throughput inference* (using `--socket-id 0` and `--batch-size 64` for throughput) + +```bash +python launch_benchmark.py \ + --model-name transformer_lt_official \ + --precision fp32 \ + --mode inference \ + --framework tensorflow \ + --batch-size 64 \ + --socket-id 0 \ + --docker-image intelaipg/intel-optimized-tensorflow:latest \ + --model-source-dir ~/tensorflow-models/models \ + --in-graph ~/transformer_LT_german/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \ + --data-location ~/transformer_LT_german/transformer_lt_official_fp32_pretrained_model/data \ + -- file=newstest2014.en \ + vocab_file=vocab.txt \ + file_out=translate.txt \ + reference=newstest2014.de +``` +console out: +``` +Graph parsed in ..... s +import_graph_def took .....s +tokenizer took ..... s +Translating 3003 sentences from English to German. +Total inferencing time:.... +Throughput:....
sentences/second +Total number of sentences translated:3003 +I0419 22:50:49.856748 140013257643776 compute_bleu.py:106] Case-insensitive results: 27.510020 +I0419 22:50:51.203501 140013257643776 compute_bleu.py:110] Case-sensitive results: 26.964748 +Ran inference with batch size 64 +Log location outside container: /~/models/benchmarks/common/tensorflow/logs/benchmark_transformer_lt_official_inference_fp32_20190419_224047.log +``` + +The logs are captured in a directory outside of the container.
+ +4. If you want to run the ```launch_benchmark.py``` interactively from within the docker container, add the flag ```--debug```. This will launch a docker container based on the ```--docker-image```, +perform the necessary installs, run the ```launch_benchmark.py``` script, and not terminate the container process. As an example, this step will demonstrate real-time inference (--batch-size 1), but you can implement the same strategy for max throughput (--batch-size 64). + +console in: +```bash +python launch_benchmark.py \ + --model-name transformer_lt_official \ + --precision fp32 \ + --mode inference \ + --framework tensorflow \ + --batch-size 1 \ + --socket-id 0 \ + --docker-image intelaipg/intel-optimized-tensorflow:latest \ + --model-source-dir ~/tensorflow-models/models \ + --in-graph ~/transformer_LT_german/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \ + --data-location ~/transformer_LT_german/transformer_lt_official_fp32_pretrained_model/data \ + --debug \ + -- file=newstest2014.en \ + vocab_file=vocab.txt \ + file_out=translate.txt \ + reference=newstest2014.de + +``` +console out: +```bash + lscpu_path_cmd = command -v lscpu + lscpu located here: b'/usr/bin/lscpu' + root@a78677f56d69:/workspace/benchmarks/common/tensorflow# +``` + +To rerun the benchmarking script, execute the ```start.sh``` bash script from your existing directory with the available flags, which in turn will run ```launch_benchmark.py```. For example, to rerun with a different batch size (batch size=64), set ```BATCH_SIZE```, +and to skip reinstalling packages on the rerun, pass ```True``` to ```NOINSTALL```. + +```bash + chmod +x ./start.sh +``` +```bash + NOINSTALL=True BATCH_SIZE=64 ./start.sh +``` + +All other flags will default to the values passed in the first ```launch_benchmark.py``` run that started the container. [See here](/docs/general/tensorflow/LaunchBenchmark.md) to get the full list of flags.
+ + From 9f0ee3d24991e9713d548adc11efe26f5f4ec6b5 Mon Sep 17 00:00:00 2001 From: Wafaa Taie Date: Fri, 3 May 2019 13:49:49 -0700 Subject: [PATCH 29/62] Add instructions to download and convert coco dataset to TF records using a custom script (SSD-VGG16 model). (#298) * add instructions for how to download coco dataset and it convert to tf records (using a custom script). --- .../tensorflow/ssd_vgg16/README.md | 195 +++++------------- .../inference/generate_coco_records.py | 185 +++++++++++++++++ 2 files changed, 237 insertions(+), 143 deletions(-) create mode 100755 models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md index 653d6a3ce..320223c95 100644 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md @@ -22,93 +22,80 @@ $ cd SSD.TensorFlow $ git checkout 2d8b0cb9b2e70281bf9dce438ff17ffa5e59075c ``` -2. Download the 2017 validation +2. Clone the [intelai/models](https://github.com/intelai/models) repository. +It will be used to run the SSD-VGG16 model accuracy and benchmark tests. + +3. Download the 2017 validation images file: [COCO dataset](http://cocodataset.org/#home) and annotations: This is required if you would like to run the accuracy test, or the throughput and latency benchmark with real data. -The [TensorFlow models](https://github.com/tensorflow/models) repo will be used for -converting the coco dataset to the TF records format. -Follow [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#dependencies) to install the required dependencies (`cocoapi` and `Protobuf 3.0.0`). ``` -$ mkdir val -$ cd val $ wget http://images.cocodataset.org/zips/val2017.zip $ unzip val2017.zip -$ cd .. ``` -Continue the instructions below to generate the -TF record file. 
+Download the validation annotations file: ``` -$ mkdir annotations -$ cd annotations $ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip $ unzip annotations_trainval2017.zip -$ cd .. ``` -Since we are only using the validation dataset in this example, we will -create an empty directory and empty annotations json file to pass as the -train and test directories in the next step. -``` -$ mkdir empty_dir +4. Convert the COCO dataset to TF records format: -$ cd annotations -$ echo "{ \"images\": {}, \"categories\": {}}" > empty.json -$ cd .. -``` +We provide a script `generate_coco_records.py` to convert the raw dataset to the required TF records format. +* Some dependencies must be installed to run the script, such as `python3`, `TensorFlow` and `tqdm`, as well as the `SSD.TensorFlow/dataset` directory from the original model directory (from step 1). -3. Now that you have the raw COCO dataset, we need to convert it to the -TF records format in order to use it with the inference script. We will -do this by running the `create_coco_tf_record.py` file in the TensorFlow -models repo. +Follow the steps below to get the COCO TF records: -Follow the steps below to navigate to the proper directory and point the -script to the raw COCO dataset files that you have downloaded in step 2. -The `--output_dir` is the location where the TF record files will be -located after the script has completed. +* Copy the `generate_coco_records.py` script from `models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py` +from the `models` directory (step 2) to `SSD.TensorFlow/dataset` in the original model directory (step 1).
``` -# We are going to use an older version of the conversion script to checkout the git commit -$ git clone https://github.com/tensorflow/models.git -$ cd models -$ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 +$ cp /home//models/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py /home//SSD.TensorFlow/dataset +``` -$ cd research/object_detection/dataset_tools/ -$ python create_coco_tf_record.py --logtostderr \ - --train_image_dir="/home//coco/empty_dir" \ - --val_image_dir="/home//coco/val/val2017" \ - --test_image_dir="/home//coco/empty_dir" \ - --train_annotations_file="/home//coco/annotations/empty.json" \ - --val_annotations_file="/home//coco/annotations/instances_val2017.json" \ - --testdev_annotations_file="/home//coco/annotations/empty.json" \ - --output_dir="/home//coco/output" +* Create directory for the output TF records: +``` +$ mkdir tf_records +``` -$ ll /home/myuser/coco/output -total 1598276 --rw-rw-r--. 1 0 Nov 2 21:46 coco_testdev.record --rw-rw-r--. 1 0 Nov 2 21:46 coco_train.record --rw-rw-r--. 1 818336740 Nov 2 21:46 coco_val.record +* Run the script to generate the TF records with the required prefix `val`, COCO raw dataset and annotation file (step 3): +``` +$ cd /home//SSD.TensorFlow/dataset +$ python generate_coco_records.py \ +--image_path /home//val2017/ \ +--annotations_file /home//annotations/instances_val2017.json \ +--output_prefix val \ +--output_path /home//tf_records/ ``` -4. Download the pretrained model: +Now, you can use the `/home//tf_records/` as the dataset location to run inference with real data, and test the model accuracy. +``` +$ ls -l /home//tf_records +total 792084 +-rw-r--r--. 1 170038836 Mar 17 21:35 val-00000-of-00005 +-rw-r--r--. 1 167260232 Mar 17 21:35 val-00001-of-00005 +-rw-r--r--. 1 167326957 Mar 17 21:35 val-00002-of-00005 +-rw-r--r--. 1 166289231 Mar 17 21:35 val-00003-of-00005 +-rw-r--r--. 1 140168531 Mar 17 21:35 val-00004-of-00005 +``` + +5. 
Download the pretrained model: ``` $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/ssdvgg16_int8_pretrained_model.pb ``` -5. Clone the [intelai/models](https://github.com/intelai/models) repo -and then run the benchmarking scripts for either benchmarking throughput +6. Navigate to the `benchmarks` directory (step 2), and run the benchmarking scripts for either benchmarking throughput and latency or accuracy. ``` -$ git clone git@github.com:IntelAI/models.git -$ cd benchmarks +$ cd models/benchmarks ``` * Run benchmarking for throughput and latency where the `--model-source-dir` is the model source directory from step 1, -and the `--in-graph` is the pretrained model graph from step 4, -if you specify the `--data-location` which is the path to the tf record file that you generated in step 3, +and the `--in-graph` is the pretrained model graph from step 5, +if you specify the `--data-location` which is the path to the tf record file that you generated in step 4, the benchmark will run with real data, otherwise dummy data will be used: ``` python launch_benchmark.py \ @@ -118,7 +105,7 @@ python launch_benchmark.py \ --framework tensorflow \ --docker-image intelaipg/intel-optimized-tensorflow:nightly-master-devel-mkl-py3 \ --model-source-dir /home//SSD.TensorFlow \ - --data-location /home//coco/output \ + --data-location /home//tf_records \ --in-graph /home//ssdvgg16_int8_pretrained_model.pb \ --batch-size 1 \ --socket-id 0 \ @@ -137,7 +124,7 @@ the model directory `SSD.TensorFlow` from step 1. $ git clone https://github.com/waleedka/coco.git ``` - * The `--data-location` is required, which is the path to the tf record file that you generated in step 3. + * The `--data-location` is required, which is the path to the tf record file that you generated in step 4. * Copy the annotation file `instances_val2017.json` (from step 3) to the dataset directory `/home//coco/output`. 
* Use the `--accuracy-only` flag: ``` @@ -148,7 +135,7 @@ python launch_benchmark.py \ --framework tensorflow \ --docker-image intelaipg/intel-optimized-tensorflow:nightly-master-devel-mkl-py3 \ --model-source-dir /home//SSD.TensorFlow \ - --data-location /home//coco/output \ + --data-location /home//tf_records \ --in-graph /home//ssdvgg16_int8_pretrained_model.pb \ --accuracy-only \ --batch-size 1 @@ -195,106 +182,28 @@ And here is a sample log file tail when running for accuracy: ## FP32 Inference Instructions -1. Clone the [original model](https://github.com/HiKapok/SSD.TensorFlow) repository: -``` -$ git clone https://github.com/HiKapok/SSD.TensorFlow.git -$ cd SSD.TensorFlow -$ git checkout 2d8b0cb9b2e70281bf9dce438ff17ffa5e59075c -``` - -2. Download the 2017 validation -[COCO dataset](http://cocodataset.org/#home) and annotations: - -This is required if you would like to run the accuracy test, -or the throughput and latency benchmark with real data. - -The [TensorFlow models](https://github.com/tensorflow/models) repo will be used for -converting the coco dataset to the TF records format. -Follow [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#dependencies) to install the required dependencies (`cocoapi` and `Protobuf 3.0.0`). -``` -$ mkdir val -$ cd val -$ wget http://images.cocodataset.org/zips/val2017.zip -$ unzip val2017.zip -$ cd .. -``` - -Continue the instructions below to generate the -TF record file. -``` -$ mkdir annotations -$ cd annotations -$ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -$ unzip annotations_trainval2017.zip -$ cd .. -``` - -Since we are only using the validation dataset in this example, we will -create an empty directory and empty annotations json file to pass as the -train and test directories in the next step. -``` -$ mkdir empty_dir - -$ cd annotations -$ echo "{ \"images\": {}, \"categories\": {}}" > empty.json -$ cd .. -``` - -3. 
Now that you have the raw COCO dataset, we need to convert it to the -TF records format in order to use it with the inference script. We will -do this by running the `create_coco_tf_record.py` file in the TensorFlow -models repo. - -Follow the steps below to navigate to the proper directory and point the -script to the raw COCO dataset files that you have downloaded in step 2. -The `--output_dir` is the location where the TF record files will be -located after the script has completed. - -``` -# We are going to use an older version of the conversion script to checkout the git commit -$ git clone https://github.com/tensorflow/models.git -$ cd models -$ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 - -$ cd research/object_detection/dataset_tools/ -$ python create_coco_tf_record.py --logtostderr \ - --train_image_dir="/home//coco/empty_dir" \ - --val_image_dir="/home//coco/val/val2017" \ - --test_image_dir="/home//coco/empty_dir" \ - --train_annotations_file="/home//coco/annotations/empty.json" \ - --val_annotations_file="/home//coco/annotations/instances_val2017.json" \ - --testdev_annotations_file="/home//coco/annotations/empty.json" \ - --output_dir="/home//coco/output" - -$ ll /home/myuser/coco/output -total 1598276 --rw-rw-r--. 1 0 Nov 2 21:46 coco_testdev.record --rw-rw-r--. 1 0 Nov 2 21:46 coco_train.record --rw-rw-r--. 1 818336740 Nov 2 21:46 coco_val.record -``` +Follow steps 1, 2, 3, and 4 above. -4. Download the pretrained model: +5. Download the pretrained model: ``` $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/ssdvgg16_fp32_pretrained_model.pb ``` -5. Clone the [intelai/models](https://github.com/intelai/models) repo -and then run the benchmarking scripts for either benchmarking throughput +6. Navigate to the `benchmarks` directory (step 2), and run the benchmarking scripts for either benchmarking throughput and latency or accuracy.
``` -$ git clone git@github.com:IntelAI/models.git -$ cd benchmarks +$ cd models/benchmarks ``` * Run benchmarking for throughput and latency where the `--model-source-dir` is the model source directory from step 1, -and the `--in-graph` is the pretrained model graph from step 4, -if you specify the `--data-location` which is the path to the tf record file that you generated in step 3, +and the `--in-graph` is the pretrained model graph from step 5, +if you specify the `--data-location` which is the path to the tf record file that you generated in step 4, the benchmark will run with real data, otherwise dummy data will be used: ``` $ cd /home//models/benchmarks $ python launch_benchmark.py \ - --data-location /home//coco/output \ + --data-location /home//tf_records \ --in-graph /home//ssdvgg16_fp32_pretrained_model.pb \ --model-source-dir /home//SSD.TensorFlow \ --model-name ssd_vgg16 \ @@ -330,7 +239,7 @@ python launch_benchmark.py \ --framework tensorflow \ --docker-image intelaipg/intel-optimized-tensorflow:nightly-master-devel-mkl-py3 \ --model-source-dir /home//SSD.TensorFlow \ - --data-location /home//coco/output \ + --data-location /home//tf_records \ --in-graph /home//ssdvgg16_fp32_pretrained_model.pb \ --accuracy-only \ --batch-size 1 diff --git a/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py b/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py new file mode 100755 index 000000000..6badc74a9 --- /dev/null +++ b/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py @@ -0,0 +1,185 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +import argparse +import os +import json +import numpy as np +from tqdm import tqdm +import tensorflow as tf +from convert_tfrecords import ImageCoder, _process_image, _int64_feature, _float_feature, _bytes_feature, _bytes_list_feature + + +def load_annotation_data(annotations_filename): + + # Load annotation data + with open(annotations_filename, 'r') as annotations_file: + data = json.load(annotations_file) + + # Create map of category IDs to category names + category_map = {} + for category_datum in data['categories']: + category_map[category_datum['id']] = category_datum['name'] + + # Create map of file IDs to annotation data + annotation_map = {} + for annotation_datum in data['annotations']: + image_id = annotation_datum['image_id'] + if (image_id not in annotation_map): + annotation_map[image_id] = [] + + # Add annotation datum for current image ID + annotation_map[image_id].append(annotation_datum) + + # Create map of file IDs to image data + image_map = {} + for image_datum in data['images']: + image_id = image_datum['id'] + if (image_id in annotation_map): + image_map[image_id] = image_datum + + return image_map, annotation_map, category_map + + +def get_annotation_data(image_data, annotation_data, category_map): + + LABEL_MAP = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, + 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, + 23: 22, 24: 23, 25: 24, 27: 25, 28: 26, 31: 27, 32: 28, 33: 29, 34: 30, 35: 31, + 36: 32, 37: 33, 38: 34, 39: 35, 40: 36, 41: 
37, 42: 38, 43: 39, 44: 40, 46: 41, + 47: 42, 48: 43, 49: 44, 50: 45, 51: 46, 52: 47, 53: 48, 54: 49, 55: 50, 56: 51, + 57: 52, 58: 53, 59: 54, 60: 55, 61: 56, 62: 57, 63: 58, 64: 59, 65: 60, 67: 61, + 70: 62, 72: 63, 73: 64, 74: 65, 75: 66, 76: 67, 77: 68, 78: 69, 79: 70, 80: 71, + 81: 72, 82: 73, 84: 74, 85: 75, 86: 76, 87: 77, 88: 78, 89: 79, 90: 80} + + # Retrieve image width and height + image_width = image_data['width'] + image_height = image_data['height'] + + bboxes = [] + labels = [] + label_names = [] + difficult = [] + truncated = [] + for annotation_datum in annotation_data: + # Scale bounding box coordinates + # COCO bounding boxes are [x, y, width, height] but https://github.com/HiKapok/SSD.TensorFlow.git expects [ymin, xmin, ymax, xmax] + bbox = annotation_datum['bbox'] + ymin = bbox[1] / image_height + xmin = bbox[0] / image_width + ymax = (bbox[1] + bbox[3]) / image_height + xmax = (bbox[0] + bbox[2]) / image_width + bboxes.append([ymin, xmin, ymax, xmax]) + + labels.append(LABEL_MAP[annotation_datum['category_id']]) + label_names.append(category_map[annotation_datum['category_id']].encode('ascii')) + + # Append difficult and truncated flags + difficult.append(0) + truncated.append(0) + + return bboxes, labels, label_names, difficult, truncated + + +def get_record(filename, buffer, width, height, bboxes, labels, label_names, difficult, truncated): + + CHANNEL_COUNT = 3 + IMAGE_FORMAT = 'JPEG' + + # Extract bounding box coordinates + ymin = [] + xmin = [] + ymax = [] + xmax = [] + for bbox in bboxes: + ymin.append(bbox[0]) + xmin.append(bbox[1]) + ymax.append(bbox[2]) + xmax.append(bbox[3]) + + # Create record features + features = { + 'image/width': _int64_feature(width), + 'image/height': _int64_feature(height), + 'image/channels': _int64_feature(CHANNEL_COUNT), + 'image/shape': _int64_feature([height, width, CHANNEL_COUNT]), + 'image/object/bbox/xmin': _float_feature(xmin), + 'image/object/bbox/xmax': _float_feature(xmax), + 
'image/object/bbox/ymin': _float_feature(ymin), + 'image/object/bbox/ymax': _float_feature(ymax), + 'image/object/bbox/label': _int64_feature(labels), + 'image/object/bbox/label_text': _bytes_list_feature(label_names), + 'image/object/bbox/difficult': _int64_feature(difficult), + 'image/object/bbox/truncated': _int64_feature(truncated), + 'image/format': _bytes_feature(IMAGE_FORMAT), + 'image/filename': _bytes_feature(filename.encode('utf8')), + 'image/encoded': _bytes_feature(buffer)} + + return tf.train.Example(features = tf.train.Features(feature = features)) + + +def main(): + + RECORDS_PER_FILE = 1024 + RECORD_FILENAME_FORMAT = '%s-%.5d-of-%.5d' + + parser = argparse.ArgumentParser() + parser.add_argument('--image_path', type = str, required = True, help = 'path to the input validation image files') + parser.add_argument('--annotations_file', type = str, required = True, help = 'name of the input validation annotations file') + parser.add_argument('--output_prefix', type = str, required = True, help = 'prefix of the output TensorFlow record files') + parser.add_argument('--output_path', type = str, required = True, help = 'path to the output TensorFlow record files') + + args = parser.parse_args() + + # Load annotation data + image_map, annotation_map, category_map = load_annotation_data(args.annotations_file) + + # Create output path if necessary + if (not os.path.exists(args.output_path)): + os.makedirs(args.output_path) + + # Create image coder + image_coder = ImageCoder() + + record_file_index = 0 + record_file_count = np.ceil(len(image_map) / RECORDS_PER_FILE).astype(int) + for index, image_id in tqdm(enumerate(image_map), desc = 'Generating', total = len(image_map), unit = ' file'): + # Create record writer + if (index % RECORDS_PER_FILE == 0): + output_filename = os.path.join(args.output_path, RECORD_FILENAME_FORMAT % (args.output_prefix, record_file_index, record_file_count)) + writer = tf.python_io.TFRecordWriter(output_filename) + record_file_index 
+= 1 + + # Extract image data from current image file + image_filename = image_map[image_id]['file_name'] + image_buffer, _, _ = _process_image(os.path.join(args.image_path, image_filename), image_coder) + + # Retrieve annotation data associated with current image file + bboxes, labels, label_names, difficult, truncated = get_annotation_data(image_map[image_id], annotation_map[image_id], category_map) + + # Write TF record for current image file + image_width, image_height = image_map[image_id]['width'], image_map[image_id]['height'] + record = get_record(image_filename, image_buffer, image_width, image_height, bboxes, labels, label_names, difficult, truncated) + writer.write(record.SerializeToString()) + + +if __name__ == '__main__': + + main() \ No newline at end of file From 339e8bab17ad82d87a9ed328c7f6182dc37f7585 Mon Sep 17 00:00:00 2001 From: Wafaa Taie Date: Mon, 6 May 2019 09:43:07 -0700 Subject: [PATCH 30/62] Use model-based JSON files for unit tests args (#294) * add an example for a resnet50 json file. * add a method to parse the models args json files, add resnet50 and facenet model args, tested and validated it. * clean up parse csv code. * convert some models args to json. * modify the parsing method doc. * add rfcn test and delete tf_model_args.txt file. * add faster_cnn test. * add unit tests for more models. * remeove white spaces. * add more tests. * add more tests. * fix int8 unit tests to work with memory alloc enabled, add ssdvgg16 unit test. * update the contribute.md * add empty line at the end of the file. * fix rfcn performance script. * add densenet169 unit test. * print out comments for each test. * add the two missing tests. 
--- Contribute.md | 9 +- .../rfcn/inference/int8/model_init.py | 5 +- tests/test_utils/io.py | 22 +++-- .../tensorflow/test_run_tf_benchmarks.py | 13 +-- .../unit/common/tensorflow/tf_model_args.txt | 95 ------------------- .../tf_model_args/tf_dcgan_args.json | 5 + .../tf_model_args/tf_densenet169_args.json | 15 +++ .../tf_model_args/tf_draw_args.json | 15 +++ .../tf_model_args/tf_facenet_args.json | 13 +++ .../tf_model_args/tf_faster_rcnn_args.json | 28 ++++++ .../tf_model_args/tf_gnmt_args.json | 11 +++ .../tf_inception_resnet_v2_args.json | 27 ++++++ .../tf_model_args/tf_inceptionv3_args.json | 44 +++++++++ .../tf_model_args/tf_inceptionv4_args.json | 19 ++++ .../tf_model_args/tf_lm_1b_args.json | 7 ++ .../tf_model_args/tf_maskrcnn_args.json | 11 +++ .../tf_model_args/tf_mobilenet_v1_args.json | 36 +++++++ .../tf_model_args/tf_mtcc_args.json | 5 + .../tensorflow/tf_model_args/tf_ncf_args.json | 15 +++ .../tf_model_args/tf_resnet101_args.json | 17 ++++ .../tf_model_args/tf_resnet50_args.json | 40 ++++++++ .../tf_model_args/tf_rfcn_args.json | 17 ++++ .../tf_model_args/tf_squeezenet_args.json | 11 +++ .../tf_model_args/tf_ssd_mobilenet_args.json | 17 ++++ .../tf_model_args/tf_ssd_resnet34_args.json | 11 +++ .../tf_model_args/tf_ssd_vgg16_args.json | 17 ++++ .../tf_transformer_language_args.json | 9 ++ .../tf_transformer_lt_official_args.json | 9 ++ .../tf_model_args/tf_unet_args.json | 7 ++ .../tf_model_args/tf_wavenet_args.json | 7 ++ .../tf_model_args/tf_wide_deep_args.json | 5 + .../tf_wide_deep_large_ds_args.json | 27 ++++++ 32 files changed, 471 insertions(+), 118 deletions(-) delete mode 100644 tests/unit/common/tensorflow/tf_model_args.txt create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_dcgan_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_densenet169_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_draw_args.json create mode 100644 
tests/unit/common/tensorflow/tf_model_args/tf_facenet_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_faster_rcnn_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_gnmt_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_inception_resnet_v2_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_inceptionv3_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_inceptionv4_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_lm_1b_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_maskrcnn_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_mobilenet_v1_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_mtcc_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_ncf_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_resnet101_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_resnet50_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_rfcn_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_squeezenet_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_ssd_mobilenet_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_ssd_resnet34_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_ssd_vgg16_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_transformer_language_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_transformer_lt_official_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_unet_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_wavenet_args.json create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_args.json create mode 100644 
tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_large_ds_args.json diff --git a/Contribute.md b/Contribute.md index 55d2b7e44..73c58e8af 100644 --- a/Contribute.md +++ b/Contribute.md @@ -177,10 +177,11 @@ developing new scripts: arguments. To add a new parameterized instance of the test for your - new model, update the [tf_models_args.txt](/tests/unit/common/tensorflow/tf_model_args.txt) - file. This file has comma-separated values where each row has two - items: (1) the `run_tf_benchmarks.py` command with the appropriate - flags to run the model (2) the expected inference or training + new model, add a new JSON file `tf_<model_name>_args.json` to the [tf_model_args](/tests/unit/common/tensorflow/tf_model_args) + directory. Each file holds a list of dictionaries, and each dictionary has three + items: (1) `_comment`, a comment describing the command, + (2) `input`, the `run_tf_benchmarks.py` command with the appropriate + flags to run the model, and (3) `output`, the expected inference or training command that should get run by the `model_init.py` file. * If any launch script or base class files were changed, then additional unit tests should be added. 
diff --git a/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py index 4f2a29ab4..f52eed9b4 100755 --- a/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py +++ b/benchmarks/object_detection/tensorflow/rfcn/inference/int8/model_init.py @@ -54,9 +54,6 @@ def __init__(self, args, custom_args=[], platform_util=None): self.parse_args() - # Get the command previx, but numactl is added later in run_perf_command() - self.command.append(self.get_command_prefix(self.args.socket_id, numactl=False)) - # Set KMP env vars, if they haven't already been set config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") self.set_kmp_vars(config_file_path) @@ -114,6 +111,8 @@ def validate_args(self): format(self.args.model_source_dir)) def run_perf_command(self): + # Get the command previx, but numactl is added later in run_perf_command() + self.command.append(self.get_command_prefix(self.args.socket_id, numactl=False)) num_cores = str(self.platform_util.num_cores_per_socket) if self.args.num_cores != -1: num_cores = str(self.args.num_cores) diff --git a/tests/test_utils/io.py b/tests/test_utils/io.py index 50f8e5e61..5ec580f94 100644 --- a/tests/test_utils/io.py +++ b/tests/test_utils/io.py @@ -18,19 +18,21 @@ # SPDX-License-Identifier: EPL-2.0 # -import csv +import os +import json -def parse_csv_file(file_path, expected_num_columns): +def parse_json_files(json_dir_path): """ - Reads the specified csv file. Checks for a value number of columns in - each row. Returns the csv file values as a list of tuples. + Reads the JSON files in the specified directory. Returns a list with one tuple per + entry in the files: (input, output, file name and comment). 
""" values = [] - with open(file_path) as csv_file: - csv_reader = csv.reader(csv_file, delimiter=',', - skipinitialspace=True) - for row in csv_reader: - assert len(row) == expected_num_columns - values.append(tuple(row)) + for model_file in os.listdir(json_dir_path): + file_path = os.path.join(json_dir_path, model_file) + with open(file_path) as f: + data = json.load(f) + for x in data: + values.append( + tuple((x['input'], x['output'], model_file + " :: " + x['_comment']))) return values diff --git a/tests/unit/common/tensorflow/test_run_tf_benchmarks.py b/tests/unit/common/tensorflow/test_run_tf_benchmarks.py index dba3e6ca6..0b58ac411 100644 --- a/tests/unit/common/tensorflow/test_run_tf_benchmarks.py +++ b/tests/unit/common/tensorflow/test_run_tf_benchmarks.py @@ -28,19 +28,19 @@ from benchmarks.common.tensorflow.run_tf_benchmark import ModelBenchmarkUtil from test_utils import platform_config -from test_utils.io import parse_csv_file +from test_utils.io import parse_json_files def parse_model_args_file(): """ - Gets test args from the tf_model_args.txt file to use as parameters + Gets test args from the models files in the specified directory to use as parameters for testing model benchmarking scripts. 
The file has a run_tf_benchmarks.py command with args with the corresponding run command that should get called from model_init.py """ current_dir = os.path.dirname(os.path.realpath(__file__)) - csv_file_path = os.path.join(current_dir, "tf_model_args.txt") - return parse_csv_file(csv_file_path, 2) + models_args_path = os.path.join(current_dir, "tf_model_args") + return parse_json_files(models_args_path) def delete_env_var(env_var): @@ -63,7 +63,7 @@ def clear_kmp_env_vars(): test_arg_values = parse_model_args_file() -@pytest.mark.parametrize("test_args,expected_cmd", test_arg_values) +@pytest.mark.parametrize("test_args,expected_cmd,comment", test_arg_values) @patch("os.mkdir") @patch("shutil.rmtree") @patch("os.listdir") @@ -81,12 +81,13 @@ def clear_kmp_env_vars(): def test_run_benchmark(mock_run_command, mock_subprocess, mock_platform, mock_os, mock_glob, mock_remove, mock_chdir, mock_stat, mock_path_exists, mock_is_file, mock_is_dir, mock_listdir, mock_rmtree, mock_mkdir, - test_args, expected_cmd): + test_args, expected_cmd, comment): """ Runs through executing the specified run_tf_benchmarks.py command from the test_args and verifying that the model_init file calls run_command with the expected_cmd string. """ + print("****** Running The {} test ******".format(comment)) os.environ["PYTHON_EXE"] = "python" mock_path_exists.return_value = True mock_is_dir.return_value = True diff --git a/tests/unit/common/tensorflow/tf_model_args.txt b/tests/unit/common/tensorflow/tf_model_args.txt deleted file mode 100644 index 386d1185c..000000000 --- a/tests/unit/common/tensorflow/tf_model_args.txt +++ /dev/null @@ -1,95 +0,0 @@ -run_tf_benchmark.py --framework tensorflow --use-case recommendation --precision fp32 --mode inference --model-name wide_deep --batch-size 1024 --data-location /dataset --checkpoint /checkpoints --intelai-models . 
--verbose,OMP_NUM_THREADS=1 numactl --cpunodebind=0 --membind=0 python inference/fp32/wide_deep_inference.py --data_dir=/dataset --model_dir=/checkpoints --batch_size=1024 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb, numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb --data-location=/dataset, numactl --cpunodebind=0 --membind=0 python 
/workspace/intelai_models/inference/fp32/accuracy.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb --data_location=/dataset -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 100 --in-graph /final_int8_inceptionv3.pb --intelai-models . --accuracy-only --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./int8/accuracy.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/final_int8_inceptionv3.pb -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . 
--benchmark-only --socket-id 0 --disable-tcmalloc=True,numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inceptionv3 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/inception_frozen_max_min.pb --steps=200 --warmup-steps=20,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inceptionv3 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/inception_frozen_max_min.pb --steps=200 --warmup-steps=20,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 100 --accuracy-only --data-location /dataset --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--verbose,python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=fp32 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python 
/workspace/intelai_models/inference/benchmark.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=1 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data-location=/dataset,python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data_location=/dataset --input_height=299 --input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb,numactl --cpunodebind=0 --membind=0 python 
/workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=1 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=128 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data_location=/dataset --input_height=299 --input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose 
--in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --inter-op-parallelism-threads=1 --intra-op-parallelism-threads=28 --batch-size=1 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --inter-op-parallelism-threads=1 --intra-op-parallelism-threads=28 --batch-size=128 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --data-location=/dataset --calibration-only,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/int8/generate_calibration_data.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/in_graph/resnet50_int8_pretrained_model.pb --data_location=/dataset -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name resnet50 --batch-size 100 --data-location /dataset --in-graph 
/final_int8_resnet50.pb --intelai-models . --accuracy-only --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name resnet50 --batch-size 128 --in-graph /final_int8_resnet50.pb --intelai-models . --benchmark-only --verbose,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=128 --warmup-steps=10 --steps=50 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --steps=200 --warmup-steps=20,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 1 --in-graph /freezed_resnet50.pb --intelai-models . 
--socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 128 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose,numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 1 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size 100 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/freezed_resnet50.pb --accuracy-only --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=100 --data-location=/dataset --accuracy-only --num-cores=28 --warmup-steps=10 --steps=50 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference 
--benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --output-results --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50_fp32_pretrained_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --results-file-path /workspace/benchmarks/common/tensorflow/logs/resnet50_fp32_inference_results*.txt -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name squeezenet --batch-size 64 --checkpoint /checkpoints --intelai-models . --socket-id 0 --verbose,taskset -c 0-27 python ./fp32/train_squeezenet.py --data_location None --batch_size 64 --num_inter_threads 1 --num_intra_threads 28 --model_dir /checkpoints --inference-only --verbose -run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name squeezenet --batch-size 1 --checkpoint /checkpoints --intelai-models . --socket-id 0 --verbose,taskset -c 0-27 python ./fp32/train_squeezenet.py --data_location None --batch_size 1 --num_inter_threads 1 --num_intra_threads 28 --model_dir /checkpoints --inference-only --verbose -run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . 
--socket-id 0 --verbose --config_file=pipeline.config,numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 1 --num_intra_threads 28 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval -run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --config_file=pipeline.config --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 4 --num_intra_threads 16 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset --in-graph=/in_graph/frozen_inference_graph.pb,sh /workspace/intelai_models/inference/fp32/coco_accuracy.sh /in_graph/frozen_inference_graph.pb /dataset/coco_val.record /workspace/models -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --data-location=/dataset, sh /workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/ssdmobilenet_int8_pretrained_model.pb /dataset -run_tf_benchmark.py --framework=tensorflow 
--use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size=1 --socket-id 0 --data-location=/dataset --verbose --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --benchmark-only --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/run_frozen_graph_ssdmob.py -g /in_graph/ssdmobilenet_int8_pretrained_model.pb -n 5000 -d /dataset -x --num-inter-threads 2 --num-intra-threads 28 -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py --input_tfrecord_paths=/dataset --output_tfrecord_path=/SSD-mobilenet-out.tfrecord --inference_graph=/in_graph/frozen_inference_graph.pb --discard_image_pixels=True --num_inter_threads=2 --num_intra_threads=28 -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --accuracy-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --benchmark-dir=/workspace/benchmarks --data-location=/dataset,sh /workspace/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/ssdmobilenet_accuracy.sh /in_graph/frozen_inference_graph.pb /dataset -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection 
--model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --accuracy-only --data-location /dataset -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 -run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name rfcn --checkpoint /checkpoints --intelai-models . --model-source-dir . 
--socket-id 0 --verbose --config_file=rfcn_pipeline.config,numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --inter_op 1 --intra_op 28 --omp 28 --pipeline_config_path /checkpoints/rfcn_pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/models/rfcn/eval --logtostderr --blocktime=0 --run_once=True -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset --accuracy-only --split=accuracy_message,FROZEN_GRAPH=/in_graph/frozen_inference_graph.pb TF_RECORD_FILE=/dataset SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/fp32/coco_mAP.sh -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --benchmark-only --number_of_steps=500,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_rfcn_inference.py -m /workspace/models -g /in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb -x 500 -d /dataset --num-inter-threads 2 --num-intra-threads 56 -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --accuracy-only --split=accuracy_message,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 
FROZEN_GRAPH=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb TF_RECORD_FILE=/dataset SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/int8/coco_mAP.sh -run_tf_benchmark.py --framework tensorflow --use-case text_to_speech --precision fp32 --mode inference --model-name wavenet --num-cores 1 --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --checkpoint_name=model.ckpt-99 --sample=8510,numactl --physcpubind=0-0 --membind=0 python generate.py /checkpoints/model.ckpt-99 --num_inter_threads=1 --num_intra_threads=1 --sample=8510 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --accuracy-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb --data-location=/dataset,python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=2 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=56 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference 
--model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_int8_model.pb,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-intra-threads=28 --num-inter-threads=1 --input-graph=/in_graph/resnet101_int8_model.pb --warmup-steps=40 --steps=100 -"run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose 
--checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de","numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=1 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" -"run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de","numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=32 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" -run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en 
--reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=1 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt -run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=64 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb,numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=64 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data-location=/dataset --input_height=224 --input_width=224,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/accuracy.py --input_height=224 --input_width=224 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=100 
--input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data_location=/dataset --input_layer=input -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=240 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50 -run_tf_benchmark.py --framework=tensorflow 
--use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 1 -python common/tensorflow/run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=lm-1b --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models,numactl --cpunodebind=0 --membind=0 python /workspace/models/benchmark.py -b=1 -I=100 --inter=1 --intra=28 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 100 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks 
--intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints,numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 100 -run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --in-graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --data-location=/dataset,python /workspace/intelai_models/inference/fp32/accuracy.py --batch_size=100 --num_inter_threads=2 --input_graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --num_intra_threads=56 --data_location=/dataset -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --accuracy_only -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --benchmark-only 
--verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --benchmark_only -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=1 --inference_only --benchmark_only -run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/coco.py evaluate --dataset=/dataset --num_inter_threads 1 --num_intra_threads 28 --nw 5 --nb 50 --model=coco --infbs 1 -run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/coco.py evaluate --dataset=/dataset --num_inter_threads 4 --num_intra_threads 16 --nw 5 --nb 50 --model=coco --infbs 1 -run_tf_benchmark.py --framework=tensorflow 
--use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 /workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/pretrained_int8_faster_rcnn_model.pb /dataset /workspace/models -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -d /dataset --num-inter-threads 2 --num-intra-threads 56 -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset --number-of-steps=500,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -n 500 -d /dataset --num-inter-threads 2 --num-intra-threads 56 -run_tf_benchmark.py --framework=tensorflow --use-case=adversarial_networks --model-name=dcgan --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose 
--checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/inference_bench.py -ckpt /checkpoints -dl /dataset --num_inter_threads 1 --num_intra_threads 28 -nw 100 -nb 500 --bs 100 --kmp_blocktime 1 --kmp_settings 1 -run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 1 --num_intra_threads 28 --bs 1 --dl /dataset --nw 100 --nb 200 -run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 1 --num_intra_threads 28 --bs 100 --dl /dataset --nw 100 --nb 200 -run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --infer_mode=beam_search,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/nmt.py --src=de --tgt=en --hparams_path=/workspace/intelai_models/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json 
--out_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_prefix=/dataset/vocab.bpe.32000 --ckpt=/checkpoints/translate.ckpt --infer_batch_size=1 --inference_input_file=/dataset/newstest2015.tok.bpe.32000.de --inference_output_file=/workspace/benchmarks/common/tensorflow/logs/output_infer --inference_ref_file=/dataset/newstest2015.tok.bpe.32000.en --num_inter_threads=1 --num_intra_threads=28 --infer_mode=beam_search -run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --infer_mode=beam_search,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/nmt.py --src=de --tgt=en --hparams_path=/workspace/intelai_models/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json --out_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_prefix=/dataset/vocab.bpe.32000 --ckpt=/checkpoints/translate.ckpt --infer_batch_size=32 --inference_input_file=/dataset/newstest2015.tok.bpe.32000.de --inference_output_file=/workspace/benchmarks/common/tensorflow/logs/output_infer --inference_ref_file=/dataset/newstest2015.tok.bpe.32000.en --num_inter_threads=1 --num_intra_threads=28 --infer_mode=beam_search -run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints 
--num_inter_threads 4 --num_intra_threads 16 --bs 100 --dl /dataset --nw 100 --nb 200 -run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=unet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --checkpoint_name=model.ckpt,numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/unet_infer.py -bs 1 -cp /checkpoints/model.ckpt --num_inter_threads 1 --num_intra_threads 28 -nw 80 -nb 400 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py 
--num_intra_threads=1 --num_inter_threads=28 --batch_size=1 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=512 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 
--output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=1 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=512 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14 -run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=mtcc --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/one_image_test.py --num_inter_threads 1 --num_intra_threads 28 -ckpt /checkpoints -dl /dataset -run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment 
--model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=1 --num_intra_threads=28 --lfw_batch_size=1 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=200 --max_steps=1000 -run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000 -run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset, numactl --cpunodebind=0 --membind=0 python 
/workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000 -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --steps=500 --warmup-steps=100,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --warmup-steps=100 --steps=500 --data-location=/dataset -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset,LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework=tensorflow 
--use-case=object_detection --model-name=ssd_vgg16 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_fp32_pretrained_model.pb --data-location=/dataset,python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --data-location=/dataset --accuracy-only -run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --steps=500 --warmup-steps=100,numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --warmup-steps=100 --steps=500 --data-location=/dataset \ No newline at end of file diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_dcgan_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_dcgan_args.json new file mode 100644 index 000000000..e5802f700 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_dcgan_args.json @@ -0,0 +1,5 @@ +[ + { "_comment": "FP32 benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=adversarial_networks --model-name=dcgan --precision=fp32 --mode=inference 
--model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/inference_bench.py -ckpt /checkpoints -dl /dataset --num_inter_threads 1 --num_intra_threads 28 -nw 100 -nb 500 --bs 100 --kmp_blocktime 1 --kmp_settings 1"} +] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_densenet169_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_densenet169_args.json new file mode 100644 index 000000000..a5d665547 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_densenet169_args.json @@ -0,0 +1,15 @@ +[ + { "_comment": "FP32 latency benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb"}, + + { "_comment": "Fp32 accuracy", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb 
--data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/accuracy.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb --data_location=/dataset"}, + + { "_comment": "FP32 Throughput benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=densenet169 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/densenet169_fp32_pretrained_model.pb", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/benchmark.py --num_intra_threads=28 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/densenet169_fp32_pretrained_model.pb"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_draw_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_draw_args.json new file mode 100644 index 000000000..d638d7492 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_draw_args.json @@ -0,0 +1,15 @@ +[ + { "_comment": "FP32 latency benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 1 --num_intra_threads 28 --bs 1 --dl /dataset --nw 100 --nb 200"}, + + { "_comment": "FP32 throughput benchmark with --num-inter-threads 4 
--num-intra-threads 16", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 4 --num_intra_threads 16 --bs 100 --dl /dataset --nw 100 --nb 200"}, + + { "_comment": "FP32 Throughput benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=content_creation --model-name=draw --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/draw_inf.py --cp /checkpoints --num_inter_threads 1 --num_intra_threads 28 --bs 100 --dl /dataset --nw 100 --nb 200"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_facenet_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_facenet_args.json new file mode 100644 index 000000000..34b5af1fe --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_facenet_args.json @@ -0,0 +1,13 @@ +[ + { "_comment": "FP32 latency benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints 
--data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=1 --num_intra_threads=28 --lfw_batch_size=1 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=200 --max_steps=1000"}, + + { "_comment": "Fp32 accuracy", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 --lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000"}, + + { "_comment": "FP32 Throughput benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=facenet --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/src/validate_on_lfw.py /dataset /checkpoints --distance_metric 1 --use_flipped_images --subtract_mean --use_fixed_image_standardization --num_inter_threads=2 --num_intra_threads=28 
--lfw_batch_size=100 --lfw_pairs=/workspace/models/data/pairs.txt --warmup_steps=40 --max_steps=1000"} +] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_faster_rcnn_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_faster_rcnn_args.json new file mode 100644 index 000000000..ea6c0a75a --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_faster_rcnn_args.json @@ -0,0 +1,28 @@ +[ + { "_comment": "FP32 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset --in-graph=/in_graph/frozen_inference_graph.pb", + "output": "sh /workspace/intelai_models/inference/fp32/coco_accuracy.sh /in_graph/frozen_inference_graph.pb /dataset/coco_val.record /workspace/models"}, + + { "_comment": "FP32 benchmark command", + "input": "run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --config_file=pipeline.config", + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 1 --num_intra_threads 28 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval"}, + + { "_comment": "FP32 benchmark command with custom --num_inter_threads 4 --num_intra_threads 16", + "input": "run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name faster_rcnn --checkpoint /checkpoints --intelai-models . --model-source-dir . 
--socket-id 0 --verbose --config_file=pipeline.config --num-inter-threads 4 --num-intra-threads 16", + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --num_inter_threads 4 --num_intra_threads 16 --pipeline_config_path /checkpoints/pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/log/eval"}, + + { "_comment": "Int8 command for throughput benchmark with --number-of-steps enabled.", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset --number-of-steps=500", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -n 500 -d /dataset --num-inter-threads 2 --num-intra-threads 56"}, + + { "_comment": "Int8 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 /workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/pretrained_int8_faster_rcnn_model.pb /dataset /workspace/models"}, + + { "_comment": "Int8 benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 
--batch-size=-1 --socket-id=-1 --benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -d /dataset --num-inter-threads 2 --num-intra-threads 56" + } +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_gnmt_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_gnmt_args.json new file mode 100644 index 000000000..7fe7db376 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_gnmt_args.json @@ -0,0 +1,11 @@ +[ + { "_comment": "FP32 latency benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=gnmt --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --infer_mode=beam_search", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/nmt.py --src=de --tgt=en --hparams_path=/workspace/intelai_models/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json --out_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_prefix=/dataset/vocab.bpe.32000 --ckpt=/checkpoints/translate.ckpt --infer_batch_size=1 --inference_input_file=/dataset/newstest2015.tok.bpe.32000.de --inference_output_file=/workspace/benchmarks/common/tensorflow/logs/output_infer --inference_ref_file=/dataset/newstest2015.tok.bpe.32000.en --num_inter_threads=1 --num_intra_threads=28 --infer_mode=beam_search"}, + + { "_comment": "FP32 Throughput benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=gnmt --precision=fp32 
--mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --infer_mode=beam_search", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/fp32/nmt.py --src=de --tgt=en --hparams_path=/workspace/intelai_models/fp32/standard_hparams/wmt16_gnmt_4_layer_internal.json --out_dir=/workspace/benchmarks/common/tensorflow/logs --vocab_prefix=/dataset/vocab.bpe.32000 --ckpt=/checkpoints/translate.ckpt --infer_batch_size=32 --inference_input_file=/dataset/newstest2015.tok.bpe.32000.de --inference_output_file=/workspace/benchmarks/common/tensorflow/logs/output_infer --inference_ref_file=/dataset/newstest2015.tok.bpe.32000.en --num_inter_threads=1 --num_intra_threads=28 --infer_mode=beam_search"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_inception_resnet_v2_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_inception_resnet_v2_args.json new file mode 100644 index 000000000..c1a59e0b5 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_inception_resnet_v2_args.json @@ -0,0 +1,27 @@ +[ + { "_comment": "Int8 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data_location=/dataset --input_height=299 
--input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100"}, + + { "_comment": "Int8 command for latency benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --inter-op-parallelism-threads=1 --intra-op-parallelism-threads=28 --batch-size=1"}, + + { "_comment": "Int8 command for throughput benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --data-location=/dataset", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_int8_pretrained_model.pb --inter-op-parallelism-threads=1 --intra-op-parallelism-threads=28 --batch-size=128"}, + + { "_comment": "Fp32 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 
--accuracy-only --verbose --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data-location=/dataset", + "output": "python /workspace/intelai_models/eval_image_classifier_accuracy.py --input_graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --data_location=/dataset --input_height=299 --input_width=299 --num_inter_threads=2 --num_intra_threads=56 --output_layer=InceptionResnetV2/Logits/Predictions --batch_size=100"}, + + { "_comment": "FP32 command for latency benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 --intra-op-parallelism-threads=28 --batch-size=1"}, + + { "_comment": "FP32 command for throughput benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inception_resnet_v2 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/eval_image_classifier_benchmark.py --input-graph=/in_graph/inception_resnet_v2_fp32_pretrained_model.pb --inter-op-parallelism-threads=2 
--intra-op-parallelism-threads=28 --batch-size=128"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv3_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv3_args.json new file mode 100644 index 000000000..733b691ee --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv3_args.json @@ -0,0 +1,44 @@ +[ + { "_comment": "Int8 accuracy command", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 100 --in-graph /final_int8_inceptionv3.pb --intelai-models . --accuracy-only --verbose", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./int8/accuracy.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/final_int8_inceptionv3.pb"}, + + { "_comment": "Int8 command for latency benchmark with default --num-inter-threads, --num-intra-threads.", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --verbose", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28"}, + + { "_comment": "Int8 command for throughput benchmark", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . 
--benchmark-only --socket-id 0 --verbose", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28"}, + + { "_comment": "Int8 command for throughput benchmark with --steps=200 --warmup-steps=20", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inceptionv3 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/inception_frozen_max_min.pb --steps=200 --warmup-steps=20", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28"}, + + { "_comment": "Int8 command for latency benchmark with --steps=200 --warmup-steps=20", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=inceptionv3 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/inception_frozen_max_min.pb --steps=200 --warmup-steps=20", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/int8/benchmark.py --warmup_steps=20 --num_intra_threads=28 --num_inter_threads=1 --batch_size=1 
--input_graph=/in_graph/inception_frozen_max_min.pb --steps=200 --num_cores=28"}, + + { "_comment": "Int8 command for throughput benchmark with --disable-tcmalloc=True", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /final_int8_inceptionv3.pb --intelai-models . --benchmark-only --socket-id 0 --disable-tcmalloc=True", + "output": "numactl --cpunodebind=0 --membind=0 python ./int8/benchmark.py --warmup_steps=10 --num_intra_threads=28 --num_inter_threads=1 --batch_size=128 --input_graph=/final_int8_inceptionv3.pb --steps=50 --num_cores=28"}, + + { "_comment": "Fp32 accuracy command", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 100 --accuracy-only --data-location /dataset --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --verbose", + "output": "python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only"}, + + { "_comment": "FP32 command for latency benchmark", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 1 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--socket-id 0 --verbose", + "output": "numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28" + }, + + { "_comment": "FP32 command for throughput benchmark", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . --socket-id 0 --verbose", + "output": "numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28"}, + + { "_comment": "FP32 command for throughput benchmark with --num-inter-threads 4 --num-intra-threads 16", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name inceptionv3 --batch-size 128 --in-graph /inceptionv3_fp32_pretrained_model.pb --intelai-models . 
--socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16", + "output": "numactl --cpunodebind=0 --membind=0 python ./fp32/eval_image_classifier_inference.py --input-graph=/inceptionv3_fp32_pretrained_model.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv4_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv4_args.json new file mode 100644 index 000000000..0535c2eef --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_inceptionv4_args.json @@ -0,0 +1,19 @@ +[ + { "_comment": "Int8 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset"}, + + { "_comment": "Int8 command for latency benchmark with default --num-inter-threads, --num-intra-threads.", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py 
--batch_size=1 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28"}, + + { "_comment": "Int8 command for throughput benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=int8 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/inceptionv4_int8_pretrained_model.pb --data-location=/dataset", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/benchmark.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_int8_pretrained_model.pb --num_intra_threads=28"}, + + { "_comment": "Fp32 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --precision=fp32 --mode=inference --model-name=inceptionv4 --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/accuracy.py --batch_size=240 --num_inter_threads=2 --input_graph=/in_graph/inceptionv4_fp32_pretrained_model.pb --num_intra_threads=28 --data_location=/dataset"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_lm_1b_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_lm_1b_args.json new file mode 100644 index 000000000..26d11e1c3 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_lm_1b_args.json @@ -0,0 +1,7 @@ +[ + { "_comment": "FP32 benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_modeling --model-name=lm-1b --precision=fp32 --mode=inference 
--benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/benchmark.py -b=1 -I=100 --inter=1 --intra=28"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_maskrcnn_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_maskrcnn_args.json new file mode 100644 index 000000000..5900877d3 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_maskrcnn_args.json @@ -0,0 +1,11 @@ +[ + { "_comment": "FP32 benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/coco.py evaluate --dataset=/dataset --num_inter_threads 1 --num_intra_threads 28 --nw 5 --nb 50 --model=coco --infbs 1"}, + + { "_comment": "FP32 benchmark with --num-inter-threads 4 --num-intra-threads 16", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=maskrcnn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --data-location=/dataset --num-inter-threads 4 --num-intra-threads 16", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/coco.py evaluate --dataset=/dataset --num_inter_threads 4 --num_intra_threads 16 --nw 5 --nb 50 --model=coco --infbs 1"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_mobilenet_v1_args.json 
b/tests/unit/common/tensorflow/tf_model_args/tf_mobilenet_v1_args.json new file mode 100644 index 000000000..c98ada086 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_mobilenet_v1_args.json @@ -0,0 +1,36 @@ +[ + { "_comment": "FP32 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --accuracy-only --verbose --checkpoint=/checkpoints --in-graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --data-location=/dataset", + "output": "python /workspace/intelai_models/inference/fp32/accuracy.py --batch_size=100 --num_inter_threads=2 --input_graph=/in_graph/mobilenet_v1_1.0_224_frozen.pb --num_intra_threads=56 --data_location=/dataset"}, + + { "_comment": "FP32 latency benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset", + "output": "numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 1"}, + + { "_comment": "FP32 throughput benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id 0 --benchmark-only --verbose 
--checkpoint=/checkpoints --data-location=/dataset", + "output": "numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_dir /dataset --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 100"}, + + { "_comment": "FP32 benchmark command with dummy data and --output-dir specified", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --checkpoint=/checkpoints", + "output": "numactl --cpunodebind=0 -l python /workspace/intelai_models/inference/fp32/eval_image_classifier.py --dataset_name imagenet --checkpoint_path /checkpoints --dataset_split_name=validation --clone_on_cpu=True --model_name mobilenet_v1 --inter_op_parallelism_threads 2 --intra_op_parallelism_threads 28 --batch_size 100"}, + + { "_comment": "Int8 command for throughput benchmark with --number-of-steps enabled.", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=faster_rcnn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=-1 --benchmark-only --verbose --in-graph=/in_graph/pretrained_int8_faster_rcnn_model.pb --data-location=/dataset --number-of-steps=500", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_frozen_graph_rcnn.py -g /in_graph/pretrained_int8_faster_rcnn_model.pb -n 500 -d /dataset --num-inter-threads 2 --num-intra-threads 56"}, + + { 
"_comment": "Int8 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data-location=/dataset --input_height=224 --input_width=224", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/accuracy.py --input_height=224 --input_width=224 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=100 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --data_location=/dataset --input_layer=input"}, + + { "_comment": "Int8 latency benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=1 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50"}, + + + { "_comment": "Int8 
throughput benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=mobilenet_v1 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=240 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_height=224 --input_width=224 --warmup_steps=10 --steps=50", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/benchmark.py --input_height=224 --input_width=224 --warmup_steps=10 --num_intra_threads=28 --output_layer=MobilenetV1/Predictions/Reshape_1 --num_inter_threads=1 --batch_size=240 --input_graph=/in_graph/models_mobilenetv1_int8_pretrained_model.pb --input_layer=input --steps=50"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_mtcc_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_mtcc_args.json new file mode 100644 index 000000000..b0093db93 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_mtcc_args.json @@ -0,0 +1,5 @@ +[ + { "_comment": "FP32 benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=face_detection_and_alignment --model-name=mtcc --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/one_image_test.py --num_inter_threads 1 --num_intra_threads 28 -ckpt /checkpoints -dl /dataset"} +] diff --git 
a/tests/unit/common/tensorflow/tf_model_args/tf_ncf_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_ncf_args.json new file mode 100644 index 000000000..67fa8402c --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_ncf_args.json @@ -0,0 +1,15 @@ +[ + { "_comment": "FP32 latency benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=1 --inference_only --benchmark_only"}, + + { "_comment": "Fp32 accuracy", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --accuracy-only --verbose --checkpoint=/checkpoints --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --accuracy_only"}, + + { "_comment": "FP32 Throughput benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=ncf --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=256 --socket-id 0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset", + "output": "numactl --cpunodebind=0 
--membind=0 python /workspace/intelai_models/inference/fp32/ncf_main.py --data_dir=/dataset --model_dir=/checkpoints --intra_op_parallelism_threads=28 --inter_op_parallelism_threads=1 --batch_size=256 --inference_only --benchmark_only"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_resnet101_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_resnet101_args.json new file mode 100644 index 000000000..4c9132a79 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_resnet101_args.json @@ -0,0 +1,17 @@ +[ + { "_comment": "FP32 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --accuracy-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb --data-location=/dataset", + "output": "python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=2 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=56 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only"}, + + { "_comment": "FP32 command for throughput benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=128 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50"}, + + { "_comment": "Int8 latency benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow 
--use-case=image_recognition --model-name=resnet101 --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_int8_model.pb", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-intra-threads=28 --num-inter-threads=1 --input-graph=/in_graph/resnet101_int8_model.pb --warmup-steps=40 --steps=100"}, + + { "_comment": "FP32 command for latency benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet101 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id 0 --benchmark-only --verbose --in-graph=/in_graph/resnet101_fp32_model.pb", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --batch-size=1 --num-inter-threads=1 --input-graph=/in_graph/resnet101_fp32_model.pb --num-intra-threads=28 --warmup-steps=10 --steps=50"} +] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_resnet50_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_resnet50_args.json new file mode 100644 index 000000000..199ae2c0f --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_resnet50_args.json @@ -0,0 +1,40 @@ +[ + { "_comment": "FP32 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size 100 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/freezed_resnet50.pb --accuracy-only --data-location=/dataset", + "output": "numactl --cpunodebind=0 
--membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=100 --data-location=/dataset --accuracy-only --num-cores=28 --warmup-steps=10 --steps=50"}, + + { "_comment": "FP32 command for latency benchmark with default --num-inter-threads, --num-intra-threads.", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 128 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose", + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28"}, + + { "_comment": "FP32 command for latency benchmark with --num-inter-threads 4 --num-intra-threads 16", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 1 --in-graph /freezed_resnet50.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16", + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28"}, + + { "_comment": "FP32 command for throughput benchmark with --num-inter-threads=1 --num-intra-threads=28", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50 --batch-size 128 --in-graph /freezed_resnet50.pb --intelai-models . 
--socket-id 0 --verbose", + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28"}, + + { "_comment": "Int8 command for throughput benchmark with --output-dir enabled.", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --steps=200 --warmup-steps=20", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200"}, + + { "_comment": "Int8 command for data calibration with --calibration-only", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --data-location=/dataset --calibration-only", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/int8/generate_calibration_data.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/in_graph/resnet50_int8_pretrained_model.pb --data_location=/dataset"}, + + { 
"_comment": "Fp32 command for throughput benchmark with --output-results enabled.", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --output-results --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50_fp32_pretrained_model.pb --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --results-file-path /workspace/benchmarks/common/tensorflow/logs/resnet50_fp32_inference_results*.txt"}, + + { "_comment": "Int8 accuracy command", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name resnet50 --batch-size 100 --data-location /dataset --in-graph /final_int8_resnet50.pb --intelai-models . 
--accuracy-only --verbose", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only"}, + + { "_comment": "Int8 command for throughput benchmark with --steps=200 --warmup-steps=20", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/resnet50_int8_pretrained_model.pb --steps=200 --warmup-steps=20", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200" + } +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_rfcn_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_rfcn_args.json new file mode 100644 index 000000000..f8dc9b0a0 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_rfcn_args.json @@ -0,0 +1,17 @@ +[ + { "_comment": "FP32 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset --accuracy-only --split=accuracy_message", + "output": "FROZEN_GRAPH=/in_graph/frozen_inference_graph.pb TF_RECORD_FILE=/dataset 
SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/fp32/coco_mAP.sh"}, + + { "_comment": "FP32 command for benchmark", + "input": "run_tf_benchmark.py --framework tensorflow --use-case object_detection --precision fp32 --mode inference --model-name rfcn --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --config_file=rfcn_pipeline.config", + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/fp32/eval.py --inter_op 1 --intra_op 28 --omp 28 --pipeline_config_path /checkpoints/rfcn_pipeline.config --checkpoint_dir /checkpoints --eval_dir ./research/object_detection/models/rfcn/eval --logtostderr --blocktime=0 --run_once=True"}, + + { "_comment": "Int8 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --accuracy-only --split=accuracy_message", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 FROZEN_GRAPH=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb TF_RECORD_FILE=/dataset SPLIT=accuracy_message TF_MODELS_ROOT=/workspace/models /workspace/intelai_models/inference/int8/coco_mAP.sh"}, + + { "_comment": "Int8 benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=rfcn --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --verbose --in-graph=/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb --data-location=/dataset --benchmark-only --number_of_steps=500", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/int8/run_rfcn_inference.py -m /workspace/models -g 
/in_graph/rfcn_resnet101_int8_coco_pretrained_model.pb -x 500 -d /dataset --num-inter-threads 2 --num-intra-threads 56"} +] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_squeezenet_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_squeezenet_args.json new file mode 100644 index 000000000..9232b10fe --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_squeezenet_args.json @@ -0,0 +1,11 @@ +[ + { "_comment": "FP32 command for latency benchmark", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name squeezenet --batch-size 1 --checkpoint /checkpoints --intelai-models . --socket-id 0 --verbose", + "output": "taskset -c 0-27 python ./fp32/train_squeezenet.py --data_location None --batch_size 1 --num_inter_threads 1 --num_intra_threads 28 --model_dir /checkpoints --inference-only --verbose"}, + + { "_comment": "FP32 command for throughput benchmark", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name squeezenet --batch-size 64 --checkpoint /checkpoints --intelai-models . 
--socket-id 0 --verbose", + "output": "taskset -c 0-27 python ./fp32/train_squeezenet.py --data_location None --batch_size 64 --num_inter_threads 1 --num_intra_threads 28 --model_dir /checkpoints --inference-only --verbose"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_ssd_mobilenet_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_ssd_mobilenet_args.json new file mode 100644 index 000000000..fc4a7b1d9 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_ssd_mobilenet_args.json @@ -0,0 +1,17 @@ +[ + { "_comment": "FP32 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --accuracy-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --benchmark-dir=/workspace/benchmarks --data-location=/dataset", + "output": "sh /workspace/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/ssdmobilenet_accuracy.sh /in_graph/frozen_inference_graph.pb /dataset"}, + + { "_comment": "FP32 benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=-1 --socket-id=0 --benchmark-only --verbose --in-graph=/in_graph/frozen_inference_graph.pb --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py --input_tfrecord_paths=/dataset --output_tfrecord_path=/SSD-mobilenet-out.tfrecord --inference_graph=/in_graph/frozen_inference_graph.pb --discard_image_pixels=True --num_inter_threads=2 --num_intra_threads=28"}, + + { "_comment": "Int8 
accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --data-location=/dataset", + "output": "sh /workspace/intelai_models/inference/int8/coco_int8.sh /in_graph/ssdmobilenet_int8_pretrained_model.pb /dataset"}, + + { "_comment": "Int8 benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-mobilenet --precision=int8 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size=1 --socket-id 0 --data-location=/dataset --verbose --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb --benchmark-only --in-graph=/in_graph/ssdmobilenet_int8_pretrained_model.pb", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/int8/run_frozen_graph_ssdmob.py -g /in_graph/ssdmobilenet_int8_pretrained_model.pb -n 5000 -d /dataset -x --num-inter-threads 2 --num-intra-threads 28"} +] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_ssd_resnet34_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_ssd_resnet34_args.json new file mode 100644 index 000000000..0aa2ca495 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_ssd_resnet34_args.json @@ -0,0 +1,11 @@ +[ + { "_comment": "FP32 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 
--socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28 --accuracy-only --data-location /dataset"}, + + { "_comment": "FP32 benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd-resnet34 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssd_resnet34_bs1.pb --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/fp32/infer_detections.py --input-graph /in_graph/ssd_resnet34_bs1.pb --batch-size 1 --inter-op-parallelism-threads 1 --intra-op-parallelism-threads 28"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_ssd_vgg16_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_ssd_vgg16_args.json new file mode 100644 index 000000000..37d478e1f --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_ssd_vgg16_args.json @@ -0,0 +1,17 @@ +[ + { "_comment": "FP32 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models 
--in-graph=/in_graph/ssdvgg16_fp32_pretrained_model.pb --data-location=/dataset", + "output": "python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_fp32_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 --data-location=/dataset --accuracy-only"}, + + { "_comment": "FP32 benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --steps=500 --warmup-steps=100", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --warmup-steps=100 --steps=500 --data-location=/dataset"}, + + { "_comment": "Int8 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=2 --num-intra-threads=56 
--data-location=/dataset --accuracy-only"}, + + { "_comment": "Int8 benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=object_detection --model-name=ssd_vgg16 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --data-location=/dataset --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --steps=500 --warmup-steps=100", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_ssd.py --input-graph=/in_graph/ssdvgg16_int8_pretrained_model.pb --num-inter-threads=11 --num-intra-threads=21 --data-num-inter-threads=21 --data-num-intra-threads=28 --warmup-steps=100 --steps=500 --data-location=/dataset"} +] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_transformer_language_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_transformer_language_args.json new file mode 100644 index 000000000..5662ad83e --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_transformer_language_args.json @@ -0,0 +1,9 @@ +[ + { "_comment": "FP32 latency benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de", + "output": "numactl --cpunodebind=0 --membind=0 python 
/workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=1 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28"}, + + { "_comment": "Fp32 throughput", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=32 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28"} +] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_transformer_lt_official_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_transformer_lt_official_args.json new file mode 100644 index 000000000..1ccbf4bc4 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_transformer_lt_official_args.json @@ -0,0 +1,9 @@ +[ + { "_comment": "FP32 latency benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation 
--model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=1 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt"}, + + { "_comment": "FP32 throughput benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=64 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=64 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt"} +] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_unet_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_unet_args.json new file mode 100644 index 000000000..cbbe2f3f4 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_unet_args.json @@ -0,0 +1,7 @@ +[ + { "_comment": "FP32 benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_segmentation --model-name=unet --precision=fp32 
--mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --checkpoint_name=model.ckpt", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/benchmarks/image_segmentation/tensorflow/unet/inference/fp32/unet_infer.py -bs 1 -cp /checkpoints/model.ckpt --num_inter_threads 1 --num_intra_threads 28 -nw 80 -nb 400"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_wavenet_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_wavenet_args.json new file mode 100644 index 000000000..49ea2e09e --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_wavenet_args.json @@ -0,0 +1,7 @@ +[ + { "_comment": "FP32 benchmark command", + "input": "run_tf_benchmark.py --framework tensorflow --use-case text_to_speech --precision fp32 --mode inference --model-name wavenet --num-cores 1 --checkpoint /checkpoints --intelai-models . --model-source-dir . --socket-id 0 --verbose --checkpoint_name=model.ckpt-99 --sample=8510", + "output": "numactl --physcpubind=0-0 --membind=0 python generate.py /checkpoints/model.ckpt-99 --num_inter_threads=1 --num_intra_threads=1 --sample=8510"} +] + + diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_args.json new file mode 100644 index 000000000..64fddac5b --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_args.json @@ -0,0 +1,5 @@ +[ + { "_comment": "FP32 benchmark", + "input": "run_tf_benchmark.py --framework tensorflow --use-case recommendation --precision fp32 --mode inference --model-name wide_deep --batch-size 1024 --data-location /dataset --checkpoint /checkpoints --intelai-models . 
--verbose", + "output": "OMP_NUM_THREADS=1 numactl --cpunodebind=0 --membind=0 python inference/fp32/wide_deep_inference.py --data_dir=/dataset --model_dir=/checkpoints --batch_size=1024"} +] diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_large_ds_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_large_ds_args.json new file mode 100644 index 000000000..3d2297515 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_wide_deep_large_ds_args.json @@ -0,0 +1,27 @@ +[ + { "_comment": "Int8 benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14"}, + + { "_comment": "Int8 latency benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 
python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=1 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14"}, + + { "_comment": "Int8 command for throughput benchmark", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_int8_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=512 --input_graph=/in_graph/wide_deep_int8_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14"}, + + { "_comment": "FP32 benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14"}, + + { "_comment": "Fp32 command for throughput benchmark", + "input": "run_tf_benchmark.py 
--framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=512 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=512 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14"}, + + { "_comment": "Fp32 latency benchmark command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=recommendation --model-name=wide_deep_large_ds --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data-location=/dataset --num-parallel-batches=14", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/inference.py --num_intra_threads=1 --num_inter_threads=28 --batch_size=1 --input_graph=/in_graph/wide_deep_fp32_pretrained_model.pb --data_location=/dataset --num_parallel_batches=14"} +] + + From 850a00372c63878c46e8e66184b4b1f163b4d67d Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Tue, 7 May 2019 09:51:00 -0700 Subject: [PATCH 31/62] Update docker images in README files to use TF 1.14 (#297) --- .../tensorflow/dcgan/README.md | 2 +- .../tensorflow/draw/README.md | 4 ++-- .../tensorflow/facenet/README.md | 6 ++--- .../tensorflow/mtcc/README.md | 2 +- 
.../tensorflow/densenet169/README.md | 6 ++--- .../tensorflow/inception_resnet_v2/README.md | 12 +++++----- .../tensorflow/inceptionv3/README.md | 22 +++++++------------ .../tensorflow/inceptionv4/README.md | 18 +++++---------- .../tensorflow/mobilenet_v1/README.md | 12 +++++----- .../tensorflow/resnet101/README.md | 21 ++++++------------ .../tensorflow/resnet50/README.md | 18 +++++---------- .../tensorflow/squeezenet/README.md | 4 ++-- .../tensorflow/maskrcnn/README.md | 2 +- .../tensorflow/unet/README.md | 2 +- .../tensorflow/lm-1b/README.md | 4 ++-- .../tensorflow/gnmt/README.md | 4 ++-- .../tensorflow/transformer_language/README.md | 4 ++-- .../tensorflow/faster_rcnn/README.md | 14 ++++-------- .../tensorflow/rfcn/README.md | 14 ++++-------- .../tensorflow/ssd-mobilenet/README.md | 13 ++++------- .../tensorflow/ssd-resnet34/README.md | 4 ++-- .../tensorflow/ssd_vgg16/README.md | 8 +++---- .../recommendation/tensorflow/ncf/README.md | 6 ++--- .../tensorflow/wavenet/README.md | 2 +- docs/general/tensorflow/LaunchBenchmark.md | 4 ++-- 25 files changed, 83 insertions(+), 125 deletions(-) diff --git a/benchmarks/adversarial_networks/tensorflow/dcgan/README.md b/benchmarks/adversarial_networks/tensorflow/dcgan/README.md index d552ac46d..77953b78b 100644 --- a/benchmarks/adversarial_networks/tensorflow/dcgan/README.md +++ b/benchmarks/adversarial_networks/tensorflow/dcgan/README.md @@ -61,7 +61,7 @@ $ python launch_benchmark.py \ --socket-id 0 \ --checkpoint /home//dcgan_fp32_unconditional_cifar10_pretrained_model \ --data-location /home//cifar10 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` 5. Log files are located at the value of `--output-dir`. 
diff --git a/benchmarks/content_creation/tensorflow/draw/README.md b/benchmarks/content_creation/tensorflow/draw/README.md index 159f8de7b..fdf18fade 100644 --- a/benchmarks/content_creation/tensorflow/draw/README.md +++ b/benchmarks/content_creation/tensorflow/draw/README.md @@ -48,7 +48,7 @@ modes/precisions: --model-name draw \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ --checkpoint /home//draw_fp32_pretrained_model \ --data-location /home//mnist \ --batch-size 1 \ @@ -61,7 +61,7 @@ modes/precisions: --model-name draw \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ --checkpoint /home//draw_fp32_pretrained_model \ --data-location /home//mnist \ --batch-size 100 \ diff --git a/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md b/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md index 0e7e0d307..db5706b5d 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md +++ b/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md @@ -61,7 +61,7 @@ python launch_benchmark.py \ --checkpoint /home//checkpoints \ --data-location /home//dataset \ --model-source-dir /home//facenet/ \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` Example log tail when benchmarking for latency: ``` @@ -96,7 +96,7 @@ python launch_benchmark.py \ --checkpoint /home//checkpoints \ --data-location /home//dataset \ --model-source-dir /home//facenet/ \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` Example log tail when benchmarking for throughput: ``` @@ -128,7 +128,7 @@ python 
launch_benchmark.py \ --checkpoint /home//checkpoints \ --data-location /home//dataset \ --model-source-dir /home//facenet/ \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` Example log tail when benchmarking for accuracy: ``` diff --git a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md index a659f397f..9095110f9 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md +++ b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md @@ -56,7 +56,7 @@ Run benchmarking: --mode inference \ --socket-id 0 \ --checkpoint /home//MTCNN_model \ - --docker-image intelaipg/intel-optimized-tensorflow:nightly-latestprs-bdw + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` 6. The log file is saved to the value of `--output-dir`. diff --git a/benchmarks/image_recognition/tensorflow/densenet169/README.md b/benchmarks/image_recognition/tensorflow/densenet169/README.md index bf6b1f84f..fa02b7a80 100644 --- a/benchmarks/image_recognition/tensorflow/densenet169/README.md +++ b/benchmarks/image_recognition/tensorflow/densenet169/README.md @@ -56,7 +56,7 @@ following modes/precisions: --batch-size 100 \ --socket-id 0 \ --in-graph /home//densenet169_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ -- input_height=224 input_width=224 warmup_steps=20 steps=100 \ input_layer="input" output_layer="densenet169/predictions/Reshape_1" ``` @@ -72,7 +72,7 @@ following modes/precisions: --batch-size 1 \ --socket-id 0 \ --in-graph /home//densenet169_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ -- input_height=224 input_width=224 warmup_steps=20 steps=100 \ 
input_layer="input" output_layer="densenet169/predictions/Reshape_1" ``` @@ -89,7 +89,7 @@ following modes/precisions: --batch-size 100 \ --socket-id 0 \ --in-graph /home//densenet169_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --data-location /home//imagenet_validation_dataset \ -- input_height=224 input_width=224 \ input_layer="input" output_layer="densenet169/predictions/Reshape_1" diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md index 4b0543e56..a55413eb3 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md @@ -87,7 +87,7 @@ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords ``` @@ -103,7 +103,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb ``` @@ -118,7 +118,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb ``` @@ -247,7 +247,7 @@ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image 
intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords ``` @@ -264,7 +264,7 @@ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` For throughput (using `--benchmark-only`, `--socket-id 0` and `--batch-size 128`): @@ -279,7 +279,7 @@ python launch_benchmark.py \ --batch-size 128 \ --socket-id 0 \ --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md index 3e8cf2f0b..e02c73331 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md @@ -100,7 +100,7 @@ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords ``` @@ -121,7 +121,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords \ -- 
warmup_steps=50 steps=500 @@ -138,7 +138,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -154,7 +154,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords \ -- warmup_steps=50 steps=500 @@ -171,17 +171,11 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` -The docker image (`intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl`) -used in the commands above were built using -[TensorFlow](git@github.com:tensorflow/tensorflow.git) master -([e889ea1](https://github.com/tensorflow/tensorflow/commit/e889ea1dd965c31c391106aa3518fc23d2689954)) and -[PR #25765](https://github.com/tensorflow/tensorflow/pull/25765). - Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location.. 
@@ -267,7 +261,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv3_fp32_pretrained_model.pb ``` Example log tail when benchmarking for latency: @@ -298,7 +292,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv3_fp32_pretrained_model.pb ``` Example log tail when benchmarking for throughput: @@ -330,7 +324,7 @@ python launch_benchmark.py \ --accuracy-only \ --batch-size 100 \ --data-location /dataset/Imagenet_Validation \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv3_fp32_pretrained_model.pb ``` Example log tail when benchmarking for accuracy: diff --git a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md index edb391d84..e89d13dee 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md @@ -57,7 +57,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --accuracy-only \ --batch-size 100 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv4_int8_pretrained_model.pb \ --data-location /home//ImageNet_TFRecords ``` @@ -72,7 +72,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
--benchmark-only \ --batch-size 240 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv4_int8_pretrained_model.pb ``` @@ -86,16 +86,10 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv4_int8_pretrained_model.pb ``` - The docker image (`intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl`) - used in the commands above were built using - [TensorFlow](git@github.com:tensorflow/tensorflow.git) master - ([e889ea1](https://github.com/tensorflow/tensorflow/commit/e889ea1dd965c31c391106aa3518fc23d2689954)) and - [PR #25765](https://github.com/tensorflow/tensorflow/pull/25765). - Note that the `--verbose` flag can be added to any of the above commands to get additional debug output. @@ -188,7 +182,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --accuracy-only \ --batch-size 100 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv4_fp32_pretrained_model.pb \ --data-location /home//ImageNet_TFRecords ``` @@ -203,7 +197,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --benchmark-only \ --batch-size 240 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv4_fp32_pretrained_model.pb ``` @@ -217,7 +211,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
--benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//inceptionv4_fp32_pretrained_model.pb ``` diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index bc84ba6c2..af5947e20 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -65,7 +65,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --batch-size 240 \ --socket-id 0 \ --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-avx2-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ -- input_height=224 input_width=224 warmup_steps=10 steps=50 \ input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" ``` @@ -81,7 +81,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --batch-size 1 \ --socket-id 0 \ --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-avx2-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ -- input_height=224 input_width=224 warmup_steps=10 steps=50 \ input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" ``` @@ -98,7 +98,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
--batch-size 100 \ --socket-id 0 \ --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-avx2-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --data-location /home//imagenet_validation_dataset \ -- input_height=224 input_width=224 \ input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" @@ -216,7 +216,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --model-name mobilenet_v1 \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /home//tensorflow/models \ --batch-size 1 \ --socket-id 0 \ @@ -232,7 +232,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --model-name mobilenet_v1 \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /home//tensorflow/models \ --batch-size 100 \ --socket-id 0 \ @@ -246,7 +246,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
--model-name mobilenet_v1 \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /home//tensorflow/models \ --batch-size 100 \ --accuracy-only \ diff --git a/benchmarks/image_recognition/tensorflow/resnet101/README.md b/benchmarks/image_recognition/tensorflow/resnet101/README.md index 4bb6a8ded..2c87b7ea1 100644 --- a/benchmarks/image_recognition/tensorflow/resnet101/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet101/README.md @@ -85,7 +85,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --data-location /home//dataset/FullImageNetData_directory \ --in-graph=/home//resnet101_int8_pretrained_model.pb ``` @@ -106,7 +106,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -123,7 +123,7 @@ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --data-location /home//dataset/FullImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -139,7 +139,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -156,18 +156,11 @@ python 
launch_benchmark.py \ --batch-size 128 \ --data-location /home//dataset/FullImageNetData_directory \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` - -The docker image (`intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl`) -used in the commands above were built using -[TensorFlow](git@github.com:tensorflow/tensorflow.git) master -([e889ea1](https://github.com/tensorflow/tensorflow/commit/e889ea1dd965c31c391106aa3518fc23d2689954)) and -[PR #25765](https://github.com/tensorflow/tensorflow/pull/25765). - Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location.. @@ -257,7 +250,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/resnet10 --mode inference \ --model-name resnet101 \ --batch-size 128 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//trained_models/resnet101_fp32_pretrained_model.pb \ --socket-id 0 ``` @@ -284,7 +277,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/resnet10 --mode inference \ --model-name resnet101 \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --in-graph /home//trained_models/resnet101_fp32_pretrained_model.pb \ --data-location /home//imagenet_validation_dataset \ --accuracy-only \ diff --git a/benchmarks/image_recognition/tensorflow/resnet50/README.md b/benchmarks/image_recognition/tensorflow/resnet50/README.md index 5a666c6dd..31c06609a 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50/README.md @@ -43,12 
+43,6 @@ $ git clone https://github.com/IntelAI/models.git The optimized ResNet50 model files are attached to the [intelai/models](https://github.com/intelai/models) repo and located at `models/models/image_recognition/tensorflow/resnet50/`. - The docker image (`intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl`) - used in the commands above were built using - [TensorFlow](git@github.com:tensorflow/tensorflow.git) master - ([e889ea1](https://github.com/tensorflow/tensorflow/commit/e889ea1dd965c31c391106aa3518fc23d2689954)) and - [PR #25765](https://github.com/tensorflow/tensorflow/pull/25765). - * Calculate the model accuracy, the required parameters parameters include: the `ImageNet` dataset location (from step 1), the pre-trained `final_int8_resnet50.pb` input graph file (from step 2), and the `--accuracy-only` flag. @@ -64,7 +58,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=100 \ --accuracy-only \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` The log file is saved to the value of `--output-dir`. @@ -100,7 +94,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=128 \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ -- warmup_steps=50 steps=500 ``` The tail of the log output when the benchmarking completes should look @@ -161,7 +155,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` The log file is saved to the value of `--output-dir`. 
@@ -196,7 +190,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` The log file is saved to the value of `--output-dir`. @@ -234,7 +228,7 @@ $ python launch_benchmark.py \ --batch-size 100 \ --socket-id 0 \ --data-location /home//dataset/ImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` The log file is saved to the value of `--output-dir`. @@ -268,7 +262,7 @@ $ python launch_benchmark.py \ --batch-size 100 \ --socket-id 0 \ --data-location /home//dataset/ImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` The results file will be written to the `models/benchmarks/common/tensorflow/logs` directory, unless another diff --git a/benchmarks/image_recognition/tensorflow/squeezenet/README.md b/benchmarks/image_recognition/tensorflow/squeezenet/README.md index 355efca72..1cf855218 100644 --- a/benchmarks/image_recognition/tensorflow/squeezenet/README.md +++ b/benchmarks/image_recognition/tensorflow/squeezenet/README.md @@ -79,7 +79,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --socket-id 0 \ --batch-size 64 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --checkpoint /home//squeezenet_checkpoints \ --data-location /home//datasets/ImageNet_TFRecords ``` @@ -94,7 +94,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --socket-id 0 \ --batch-size 1 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --checkpoint /home//squeezenet_checkpoints \ --data-location /home//datasets/ImageNet_TFRecords ``` 
diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md b/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md index c862032f7..bdf3cdae1 100644 --- a/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md +++ b/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md @@ -62,7 +62,7 @@ $ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --data-location /home//COCO2014 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl-py3 + --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 ``` 5. Log files are located at the value of `--output-dir`. diff --git a/benchmarks/image_segmentation/tensorflow/unet/README.md b/benchmarks/image_segmentation/tensorflow/unet/README.md index 6f6671e66..fd5968ad6 100644 --- a/benchmarks/image_segmentation/tensorflow/unet/README.md +++ b/benchmarks/image_segmentation/tensorflow/unet/README.md @@ -57,7 +57,7 @@ modes/precisions: --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --checkpoint /home//unet_trained \ --model-source-dir /home//tf_unet \ -- checkpoint_name=model.cpkt diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/README.md b/benchmarks/language_modeling/tensorflow/lm-1b/README.md index 525ff352b..82b42cac9 100644 --- a/benchmarks/language_modeling/tensorflow/lm-1b/README.md +++ b/benchmarks/language_modeling/tensorflow/lm-1b/README.md @@ -54,7 +54,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /inference/cloud/language_modeling ``` @@ -69,7 +69,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1024 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + 
--docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /inference/cloud/language_modeling \ -- steps=4 \ ``` diff --git a/benchmarks/language_translation/tensorflow/gnmt/README.md b/benchmarks/language_translation/tensorflow/gnmt/README.md index f52bcdfc6..00bc1807f 100644 --- a/benchmarks/language_translation/tensorflow/gnmt/README.md +++ b/benchmarks/language_translation/tensorflow/gnmt/README.md @@ -82,7 +82,7 @@ python launch_benchmark.py \ --socket-id 0 \ --checkpoint /home//gnmt_checkpoints \ --data-location /home//wmt16 \ ---docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ +--docker-image intelaipg/intel-optimized-tensorflow:1.14 \ -- infer_mode=beam_search ``` @@ -99,7 +99,7 @@ python launch_benchmark.py \ --socket-id 0 \ --checkpoint /home//gnmt_checkpoints \ --data-location /home//wmt16 \ ---docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ +--docker-image intelaipg/intel-optimized-tensorflow:1.14 \ -- infer_mode=beam_search ``` diff --git a/benchmarks/language_translation/tensorflow/transformer_language/README.md b/benchmarks/language_translation/tensorflow/transformer_language/README.md index abc931d51..a21a4fbd6 100644 --- a/benchmarks/language_translation/tensorflow/transformer_language/README.md +++ b/benchmarks/language_translation/tensorflow/transformer_language/README.md @@ -81,7 +81,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --checkpoint /home//transformer_lt_fp32_pretrained_model \ --data-location /home//t2t_data \ --model-source-dir /home//tensor2tensor/ \ @@ -98,7 +98,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 32 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --checkpoint 
/home//transformer_lt_fp32_pretrained_model \ --data-location /home//t2t_data \ --model-source-dir /home//tensor2tensor/ \ diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md index e69fba728..072d91d2c 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md @@ -152,7 +152,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --checkpoint /home//faster_rcnn_resnet50_fp32_coco \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ -- config_file=pipeline.config ``` @@ -165,7 +165,7 @@ python launch_benchmark.py \ --mode inference \ --precision fp32 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//coco/output \ --in-graph /home//faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb \ @@ -257,7 +257,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --in-graph /home//faster_rcnn_int8_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --benchmark-only \ -- number_of_steps=5000 ``` @@ -272,19 +272,13 @@ python launch_benchmark.py \ --precision int8 \ --framework tensorflow \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//coco_dataset/coco_val.record \ --in-graph /home//faster_rcnn_int8_pretrained_model.pb \ --accuracy-only ``` -The docker image (`intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl`) -used in the commands above were 
built using -[TensorFlow](git@github.com:tensorflow/tensorflow.git) master -([e889ea1](https://github.com/tensorflow/tensorflow/commit/e889ea1dd965c31c391106aa3518fc23d2689954)) and -[PR #25765](https://github.com/tensorflow/tensorflow/pull/25765). - 5. The log file is saved to the value of `--output-dir`. Below is a sample log file tail when running benchmarking for throughput diff --git a/benchmarks/object_detection/tensorflow/rfcn/README.md b/benchmarks/object_detection/tensorflow/rfcn/README.md index f42ab9313..efe0a9489 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/README.md +++ b/benchmarks/object_detection/tensorflow/rfcn/README.md @@ -140,7 +140,7 @@ python launch_benchmark.py \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//val/val2017 \ --in-graph /home//rfcn_resnet101_int8_coco_pretrained_model.pb \ @@ -157,7 +157,7 @@ python launch_benchmark.py \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//coco/output/coco_val.record-00000-of-00001 \ --in-graph /home//rfcn_resnet101_int8_coco_pretrained_model.pb \ @@ -165,12 +165,6 @@ python launch_benchmark.py \ -- split="accuracy_message" ``` -The docker image (`intelaipg/intel-optimized-tensorflow:PR25765-devel-mkl`) -used in the commands above were built using -[TensorFlow](git@github.com:tensorflow/tensorflow.git) master -([e889ea1](https://github.com/tensorflow/tensorflow/commit/e889ea1dd965c31c391106aa3518fc23d2689954)) and -[PR #25765](https://github.com/tensorflow/tensorflow/pull/25765). 
- Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location. @@ -338,7 +332,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --checkpoint /home//rfcn_resnet101_fp32_coco \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ -- config_file=rfcn_pipeline.config ``` @@ -351,7 +345,7 @@ python launch_benchmark.py \ --mode inference \ --precision fp32 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//coco/output/coco_val.record \ --in-graph /home//rfcn_resnet101_fp32_coco/frozen_inference_graph.pb \ diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md index 33ac1d237..571fdeedd 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md @@ -121,7 +121,7 @@ python launch_benchmark.py \ --precision int8 \ --framework tensorflow \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-avx2-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//val/val2017 \ --in-graph /home//ssdmobilenet_int8_pretrained_model.pb \ @@ -138,7 +138,7 @@ python launch_benchmark.py \ --precision int8 \ --framework tensorflow \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-avx2-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//coco/output/coco_val.record \ --in-graph 
/home//ssdmobilenet_int8_pretrained_model.pb \ @@ -146,11 +146,6 @@ python launch_benchmark.py \ --batch-size 1 ``` -Note that it is required to use the docker image specified in the -commands above (`intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7`) -to run SSD-MobileNet Int8, as it includes PRs that are required to run -this model. - Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location. @@ -354,7 +349,7 @@ $ python launch_benchmark.py \ --precision fp32 \ --mode inference \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.12.0-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --benchmark-only ``` @@ -373,7 +368,7 @@ $ python launch_benchmark.py \ --precision fp32 \ --mode inference \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.12.0-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --accuracy-only ``` diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md index f4e419f79..c5dac8657 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md @@ -131,7 +131,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.13.1-devel-mkl-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ --benchmark-only ``` @@ -151,7 +151,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.13.1-devel-mkl-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ --accuracy-only ``` diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md index 
320223c95..9d2cb7b3c 100644 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md @@ -103,7 +103,7 @@ python launch_benchmark.py \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:nightly-master-devel-mkl-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ --model-source-dir /home//SSD.TensorFlow \ --data-location /home//tf_records \ --in-graph /home//ssdvgg16_int8_pretrained_model.pb \ @@ -133,7 +133,7 @@ python launch_benchmark.py \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:nightly-master-devel-mkl-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ --model-source-dir /home//SSD.TensorFlow \ --data-location /home//tf_records \ --in-graph /home//ssdvgg16_int8_pretrained_model.pb \ @@ -210,7 +210,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ - --docker-image intelaipg/intel-optimized-tensorflow:nightly-master-devel-mkl-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ --batch-size 1 \ --socket-id 0 \ --num-inter-threads 11 \ @@ -237,7 +237,7 @@ python launch_benchmark.py \ --mode inference \ --precision fp32 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:nightly-master-devel-mkl-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ --model-source-dir /home//SSD.TensorFlow \ --data-location /home//tf_records \ --in-graph /home//ssdvgg16_fp32_pretrained_model.pb \ diff --git a/benchmarks/recommendation/tensorflow/ncf/README.md b/benchmarks/recommendation/tensorflow/ncf/README.md index c6b92d938..73efa89a8 100644 --- a/benchmarks/recommendation/tensorflow/ncf/README.md +++ b/benchmarks/recommendation/tensorflow/ncf/README.md @@ -53,7 +53,7 @@ $ python launch_benchmark.py \ --framework 
tensorflow \ --precision fp32 \ --mode inference \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` The tail of Throughput log, looks as below. @@ -83,7 +83,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` The tail of Latency log, looks as below. @@ -115,7 +115,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl + --docker-image intelaipg/intel-optimized-tensorflow:1.14 ``` The tail of accuracy log, looks as below. diff --git a/benchmarks/text_to_speech/tensorflow/wavenet/README.md b/benchmarks/text_to_speech/tensorflow/wavenet/README.md index 340736a6e..782a55964 100644 --- a/benchmarks/text_to_speech/tensorflow/wavenet/README.md +++ b/benchmarks/text_to_speech/tensorflow/wavenet/README.md @@ -71,7 +71,7 @@ python launch_benchmark.py \ --framework tensorflow \ --socket-id 0 \ --num-cores 1 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /home//wavenet/tensorflow-wavenet \ --checkpoint /home//wavenet_checkpoints \ -- checkpoint_name=model.ckpt-99 sample=8510 diff --git a/docs/general/tensorflow/LaunchBenchmark.md b/docs/general/tensorflow/LaunchBenchmark.md index 59b9eb68d..139070e5f 100644 --- a/docs/general/tensorflow/LaunchBenchmark.md +++ b/docs/general/tensorflow/LaunchBenchmark.md @@ -164,7 +164,7 @@ $ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --data-location /home//Imagenet_Validation \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --volume 
/home//custom_folder_1:/custom_folder_1 \ --volume /home//custom_folder_2:/custom_folder_2 ``` @@ -201,7 +201,7 @@ Below is an example showing how to use the `--debug` flag: --batch-size=1 \ --socket-id 0 \ --data-location /home//Imagenet_Validation \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --debug # ls From 37958bdb7a8d5a55fe15662f94d91bbf8de32373 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Thu, 9 May 2019 13:07:54 -0700 Subject: [PATCH 32/62] Update FasterRCNN Int8 README file to note benchmarking uses raw images (#300) * Update FasterRCNN Int8 README file to note benchmarking uses raw images * Fixing 'TR' records to 'TF' records * Reformatting FP32 steps so that the raw image download is one step * Adding missing parentheses --- .../tensorflow/faster_rcnn/README.md | 42 ++++++++++++------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md index 072d91d2c..b5ee91e71 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md @@ -44,8 +44,8 @@ sed -i.bak 95s/input_config/input_config[0]/ offline_eval_map_corloc.py ``` -2. Download the 2017 validation -[COCO dataset](http://cocodataset.org/#home) and annotations: +2. Download and unzip the 2017 validation +[COCO dataset](http://cocodataset.org/#home) images: ``` $ mkdir val @@ -53,7 +53,10 @@ $ cd val $ wget http://images.cocodataset.org/zips/val2017.zip $ unzip val2017.zip $ cd .. +``` +3. Download and unzip the coco dataset annotations file: +``` $ mkdir annotations $ cd annotations $ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip @@ -73,14 +76,15 @@ $ echo "{ \"images\": {}, \"categories\": {}}" > empty.json $ cd .. ``` -3. 
Now that you have the raw COCO dataset, we need to convert it to the +4. Now that you have the raw COCO dataset and annotations files, we need to convert it to the TF records format in order to use it with the inference script. We will do this by running the `create_coco_tf_record.py` file in the TensorFlow models repo. Follow [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#dependencies) to install the required dependencies (`cocoapi` and `Protobuf 3.0.0`). Follow the steps below to navigate to the proper directory and point the -script to the raw COCO dataset files that you have downloaded in step 2. +script to the raw COCO dataset files that you have downloaded in step 2 +and the annotations files that you downloaded and created in step 3. The `--output_dir` is the location where the TF record files will be located after the script has completed. @@ -113,13 +117,13 @@ $ git checkout master The `coco_val.record` file is what we will use in this inference example. -4. Download and extract the pre-trained model. +5. Download and extract the pre-trained model. ``` $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz $ tar -xzvf faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz ``` -5. Clone the [intelai/models](https://github.com/intelai/models) repo. +6. Clone the [intelai/models](https://github.com/intelai/models) repo. This repo has the launch script for running benchmarking. ``` @@ -133,10 +137,10 @@ Receiving objects: 100% (11/11), done. Resolving deltas: 100% (3/3), done. ``` -6. Run the `launch_benchmark.py` script from the intelai/models repo +7. 
Run the `launch_benchmark.py` script from the intelai/models repo , with the appropriate parameters including: the -`coco_val.record` data location (from step 3), the pre-trained model -`pipeline.config` file and the checkpoint location (from step 4, and the +`coco_val.record` data location (from step 4), the pre-trained model +`pipeline.config` file and the checkpoint location (from step 5), and the location of your `tensorflow/models` clone (from step 1). Run benchmarking for throughput and latency: @@ -158,7 +162,7 @@ $ python launch_benchmark.py \ Or for accuracy where the `--data-location` is the path the directory where your `coco_val.record` file is located and the `--in-graph` is -the pre-trained graph located in the pre-trained model directory (from step 4): +the pre-trained graph located in the pre-trained model directory (from step 5): ``` python launch_benchmark.py \ --model-name faster_rcnn \ @@ -172,7 +176,7 @@ python launch_benchmark.py \ --accuracy-only ``` -7. The log file is saved to the value of `--output-dir`. +8. The log file is saved to the value of `--output-dir`. Below is a sample log file tail when running benchmarking for throughput and latency: @@ -218,7 +222,13 @@ better performance results for Int8 precision models with smaller batch sizes. If you want to disable the use of TCMalloc, set `--disable-tcmalloc=True` when calling `launch_benchmark.py` and the script will run without TCMalloc. -1. Please follow step 1, 2 and 3 of Faster R-CNN FP32 instructions written above. +1. Please follow the steps from the +[Faster R-CNN FP32 instructions](#fp32-inference-instructions) written +above for cloning dependency repositories and getting the coco dataset: +* Performance benchmarking uses the raw coco dataset images. Follow steps +1 and 2 from the FP32 instructions. +* Accuracy testing requires the coco dataset to be in the TF records +format. Follow steps 1, 2, 3, and 4 from the FP32 instructions. 2. Download the pre-trained model. 
``` @@ -244,12 +254,14 @@ with the appropriate parameters. To run on single socket use `--socket_id` switc by default it will be using all available sockets. Optional parameter `number_of_steps` (default value = 5000) can be added at the end of command after `--` as shown below: -Run benchmarking for throughput and latency: +Run benchmarking for throughput and latency using the following command. +The `--data-location` is the path to the directory that contains the +raw coco dataset validation images which you downloaded and unzipped: ``` $ cd /home//models/benchmarks $ python launch_benchmark.py \ - --data-location /home//coco/output/ \ + --data-location /home//val2017 \ --model-source-dir /home//tensorflow/models \ --model-name faster_rcnn \ --framework tensorflow \ @@ -274,7 +286,7 @@ python launch_benchmark.py \ --socket-id 0 \ --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ --model-source-dir /home//tensorflow/models \ - --data-location /home//coco_dataset/coco_val.record \ + --data-location /home//output/coco_val.record \ --in-graph /home//faster_rcnn_int8_pretrained_model.pb \ --accuracy-only ``` From 94edbc7287efd1387694c123a8dc3132b989d9ff Mon Sep 17 00:00:00 2001 From: Jitendra Patil Date: Mon, 13 May 2019 09:53:58 -0700 Subject: [PATCH 33/62] fix docker build command (#306) --- docs/general/tensorflow_serving/InstallationGuide.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/general/tensorflow_serving/InstallationGuide.md b/docs/general/tensorflow_serving/InstallationGuide.md index 3c6b84c2f..bc557d040 100644 --- a/docs/general/tensorflow_serving/InstallationGuide.md +++ b/docs/general/tensorflow_serving/InstallationGuide.md @@ -54,6 +54,7 @@ The recommended way to use TensorFlow Serving is with Docker images. 
Let’s bui $ cd $TF_SERVING_ROOT/tensorflow_serving/tools/docker/ $ docker build \ -f Dockerfile.devel-mkl \ + --build-arg TF_SERVING_BAZEL_OPTIONS="--incompatible_disallow_data_transition=false --incompatible_disallow_filetype=false" \ --build-arg TF_SERVING_VERSION_GIT_BRANCH="1.13.0" \ -t tensorflow/serving:latest-devel-mkl . ``` From 5e19f8a9f2744dbf1ba91adb18acc7a9fd1f11fc Mon Sep 17 00:00:00 2001 From: Karthik Vadla Date: Thu, 16 May 2019 16:52:48 -0700 Subject: [PATCH 34/62] ADD: Tensorflow Serving Benchmarking (#307) --- benchmarks/README.md | 11 +- .../common/tensorflow_serving/__init__.py | 19 +++ .../build_tfserving_image.sh | 73 ++++++++ benchmarks/common/tensorflow_serving/start.sh | 161 ++++++++++++++++++ .../tensorflow_serving/__init__.py | 19 +++ .../tensorflow_serving/inceptionv3/README.md | 91 ++++++++++ .../inceptionv3/__init__.py | 19 +++ .../inceptionv3/inference/__init__.py | 19 +++ .../inceptionv3/inference/fp32/__init__.py | 19 +++ .../fp32/image_recognition_benchmark.py | 117 +++++++++++++ .../fp32}/image_recognition_client.py | 76 ++++----- .../fp32}/model_graph_to_saved_model.py | 2 - .../inceptionv3/inference/fp32/util.py | 61 +++++++ .../inceptionv3/inference/int8/__init__.py | 19 +++ benchmarks/launch_benchmark.py | 74 ++++++-- .../tensorflow_serving/InstallationGuide.md | 6 +- .../tensorflow_serving/Tutorial.md | 35 ++-- .../src/image_recognition_benchmark.py | 117 ------------- .../tensorflow_serving/src/util.py | 61 ------- tests/unit/test_launch_benchmark.py | 72 +++++++- tox.ini | 1 + 21 files changed, 813 insertions(+), 259 deletions(-) create mode 100644 benchmarks/common/tensorflow_serving/__init__.py create mode 100644 benchmarks/common/tensorflow_serving/build_tfserving_image.sh create mode 100644 benchmarks/common/tensorflow_serving/start.sh create mode 100644 benchmarks/image_recognition/tensorflow_serving/__init__.py create mode 100644 benchmarks/image_recognition/tensorflow_serving/inceptionv3/README.md create mode 
100644 benchmarks/image_recognition/tensorflow_serving/inceptionv3/__init__.py create mode 100644 benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/__init__.py create mode 100644 benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/__init__.py create mode 100644 benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/image_recognition_benchmark.py rename {docs/image_recognition/tensorflow_serving/src => benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32}/image_recognition_client.py (52%) rename {docs/image_recognition/tensorflow_serving/src => benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32}/model_graph_to_saved_model.py (99%) create mode 100644 benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/util.py create mode 100644 benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/int8/__init__.py delete mode 100644 docs/image_recognition/tensorflow_serving/src/image_recognition_benchmark.py delete mode 100644 docs/image_recognition/tensorflow_serving/src/util.py diff --git a/benchmarks/README.md b/benchmarks/README.md index d0f2a15b5..0875f2baa 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -11,7 +11,7 @@ dependencies to be installed: * [git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) * `wget` for downloading pre-trained models -## Use Cases +## TensorFlow Use Cases | Use Case | Framework | Model | Mode | Benchmarking Instructions | | -----------------------| --------------| ------------------- | --------- |------------------------------| @@ -42,3 +42,12 @@ dependencies to be installed: | Recommendation | TensorFlow | [Wide & Deep Large Dataset](https://arxiv.org/pdf/1606.07792.pdf) | Inference | [Int8](recommendation/tensorflow/wide_deep_large_ds/README.md#int8-inference-instructions) [FP32](recommendation/tensorflow/wide_deep_large_ds/README.md#fp32-inference-instructions) | | 
Recommendation | TensorFlow | [Wide & Deep](https://arxiv.org/pdf/1606.07792.pdf) | Inference | [FP32](recommendation/tensorflow/wide_deep/README.md#fp32-inference-instructions) | | Text-to-Speech | TensorFlow | [WaveNet](https://arxiv.org/pdf/1609.03499.pdf) | Inference | [FP32](text_to_speech/tensorflow/wavenet/README.md#fp32-inference-instructions) | + + +## TensorFlow Serving Use Cases + + +| Use Case | Framework | Model | Mode | Benchmarking Instructions | +| -----------------------| --------------| ------------------- | --------- |------------------------------| +| Image Recognition | TensorFlow Serving | [Inception V3](https://arxiv.org/pdf/1512.00567.pdf) | Inference | [FP32](image_recognition/tensorflow_serving/inceptionv3/README.md#fp32-inference-instructions) | + diff --git a/benchmarks/common/tensorflow_serving/__init__.py b/benchmarks/common/tensorflow_serving/__init__.py new file mode 100644 index 000000000..cf793ec6a --- /dev/null +++ b/benchmarks/common/tensorflow_serving/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/common/tensorflow_serving/build_tfserving_image.sh b/benchmarks/common/tensorflow_serving/build_tfserving_image.sh new file mode 100644 index 000000000..a47505f88 --- /dev/null +++ b/benchmarks/common/tensorflow_serving/build_tfserving_image.sh @@ -0,0 +1,73 @@ +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +# Bash script to build tensorflow serving image +# Setup proxy on your terminal before running the script. 
+ +# To build image separately +# TF_SERVING_VERSION=1.13.0 MKL_IMAGE_TAG=tensorflow/serving:latest-mkl bash build_tfserving_image.sh + +#!/usr/bin/env bash +set -e +set -x + +WORKDIR=serving_workspace + +if [ -d ${WORKDIR} ]; then + rm -rf ${WORKDIR} +fi + +pushd $(pwd) + +mkdir -p ${WORKDIR} +cd ${WORKDIR} + +# Build Tensorflow Serving image +TF_SERVING_VERSION=${TF_SERVING_VERSION:-"1.13.0"} +echo "Using TF_SERVING_VERSION=${TF_SERVING_VERSION} to build docker image" + +# Clone official tensorflow serving repo +git clone https://github.com/tensorflow/serving.git + +TF_SERVING_ROOT=$(pwd)/serving +cd ${TF_SERVING_ROOT}/tensorflow_serving/tools/docker/ + +# Build Dockerfile.devel-mkl +docker build \ + --build-arg TF_SERVING_BAZEL_OPTIONS="--incompatible_disallow_data_transition=false --incompatible_disallow_filetype=false" \ + --build-arg TF_SERVING_VERSION_GIT_BRANCH=${TF_SERVING_VERSION} \ + --build-arg HTTP_PROXY=${HTTP_PROXY} \ + --build-arg HTTPS_PROXY=${HTTPS_PROXY} \ + --build-arg http_proxy=${http_proxy} \ + --build-arg https_proxy=${https_proxy} \ + -f Dockerfile.devel-mkl -t tensorflow/serving:latest-devel-mkl . + +# Build Dockerfile.mkl, which uses above image as base_image +docker build \ + --build-arg TF_SERVING_VERSION_GIT_BRANCH=${TF_SERVING_VERSION} \ + --build-arg HTTP_PROXY=${HTTP_PROXY} \ + --build-arg HTTPS_PROXY=${HTTPS_PROXY} \ + --build-arg http_proxy=${http_proxy} \ + --build-arg https_proxy=${https_proxy} \ + -f Dockerfile.mkl -t ${MKL_IMAGE_TAG} . 
+ +popd + +rm -rf ${WORKDIR} + +echo "Image built with tag: ${MKL_IMAGE_TAG}" diff --git a/benchmarks/common/tensorflow_serving/start.sh b/benchmarks/common/tensorflow_serving/start.sh new file mode 100644 index 000000000..e611cc931 --- /dev/null +++ b/benchmarks/common/tensorflow_serving/start.sh @@ -0,0 +1,161 @@ +#!/usr/bin/env bash +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# +#!/usr/bin/env bash +set -e +set -x + +echo 'Running with parameters:' +echo " USE_CASE: ${USE_CASE}" +echo " FRAMEWORK: ${FRAMEWORK}" +echo " WORKSPACE: ${WORKSPACE}" +echo " IN_GRAPH: ${IN_GRAPH}" +echo " MODEL_NAME: ${MODEL_NAME}" +echo " MODE: ${MODE}" +echo " PRECISION: ${PRECISION}" +echo " BATCH_SIZE: ${BATCH_SIZE}" +echo " BENCHMARK_ONLY: ${BENCHMARK_ONLY}" +echo " ACCURACY_ONLY: ${ACCURACY_ONLY}" +echo " OMP_NUM_THREADS: ${OMP_NUM_THREADS}" +echo " NUM_INTRA_THREADS: ${NUM_INTRA_THREADS}" +echo " NUM_INTER_THREADS: ${NUM_INTER_THREADS}" +echo " OUTPUT_DIR: ${OUTPUT_DIR}" +echo " TF_SERVING_VERSION: ${TF_SERVING_VERSION}" + + +if [ ${ACCURACY_ONLY} == "True" ]; then + echo "Accuracy is not supported with Tensorflow Serving" + exit 1 +fi + +WORKDIR=workspace + +if [ -d ${WORKDIR} ]; then + rm -rf ${WORKDIR} +fi + +pushd $(pwd) + +mkdir -p ${WORKDIR} +cd ${WORKDIR} + +# Check docker +if ! 
[[ $(which docker) && $(docker --version) ]]; then + echo "Docker not found, please install docker to proceed." + exit 1 +fi + +# Check for pip +if ! [[ $(which pip) && $(pip --version) ]]; then + echo "pip not found, please install pip to proceed." + exit 1 +fi + +timestamp=`date +%Y%m%d_%H%M%S` +LOG_FILENAME="benchmark_${MODEL_NAME}_${MODE}_${PRECISION}_${timestamp}.log" +if [ ! -d "${OUTPUT_DIR}" ]; then + mkdir ${OUTPUT_DIR} +fi + +MKL_IMAGE_TAG=tensorflow/serving:latest-mkl + +# Build Tensorflow Serving docker image +echo "Building tensorflow serving image..." +echo "First time it takes few minutes to build images, consecutive builds are much faster" + +TF_SERVING_VERSION=${TF_SERVING_VERSION} MKL_IMAGE_TAG=${MKL_IMAGE_TAG} bash ${WORKSPACE}/build_tfserving_image.sh + +function docker_run(){ + docker run \ + --name=${CONTAINER_NAME} \ + --rm \ + -d \ + -p 8500:8500 \ + -v /tmp:/models/${MODEL_NAME} \ + -e MODEL_NAME=${MODEL_NAME} \ + -e OMP_NUM_THREADS=${OMP_NUM_THREADS} \ + -e TENSORFLOW_INTER_OP_PARALLELISM=${NUM_INTER_THREADS} \ + -e TENSORFLOW_INTRA_OP_PARALLELISM=${NUM_INTRA_THREADS} \ + ${MKL_IMAGE_TAG} +} + + +function resnet50_or_inceptionv3(){ + # Setup virtual env + pip install virtualenv + virtualenv venv + + source venv/bin/activate + pip install grpc \ + requests \ + intel-tensorflow \ + tensorflow-serving-api + + # cd to image recognition tfserving scripts + cd ${WORKSPACE}/../../${USE_CASE}/${FRAMEWORK}/${MODEL_NAME}/${MODE}/${PRECISION} + + # by default converted model is saved at /tmp/1 + rm -rf /tmp/1 + + # convert pretrained model to savedmodel + python model_graph_to_saved_model.py --import_path ${IN_GRAPH} + + RUNNING=$(docker ps --filter="expose=8501/tcp" -q | xargs) + if [[ -n ${RUNNING} ]]; then + docker rm -f ${RUNNING} + fi + + CONTAINER_NAME=tfserving_${RANDOM} + + # Run container + MKL_IMAGE_TAG=${MKL_IMAGE_TAG} CONTAINER_NAME=${CONTAINER_NAME} docker_run + + # Test + python image_recognition_client.py --model ${MODEL_NAME} + + + if 
[ ${BATCH_SIZE} == 1 ];then + # Test Average latency + python image_recognition_benchmark.py --batch_size ${BATCH_SIZE} --model ${MODEL_NAME} + else + # Test max throughput + python image_recognition_benchmark.py --batch_size ${BATCH_SIZE} --model ${MODEL_NAME} + fi + + # Clean up + docker rm -f ${CONTAINER_NAME} +} + +LOGFILE=${OUTPUT_DIR}/${LOG_FILENAME} + +MODEL_NAME=$(echo ${MODEL_NAME} | tr 'A-Z' 'a-z') +if [ ${MODEL_NAME} == "inceptionv3" ] || [ ${MODEL_NAME} == "resnet50" ] && [ ${PRECISION} == "fp32" ]; then + resnet50_or_inceptionv3 | tee -a ${LOGFILE} +else + echo "Unsupported Model: ${MODEL_NAME} or Precision: ${PRECISION}" + exit 1 +fi + +popd + +# Clean up work directory +rm -rf ${WORKDIR} + +echo "Log output location: ${LOGFILE}" | tee -a ${LOGFILE} diff --git a/benchmarks/image_recognition/tensorflow_serving/__init__.py b/benchmarks/image_recognition/tensorflow_serving/__init__.py new file mode 100644 index 000000000..cf793ec6a --- /dev/null +++ b/benchmarks/image_recognition/tensorflow_serving/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/image_recognition/tensorflow_serving/inceptionv3/README.md b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/README.md new file mode 100644 index 000000000..1ddb7bb14 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/README.md @@ -0,0 +1,91 @@ +# Inception V3 + +This document has instructions for how to run Inception V3 for the +following modes/precisions: +* [FP32 inference](#fp32-inference-instructions) + +## FP32 Inference Instructions + +1. Clone this [intelai/models](https://github.com/IntelAI/models) +repository: + +``` +$ git clone https://github.com/IntelAI/models.git +``` + +2. Download the pre-trained model. +``` +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/inceptionv3_fp32_pretrained_model.pb +``` + +3. Navigate to the `benchmarks` directory in your local clone of +the [intelai/models](https://github.com/IntelAI/models) repo from step 1. +The `launch_benchmark.py` script in the `benchmarks` directory is +used for starting a tensorflow serving benchmarking using optimized TensorFlow Serving docker +container. It has arguments to specify which model, framework, mode, +precision, and input graph. + +Substitute in your own `--in-graph` pretrained model file path (from step 2). + +4. Inception V3 can be run for `latency` benchmarking and `throughput` +benchmarking. Use one of the following examples below, +depending on your use case. 
+ +* For latency with dummy data (using `--batch-size 1`): + +``` +python launch_benchmark.py \ + --in-graph /home//inceptionv3_fp32_pretrained_model.pb \ + --model-name inceptionv3 \ + --framework tensorflow_serving \ + --precision fp32 \ + --mode inference \ + --batch-size=1 \ + --benchmark-only +``` +Example log tail when benchmarking for latency: +``` +Iteration 35: 0.019 sec +Iteration 36: 0.020 sec +Iteration 37: 0.018 sec +Iteration 38: 0.018 sec +Iteration 39: 0.019 sec +Iteration 40: 0.018 sec +Average time: 0.019 sec +Batch size = 1 +Latency: 18.801 ms +Throughput: 53.189 images/sec +tfserving_3784 +Log output location: {--output-dir value}/benchmark_inceptionv3_inference_fp32_20190516_103531.log +``` + +* For throughput with dummy data (using `--batch-size 128`): + +``` +python launch_benchmark.py \ + --in-graph /home//inceptionv3_fp32_pretrained_model.pb \ + --model-name inceptionv3 \ + --framework tensorflow_serving \ + --precision fp32 \ + --mode inference \ + --batch-size=128 \ + --benchmark-only +``` +Example log tail when benchmarking for throughput: +``` +Iteration 34: 0.779 sec +Iteration 35: 0.916 sec +Iteration 36: 0.809 sec +Iteration 37: 0.793 sec +Iteration 38: 0.813 sec +Iteration 39: 0.796 sec +Iteration 40: 0.796 sec +Average time: 0.817 sec +Batch size = 128 +Throughput: 156.752 images/sec +tfserving_5299 +Log output location: {--output-dir value}/benchmark_inceptionv3_inference_fp32_20190516_103958.log +``` + +Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands +to get additional debug output or change the default output location. 
diff --git a/benchmarks/image_recognition/tensorflow_serving/inceptionv3/__init__.py b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/__init__.py new file mode 100644 index 000000000..cf793ec6a --- /dev/null +++ b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/__init__.py b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/__init__.py new file mode 100644 index 000000000..cf793ec6a --- /dev/null +++ b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/__init__.py b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/__init__.py new file mode 100644 index 000000000..cf793ec6a --- /dev/null +++ b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/image_recognition_benchmark.py b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/image_recognition_benchmark.py new file mode 100644 index 000000000..3178741db --- /dev/null +++ b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/image_recognition_benchmark.py @@ -0,0 +1,117 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +"""Send simulated image data to tensorflow_model_server loaded with ResNet50 or InceptionV3 model. + +""" + +from __future__ import print_function + +import os +import random + +import grpc +import numpy as np +import sys +import tensorflow as tf +import time +from tensorflow_serving.apis import predict_pb2 +from tensorflow_serving.apis import prediction_service_pb2_grpc + +from util import preprocess_image, parse_example_proto + +tf.app.flags.DEFINE_string('server', 'localhost:8500', + 'PredictionService host:port') +tf.app.flags.DEFINE_integer('batch_size', 1, 'Batch size to use') +tf.app.flags.DEFINE_string('data_dir', '', 'path to images in TF records format') +tf.app.flags.DEFINE_string('model', 'resnet50', 'Name of model (resnet50 or inceptionv3).') +FLAGS = tf.app.flags.FLAGS + + +def sample_images(image_size): + """Pull a random batch of images from FLAGS.data_dir containing TF record formatted ImageNet validation set + Returns: + ndarray of float32 with shape [FLAGS.batch_size, image_size, image_size, 3] + """ + + sample_file = random.choice(os.listdir(FLAGS.data_dir)) + dataset = tf.data.TFRecordDataset(os.path.join(FLAGS.data_dir, sample_file)) + dataset = dataset.map(lambda x: parse_example_proto(x)).shuffle(True).batch(FLAGS.batch_size) + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + with tf.Session() as sess: + images, labels = sess.run(next_element) + images = np.array([sess.run(preprocess_image(x, FLAGS.model, image_size)) for x in images]) + + return images + 
+ +def main(_): + if FLAGS.model == 'resnet50': + image_size = 224 + elif FLAGS.model == 'inceptionv3': + image_size = 299 + else: + print('Please specify model as either resnet50 or inceptionv3.') + sys.exit(-1) + + channel = grpc.insecure_channel(FLAGS.server) + stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) + i = 0 + num_iteration = 40 + warm_up_iteration = 10 + total_time = 0 + for _ in range(num_iteration): + i += 1 + if FLAGS.data_dir: + image_np = sample_images(image_size) + else: + image_np = np.random.rand(FLAGS.batch_size, image_size, image_size, 3).astype(np.float32) + if FLAGS.model == 'resnet50': + # For ResNet50, rescale to [0, 256] + image_np *= 256.0 + elif FLAGS.model == 'inceptionv3': + # For InceptionV3, rescale to [-1, 1] + image_np = (image_np - 0.5) * 2.0 + + request = predict_pb2.PredictRequest() + request.model_spec.name = FLAGS.model + request.model_spec.signature_name = 'serving_default' + request.inputs['input'].CopyFrom( + tf.contrib.util.make_tensor_proto(image_np, shape=[FLAGS.batch_size, image_size, image_size, 3])) + start_time = time.time() + stub.Predict(request, 10.0) # 10 secs timeout + time_consume = time.time() - start_time + print('Iteration %d: %.3f sec' % (i, time_consume)) + if i > warm_up_iteration: + total_time += time_consume + + time_average = total_time / (num_iteration - warm_up_iteration) + print('Average time: %.3f sec' % (time_average)) + + print('Batch size = %d' % FLAGS.batch_size) + if (FLAGS.batch_size == 1): + print('Latency: %.3f ms' % (time_average * 1000)) + + print('Throughput: %.3f images/sec' % (FLAGS.batch_size / time_average)) + + +if __name__ == '__main__': + tf.app.run() diff --git a/docs/image_recognition/tensorflow_serving/src/image_recognition_client.py b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/image_recognition_client.py similarity index 52% rename from docs/image_recognition/tensorflow_serving/src/image_recognition_client.py rename to 
benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/image_recognition_client.py index abdc77d05..2926f4621 100644 --- a/docs/image_recognition/tensorflow_serving/src/image_recognition_client.py +++ b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/image_recognition_client.py @@ -24,12 +24,11 @@ from __future__ import print_function -import sys import grpc -import requests import numpy as np +import requests +import sys import tensorflow as tf - from tensorflow_serving.apis import predict_pb2 from tensorflow_serving.apis import prediction_service_pb2_grpc @@ -41,45 +40,46 @@ tf.app.flags.DEFINE_string('server', 'localhost:8500', 'PredictionService host:port') tf.app.flags.DEFINE_string('image', '', 'path to image in JPEG format') -tf.app.flags.DEFINE_string('model', 'resnet50', 'Name of model (resnet50 or inceptionv3).') +tf.app.flags.DEFINE_string('model', 'resnet50', 'Name of model (resnet50 or Inceptionv3).') FLAGS = tf.app.flags.FLAGS def main(_): - if FLAGS.model == 'resnet50': - image_size = 224 - elif FLAGS.model == 'inceptionv3': - image_size = 299 - else: - print('Please specify model as either resnet50 or inceptionv3.') - sys.exit(-1) - - if FLAGS.image: - with open(FLAGS.image, 'rb') as f: - data = f.read() - else: - # Download the image URL if a path is not provided as input - dl_request = requests.get(IMAGE_URL, stream=True) - dl_request.raise_for_status() - data = dl_request.content - - channel = grpc.insecure_channel(FLAGS.server) - stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) - request = predict_pb2.PredictRequest() - request.model_spec.name = FLAGS.model - request.model_spec.signature_name = 'serving_default' - image_data = tf.reshape(preprocess_image(data, FLAGS.model, image_size), [1, image_size, image_size, 3]) - - # Run the graph - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - image_data = (sess.run(image_data)) - - 
request.inputs['input'].CopyFrom(tf.contrib.util.make_tensor_proto(image_data, shape=[1, image_size, image_size, 3])) - result = stub.Predict(request) - print(result) - print('Predicted class: ', str(np.argmax(result.outputs['predict'].float_val))) + if FLAGS.model == 'resnet50': + image_size = 224 + elif FLAGS.model == 'inceptionv3': + image_size = 299 + else: + print('Please specify model as either resnet50 or Inceptionv3.') + sys.exit(-1) + + if FLAGS.image: + with open(FLAGS.image, 'rb') as f: + data = f.read() + else: + # Download the image URL if a path is not provided as input + dl_request = requests.get(IMAGE_URL, stream=True) + dl_request.raise_for_status() + data = dl_request.content + + channel = grpc.insecure_channel(FLAGS.server) + stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) + request = predict_pb2.PredictRequest() + request.model_spec.name = FLAGS.model + request.model_spec.signature_name = 'serving_default' + image_data = tf.reshape(preprocess_image(data, FLAGS.model, image_size), [1, image_size, image_size, 3]) + + # Run the graph + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + image_data = (sess.run(image_data)) + + request.inputs['input'].CopyFrom( + tf.contrib.util.make_tensor_proto(image_data, shape=[1, image_size, image_size, 3])) + result = stub.Predict(request) + print(result) + print('Predicted class: ', str(np.argmax(result.outputs['predict'].float_val))) if __name__ == '__main__': - tf.app.run() + tf.app.run() diff --git a/docs/image_recognition/tensorflow_serving/src/model_graph_to_saved_model.py b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/model_graph_to_saved_model.py similarity index 99% rename from docs/image_recognition/tensorflow_serving/src/model_graph_to_saved_model.py rename to benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/model_graph_to_saved_model.py index ca4f8092c..a593539ca 100644 --- 
a/docs/image_recognition/tensorflow_serving/src/model_graph_to_saved_model.py +++ b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/model_graph_to_saved_model.py @@ -25,9 +25,7 @@ from __future__ import print_function -import os import sys - import tensorflow as tf import tensorflow.tools.graph_transforms as graph_transforms diff --git a/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/util.py b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/util.py new file mode 100644 index 000000000..70eaba0de --- /dev/null +++ b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32/util.py @@ -0,0 +1,61 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +from __future__ import print_function + +import tensorflow as tf + + +def preprocess_image(image_buffer, model, image_size): + """Preprocess JPEG encoded bytes to 3D float Tensor.""" + + # Decode the string as an RGB JPEG of unknown height and width. + image = tf.image.decode_jpeg(image_buffer, channels=3) + # Convert pixels to [0, 1) + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region to 87.5% of the original image. + image = tf.image.central_crop(image, central_fraction=0.875) + # Resize the image to image_size x image_size. 
+ image = tf.expand_dims(image, 0) + image = tf.image.resize_bilinear(image, [image_size, image_size], align_corners=False) + image = tf.squeeze(image, [0]) + if model == 'resnet50': + # For ResNet50, rescale to [0, 256] + image = tf.multiply(image, 256.0) + elif model == 'Inceptionv3': + # For InceptionV3, rescale to [-1, 1] + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image + + +def parse_example_proto(example_serialized): + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + } + + features = tf.parse_single_example(example_serialized, feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + return features['image/encoded'], label diff --git a/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/int8/__init__.py b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/int8/__init__.py new file mode 100644 index 000000000..cf793ec6a --- /dev/null +++ b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/int8/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/launch_benchmark.py b/benchmarks/launch_benchmark.py index 32c0f68ae..6da9d7cb6 100644 --- a/benchmarks/launch_benchmark.py +++ b/benchmarks/launch_benchmark.py @@ -29,7 +29,9 @@ import sys from argparse import ArgumentParser from common import base_benchmark_util +from common import platform_util from common.utils.validators import check_no_spaces, check_volume_mount +from common.base_model_init import BaseModelInitializer class LaunchBenchmark(base_benchmark_util.BaseBenchmarkUtil): @@ -213,13 +215,66 @@ def run_bare_metal(self, benchmark_scripts, intelai_models, env_var_dict): # setup volume directories to be the local system directories, since we aren't # mounting volumes when running bare metal, but start.sh expects these args args = self.args - mount_benchmark = benchmark_scripts - mount_external_models_source = args.model_source_dir - mount_intelai_models = intelai_models workspace = os.path.join(benchmark_scripts, "common", args.framework) + mount_benchmark = benchmark_scripts in_graph_path = args.input_graph - dataset_path = args.data_location checkpoint_path = args.checkpoint + dataset_path = args.data_location + + # To Launch Tensorflow Serving benchmark we need only --in-graph arg. + # It does not support checkpoint files. 
+ if args.framework == "tensorflow_serving": + if args.docker_image: + raise ValueError("--docker-image arg is not supported with tensorflow serving benchmarking, " + "as script automatically builds image and supplies it.") + + if checkpoint_path: + raise ValueError("--checkpoint-path arg is not supported with tensorflow serving benchmarking") + + if args.mode != "inference": + raise ValueError("--mode arg should be set to inference") + + if in_graph_path: + env_var_dict["IN_GRAPH"] = in_graph_path + else: + raise ValueError("--in-graph arg is required to run tensorflow serving benchmarking") + + for env_var_name in env_var_dict: + os.environ[env_var_name] = str(env_var_dict[env_var_name]) + + # We need this env to be set for the platform util + os.environ["PYTHON_EXE"] = str(sys.executable if not args.docker_image else "python") + + # Get Platformutil + platform_util_obj = None or platform_util.PlatformUtil(self.args) + + # Configure num_inter_threads and num_intra_threads + base_obj = BaseModelInitializer(args=self.args, custom_args=[], platform_util=platform_util_obj) + base_obj.set_num_inter_intra_threads() + + # Update num_inter_threads and num_intra_threads in env dictionary + env_var_dict["NUM_INTER_THREADS"] = self.args.num_inter_threads + env_var_dict["NUM_INTRA_THREADS"] = self.args.num_intra_threads + + # Set OMP_NUM_THREADS + env_var_dict["OMP_NUM_THREADS"] = self.args.num_intra_threads + + else: + mount_external_models_source = args.model_source_dir + mount_intelai_models = intelai_models + + # Add env vars with bare metal settings + env_var_dict["MOUNT_EXTERNAL_MODELS_SOURCE"] = mount_external_models_source + env_var_dict["MOUNT_INTELAI_MODELS_SOURCE"] = mount_intelai_models + + if in_graph_path: + env_var_dict["IN_GRAPH"] = in_graph_path + + if checkpoint_path: + env_var_dict["CHECKPOINT_DIRECTORY"] = checkpoint_path + + if dataset_path: + env_var_dict["DATASET_LOCATION"] = dataset_path # if using the default output directory, get the full path if 
args.output_dir == "/models/benchmarks/common/tensorflow/logs": @@ -228,19 +283,8 @@ def run_bare_metal(self, benchmark_scripts, intelai_models, env_var_dict): # Add env vars with bare metal settings env_var_dict["WORKSPACE"] = workspace env_var_dict["MOUNT_BENCHMARK"] = mount_benchmark - env_var_dict["MOUNT_EXTERNAL_MODELS_SOURCE"] = mount_external_models_source - env_var_dict["MOUNT_INTELAI_MODELS_SOURCE"] = mount_intelai_models env_var_dict["OUTPUT_DIR"] = args.output_dir - if in_graph_path: - env_var_dict["IN_GRAPH"] = in_graph_path - - if checkpoint_path: - env_var_dict["CHECKPOINT_DIRECTORY"] = checkpoint_path - - if dataset_path: - env_var_dict["DATASET_LOCATION"] = dataset_path - # Set env vars for bare metal for env_var_name in env_var_dict: os.environ[env_var_name] = str(env_var_dict[env_var_name]) diff --git a/docs/general/tensorflow_serving/InstallationGuide.md b/docs/general/tensorflow_serving/InstallationGuide.md index bc557d040..f8a30f2f2 100644 --- a/docs/general/tensorflow_serving/InstallationGuide.md +++ b/docs/general/tensorflow_serving/InstallationGuide.md @@ -36,7 +36,7 @@ We will break down the installation into 2 steps: * Step 1: Build the Intel Optimized TensorFlow Serving Docker image * Step 2: Verify the Docker image by serving a simple model - half_plus_two -### Step 1: Build TensorFlow Serving Docker image +### Step 1: Build TensorFlow Serving Docker image. The recommended way to use TensorFlow Serving is with Docker images. Let’s build a docker image with TensorFlow Serving optimized for Intel® Processors. * Login into your machine via SSH and clone the [Tensorflow Serving](https://github.com/tensorflow/serving/) repository and save the path of this cloned directory (Also, adding it to `.bashrc` ) for ease of use for the remainder of this tutorial. @@ -45,7 +45,9 @@ The recommended way to use TensorFlow Serving is with Docker images.
Let’s bui $ export TF_SERVING_ROOT=$(pwd)/serving $ echo "export TF_SERVING_ROOT=$(pwd)/serving" >> ~/.bashrc ``` - + +* You can also build image using [this](/benchmarks/common/tensorflow_serving/build_tfserving_image.sh) script, run as per comments mentioned. Or Continue manual steps as below. + * Using `Dockerfile.devel-mkl`, build an image with Intel optimized ModelServer. This creates an image with all the required development tools and builds from sources. The image size will be around 5GB and will take some time. On AWS c5.4xlarge instance (16 logical cores), it took about 25min. **NOTE**: It is recommended that you build an official release version using `--build-arg TF_SERVING_VERSION_GIT_BRANCH=""`, but if you wish to build the (unstable) head of master, omit the build argument and master will be used by default. diff --git a/docs/image_recognition/tensorflow_serving/Tutorial.md b/docs/image_recognition/tensorflow_serving/Tutorial.md index f7c325686..f94350da7 100644 --- a/docs/image_recognition/tensorflow_serving/Tutorial.md +++ b/docs/image_recognition/tensorflow_serving/Tutorial.md @@ -1,6 +1,6 @@ # Image Recognition with TensorFlow Serving on CPU ### Real-time and Max Throughput Inference -Models: ResNet50, InceptionV3 +Model: InceptionV3 and ResNet50 ## Goal @@ -27,21 +27,23 @@ Tuning TensorFlow Serving to take full advantage of your hardware for image reco 3. Running a client script to measure latency and throughput 4.
Experimenting with the TensorFlow Serving settings on your own to further optimize for your model and use case -## Hands-on Tutorial - ResNet50 or InceptionV3 +## Hands-on Tutorial - InceptionV3 and Resnet50 For steps 1 and 2, refer to the Intel Model Zoo FP32 benchmarks: -* [ResNet50 README](/benchmarks/image_recognition/tensorflow/resnet50#fp32-inference-instructions) * [InceptionV3 README](/benchmarks/image_recognition/tensorflow/inceptionv3#fp32-inference-instructions) +* [ResNet50 README](/benchmarks/image_recognition/tensorflow/resnet50#fp32-inference-instructions) + +NOTE: The below example shows InceptionV3. The same code snippets will work for ResNet50 by replacing the model name to `resnet50`. -1. **Download the Model**: Download and extract the ResNet50 or InceptionV3 pre-trained model (FP32), using the instructions in one of the READMEs above. +1. **Download the Model**: Download and extract the InceptionV3 pre-trained model (FP32), using the instructions in above README. 2. **(Optional) Download Data**: If you are interested only in testing latency and throughput, not accuracy, you can skip this step and use synthetic data. - If you want to verify prediction accuracy by testing on real data, follow the instructions in one of the READMEs above to download the ImageNet dataset. + If you want to verify prediction accuracy by testing on real data, follow the instructions in the above README to download the ImageNet dataset. -3. **Clone this repository**: Clone the [intelai/models](https://github.com/intelai/models) repository and `cd` into the `docs/image_recognition/tensorflow_serving/src` directory. +3. **Clone this repository**: Clone the [intelai/models](https://github.com/intelai/models) repository and `cd` into the `models/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32` directory. 
``` $ git clone https://github.com/IntelAI/models.git - $ cd models/docs/image_recognition/tensorflow_serving/src + $ cd models/benchmarks/image_recognition/tensorflow_serving/inceptionv3/inference/fp32 ``` 4. **Set up your environment**: In this tutorial, we use a virtual environment to install a few required Python packages. @@ -60,6 +62,7 @@ For steps 1 and 2, refer to the Intel Model Zoo FP32 benchmarks: (venv)$ pip install tensorflow-serving-api ``` 5. **Create a SavedModel**: Using the conversion script `model_graph_to_saved_model.py`, convert the pre-trained model graph to a SavedModel. + (For ResNet50, substitute the name of the ResNet50 FP32 pre-trained model.) Example: ``` @@ -118,13 +121,13 @@ For steps 1 and 2, refer to the Intel Model Zoo FP32 benchmarks: To see average inference latency (in ms), run the benchmark script `image_recognition_benchmark.py` using batch_size 1: ``` (venv)$ python image_recognition_benchmark.py --batch_size 1 --model inceptionv3 - Iteration 1: 0.017 sec + Iteration 1: ... sec ... - Iteration 40: 0.016 sec - Average time: 0.016 sec + Iteration 40: ... sec + Average time: ... sec Batch size = 1 - Latency: 16.496 ms - Throughput: 60.619 images/sec + Latency: ... ms + Throughput: ... images/sec ``` In some cases, it is desirable to constrain the inference server to a single core or socket. @@ -156,12 +159,12 @@ For steps 1 and 2, refer to the Intel Model Zoo FP32 benchmarks: To see average throughput (in images/sec), run the benchmark script `image_recognition_benchmark.py` using batch_size 128: ``` (venv)$ python image_recognition_benchmark.py --batch_size 128 --model inceptionv3 - Iteration 1: 1.706 sec + Iteration 1: ... sec ... - Iteration 40: 0.707 sec - Average time: 0.693 sec + Iteration 40: ... sec + Average time: ... sec Batch size = 128 - Throughput: 184.669 images/sec + Throughput: ... images/sec ``` 11. 
**Clean up**: diff --git a/docs/image_recognition/tensorflow_serving/src/image_recognition_benchmark.py b/docs/image_recognition/tensorflow_serving/src/image_recognition_benchmark.py deleted file mode 100644 index 658812cd9..000000000 --- a/docs/image_recognition/tensorflow_serving/src/image_recognition_benchmark.py +++ /dev/null @@ -1,117 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: EPL-2.0 -# - -"""Send simulated image data to tensorflow_model_server loaded with ResNet50 or InceptionV3 model. 
- -""" - -from __future__ import print_function - -import os -import sys -import random -import time -import grpc -import tensorflow as tf -import numpy as np - -from tensorflow_serving.apis import predict_pb2 -from tensorflow_serving.apis import prediction_service_pb2_grpc - -from util import preprocess_image, parse_example_proto - -tf.app.flags.DEFINE_string('server', 'localhost:8500', - 'PredictionService host:port') -tf.app.flags.DEFINE_integer('batch_size', 1, 'Batch size to use') -tf.app.flags.DEFINE_string('data_dir', '', 'path to images in TF records format') -tf.app.flags.DEFINE_string('model', 'resnet50', 'Name of model (resnet50 or inceptionv3).') -FLAGS = tf.app.flags.FLAGS - - -def sample_images(image_size): - """Pull a random batch of images from FLAGS.data_dir containing TF record formatted ImageNet validation set - - Returns: - ndarray of float32 with shape [FLAGS.batch_size, image_size, image_size, 3] - """ - - sample_file = random.choice(os.listdir(FLAGS.data_dir)) - dataset = tf.data.TFRecordDataset(os.path.join(FLAGS.data_dir, sample_file)) - dataset = dataset.map(lambda x: parse_example_proto(x)).shuffle(True).batch(FLAGS.batch_size) - iterator = dataset.make_one_shot_iterator() - next_element = iterator.get_next() - with tf.Session() as sess: - images, labels = sess.run(next_element) - images = np.array([sess.run(preprocess_image(x, FLAGS.model, image_size)) for x in images]) - - return images - -def main(_): - if FLAGS.model == 'resnet50': - image_size = 224 - elif FLAGS.model == 'inceptionv3': - image_size = 299 - else: - print('Please specify model as either resnet50 or inceptionv3.') - sys.exit(-1) - - channel = grpc.insecure_channel(FLAGS.server) - stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) - i = 0 - num_iteration = 40 - warm_up_iteration = 10 - total_time = 0 - for _ in range(num_iteration): - i += 1 - if FLAGS.data_dir: - image_np = sample_images(image_size) - else: - image_np = np.random.rand(FLAGS.batch_size, 
image_size, image_size, 3).astype(np.float32) - if FLAGS.model == 'resnet50': - # For ResNet50, rescale to [0, 256] - image_np *= 256.0 - elif FLAGS.model == 'inceptionv3': - # For InceptionV3, rescale to [-1, 1] - image_np = (image_np - 0.5) * 2.0 - - request = predict_pb2.PredictRequest() - request.model_spec.name = FLAGS.model - request.model_spec.signature_name = 'serving_default' - request.inputs['input'].CopyFrom( - tf.contrib.util.make_tensor_proto(image_np, shape=[FLAGS.batch_size, image_size, image_size, 3])) - start_time = time.time() - result = stub.Predict(request, 10.0) # 10 secs timeout - time_consume = time.time() - start_time - print('Iteration %d: %.3f sec' % (i, time_consume)) - if i > warm_up_iteration: - total_time += time_consume - - time_average = total_time / (num_iteration - warm_up_iteration) - print('Average time: %.3f sec' % (time_average)) - - print('Batch size = %d' % FLAGS.batch_size) - if (FLAGS.batch_size == 1): - print('Latency: %.3f ms' % (time_average * 1000)) - - print('Throughput: %.3f images/sec' % (FLAGS.batch_size / time_average)) - - -if __name__ == '__main__': - tf.app.run() diff --git a/docs/image_recognition/tensorflow_serving/src/util.py b/docs/image_recognition/tensorflow_serving/src/util.py deleted file mode 100644 index 8877e932d..000000000 --- a/docs/image_recognition/tensorflow_serving/src/util.py +++ /dev/null @@ -1,61 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: EPL-2.0 -# - -from __future__ import print_function - -import tensorflow as tf - -def preprocess_image(image_buffer, model, image_size): - """Preprocess JPEG encoded bytes to 3D float Tensor.""" - - # Decode the string as an RGB JPEG of unknown height and width. - image = tf.image.decode_jpeg(image_buffer, channels=3) - # Convert pixels to [0, 1) - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region to 87.5% of the original image. - image = tf.image.central_crop(image, central_fraction=0.875) - # Resize the image to image_size x image_size. - image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [image_size, image_size], align_corners=False) - image = tf.squeeze(image, [0]) - if model == 'resnet50': - # For ResNet50, rescale to [0, 256] - image = tf.multiply(image, 256.0) - elif model == 'inceptionv3': - # For InceptionV3, rescale to [-1, 1] - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image - -def parse_example_proto(example_serialized): - - # Dense features in Example proto. 
- feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - } - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - return features['image/encoded'], label - diff --git a/tests/unit/test_launch_benchmark.py b/tests/unit/test_launch_benchmark.py index 32a9eaec1..6145cf614 100644 --- a/tests/unit/test_launch_benchmark.py +++ b/tests/unit/test_launch_benchmark.py @@ -37,6 +37,9 @@ test_docker_image = "foo" test_batch_size = "100" test_num_cores = "1" +# need a valid file for tests to work, see conftest.py for where this is managed +test_input_graph = "test.pb" +test_tfserving_framework = "tensorflow_serving" @pytest.fixture @@ -66,8 +69,35 @@ def mock_system_platform(patch): return patch("common.base_benchmark_util.platform_util.system_platform") +@pytest.fixture +def mock_path_exists(patch): + return patch("os.path.exists", MagicMock(return_value=True)) + + +@pytest.fixture +def mock_isfile(patch): + return patch("os.path.isfile", MagicMock(return_value=True)) + + +@pytest.fixture +def mock_isdir(patch): + return patch("os.path.isdir", MagicMock(return_value=True)) + + +@pytest.fixture +def mock_islink(patch): + return patch("os.path.islink", MagicMock(return_value=False)) + + +@pytest.fixture +def mock_stat(patch): + stat = MagicMock() + stat.return_value.st_nlink = 0 + return patch("os.stat", stat) + + @pytest.fixture(autouse=True) -def launch_benchmark(mock_platform_util, request): +def launch_benchmark(mock_platform_util, request, mock_isdir, mock_isfile, mock_islink, mock_stat, mock_path_exists): """sets up launch_benchmark obj for every test case and handles catching errors if we wanna test that To catch errors called when running launch_benchmark, use something like: ['catch_error', SystemExit, [{args}], {error_message}] in parametrize @@ -113,8 +143,10 
@@ def launch_benchmark(mock_platform_util, request): req_args = request.param[2] error_message = request.param[3] if len(request.param) == 4 else '' else: + # add extra arguments to the default ones when calling LaunchBenchmark req_args = request.param + example_req_args else: + # only use default arguments when calling LaunchBenchmark req_args = example_req_args with mock_patch.object(sys, "argv", ['run_tf_benchmark.py'] + req_args): @@ -168,12 +200,13 @@ def test_launch_benchmark_parse_unknown_args(launch_benchmark): "--output-results"], "--output-results can only be used when running " "inference with a dataset"], - ['catch_error', SystemExit, ["--model-name", test_model_name, - "--framework", test_framework, - "--mode", test_mode, - "--precision", test_precision, - "--volume", "~:test"], - "Volume mounts can only be used when running in a docker container"], + ['catch_error_override_all_params', SystemExit, + ["--model-name", test_model_name, + "--framework", test_framework, + "--mode", test_mode, + "--precision", test_precision, + "--volume", "~:test"], + "Volume mounts can only be used when running in a docker container"] ], indirect=True) def test_launch_benchmark_parse_bad_args(launch_benchmark): """ @@ -224,6 +257,31 @@ def test_bare_metal(launch_benchmark, mock_popen): assert os.environ["TEST_ENV_VAR_2"] == test_env_vars["TEST_ENV_VAR_2"] +@pytest.mark.parametrize('launch_benchmark', [["--in-graph", test_input_graph]], indirect=True) +def test_launch_benchmark_tensorflow_serving_framework(launch_benchmark, mock_popen): + """ + Tests that the launch script works for tensorflow serving framework + """ + test_env_vars = {"TEST_ENV_VAR_1": "a", "TEST_ENV_VAR_2": "b"} + # Override framework and docker image. 
+ launch_benchmark.args.framework = test_tfserving_framework + launch_benchmark.args.docker_image = None + launch_benchmark.run_bare_metal("/foo", "/bar", test_env_vars) + assert mock_popen.called + args, kwargs = mock_popen.call_args + + assert launch_benchmark.args.input_graph == test_input_graph + assert launch_benchmark.args.framework == test_tfserving_framework + + # make sure that the start script is run + assert "bash" == args[0][0] + assert "start.sh" in args[0][1] + + # ensure env vars are set + assert os.environ["TEST_ENV_VAR_1"] == test_env_vars["TEST_ENV_VAR_1"] + assert os.environ["TEST_ENV_VAR_2"] == test_env_vars["TEST_ENV_VAR_2"] + + def test_help(mock_platform_util, capsys): """ Tests `launch_benchmark.py --help` output and ensures there is no error """ with mock_patch.object(sys, 'argv', ["launch_benchmark.py", "--help"]): diff --git a/tox.ini b/tox.ini index 90ac004f4..20ae07d16 100644 --- a/tox.ini +++ b/tox.ini @@ -37,6 +37,7 @@ omit = .tox/* .pytest_cache/* __pycache__/* + benchmarks/image_recognition/tensorflow_serving/* benchmarks/image_segmentation/tensorflow/unet/inference/fp32/unet_infer.py benchmarks/object_detection/tensorflow/ssd-mobilenet/inference/fp32/infer_detections.py benchmarks/recommendation/tensorflow/wide_deep/inference/fp32/data_download.py From cb2bb07dcb31c5f0aa6894024e75275da2c83a26 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Wed, 22 May 2019 10:52:36 -0700 Subject: [PATCH 35/62] Make reference file optional for Transformer LT benchmarking (#312) --- benchmarks/common/tensorflow/start.sh | 11 +++++----- .../tensorflow/transformer_language/README.md | 3 ++- .../inference/fp32/model_init.py | 21 +++++++++++-------- .../tf_transformer_language_args.json | 9 ++++++-- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index 26d25af86..ac6297d93 100755 --- a/benchmarks/common/tensorflow/start.sh +++ 
b/benchmarks/common/tensorflow/start.sh @@ -696,10 +696,6 @@ function transformer_language() { echo "transformer-language requires -- decode_from_file arg to be defined" exit 1 fi - if [[ -z "${reference}" ]]; then - echo "transformer-language requires -- reference arg to be defined" - exit 1 - fi if [[ -z "${CHECKPOINT_DIRECTORY}" ]]; then echo "transformer-language requires --checkpoint arg to be defined" exit 1 @@ -717,8 +713,11 @@ function transformer_language() { cp ${MOUNT_INTELAI_MODELS_SOURCE}/${MODE}/${PRECISION}/decoding.py ${MOUNT_EXTERNAL_MODELS_SOURCE}/tensor2tensor/utils/decoding.py - CMD="${CMD} --decode_from_file=${CHECKPOINT_DIRECTORY}/${decode_from_file} \ - --reference=${CHECKPOINT_DIRECTORY}/${reference}" + CMD="${CMD} --decode_from_file=${CHECKPOINT_DIRECTORY}/${decode_from_file}" + + if [[ -n "${reference}" ]]; then + CMD="${CMD} --reference=${CHECKPOINT_DIRECTORY}/${reference}" + fi PYTHONPATH=${PYTHONPATH} CMD=${CMD} run_model else diff --git a/benchmarks/language_translation/tensorflow/transformer_language/README.md b/benchmarks/language_translation/tensorflow/transformer_language/README.md index a21a4fbd6..cb3a80e17 100644 --- a/benchmarks/language_translation/tensorflow/transformer_language/README.md +++ b/benchmarks/language_translation/tensorflow/transformer_language/README.md @@ -69,7 +69,8 @@ Substitute the `--model-source-dir` for the location where you cloned the Transformer Language can run for latency or throughput benchmarking. Use one of the following examples below, depending on -your use case. +your use case. Note that if no `reference` file is provided in the +launch script parameters, then the BLEU score cannot be calculated. 
For latency (using `--socket-id 0` and `--batch-size 1`): diff --git a/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/model_init.py b/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/model_init.py index b4fd1bc30..8d01493ae 100644 --- a/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/model_init.py +++ b/benchmarks/language_translation/tensorflow/transformer_language/inference/fp32/model_init.py @@ -98,14 +98,15 @@ def __init__(self, args, custom_args, platform_util=None): " --output_dir=" + self.args.checkpoint + \ " --decode_from_file=" + self.args.decode_from_file + \ " --decode_to_file=" + self.args.decode_to_file + \ - " --reference=" + self.args.reference + \ " --inter_op_parallelism_threads=" + \ str(self.args.num_inter_threads) + \ " --intra_op_parallelism_threads=" + \ str(self.args.num_intra_threads) - self.bleu_params += " --translation=" + self.args.decode_to_file + \ - " --reference=" + self.args.reference + # If a reference file was provided, also calculate the bleu file + if self.args.reference: + self.bleu_params += " --translation=" + self.args.decode_to_file + \ + " --reference=" + self.args.reference self.cmd = self.cmd + run_script + cmd_args @@ -114,10 +115,12 @@ def run(self): os.chdir(self.args.model_source_dir) self.run_command(self.cmd) - # calculate the bleu number after inference is done - bleucmd = "python " + \ - os.path.join(self.args.model_source_dir, - "tensor2tensor/bin/t2t_bleu.py") + \ - self.bleu_params - os.system(bleucmd) + # calculate the bleu number after inference is done (this is skipped if no reference file is provided) + if self.bleu_params: + bleucmd = "python " + \ + os.path.join(self.args.model_source_dir, + "tensor2tensor/bin/t2t_bleu.py") + \ + self.bleu_params + os.system(bleucmd) + os.chdir(original_dir) diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_transformer_language_args.json 
b/tests/unit/common/tensorflow/tf_model_args/tf_transformer_language_args.json index 5662ad83e..bf5759531 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_transformer_language_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_transformer_language_args.json @@ -1,9 +1,14 @@ [ { "_comment": "FP32 latency benchmark", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=1 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=1 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28"}, { "_comment": "Fp32 throughput", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference 
--model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en --reference=/checkpoints/newstest2015.de", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=32 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --reference=/checkpoints/newstest2015.de --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28"} + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer --hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=32 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" + }, + { "_comment": "Fp32 benchmarking with no reference file", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_language --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=32 --socket-id=0 --benchmark-only --verbose --checkpoint=/checkpoints --data-location=/dataset --decode_from_file=/checkpoints/newstest2015.en", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/tensor2tensor/bin/t2t_decoder.py --problem=translate_ende_wmt32k --model=transformer 
--hparams_set=transformer_base_single_gpu --decode_hparams=beam_size=4,alpha=0.6,batch_size=32 --data_dir=/dataset --output_dir=/checkpoints --decode_from_file=/checkpoints/newstest2015.en --decode_to_file=/workspace/models/out_dir/output_infer --inter_op_parallelism_threads=1 --intra_op_parallelism_threads=28" + } ] From dbc54be50ebed9923bfeacdb9f2ce82ada89a3ff Mon Sep 17 00:00:00 2001 From: "Li, Guizi" Date: Fri, 24 May 2019 01:18:46 +0800 Subject: [PATCH 36/62] Add SSD-ResNet34 Int8 benchmarking and refactor FP32 code (#301) * refactor ssd-resnet34 fp32 code * add ssd-resnet34 int8 * fix for style check * remove debug code and remove data-location for benchmark mode * update tf to 1.14 and remove data location for benchmark * update pre-trianed model link * update pb file name --- benchmarks/README.md | 2 +- benchmarks/common/tensorflow/start.sh | 8 +- .../tensorflow/ssd-resnet34/README.md | 198 ++- .../ssd-resnet34/inference/fp32/model_init.py | 2 +- .../ssd-resnet34/inference/int8/__init__.py | 19 + .../ssd-resnet34/inference/int8/config.json | 7 + .../ssd-resnet34/inference/int8/model_init.py | 76 + .../inference/fp32/coco_metric.py | 193 --- .../ssd-resnet34/inference/fp32/datasets.py | 251 ---- .../inference/fp32/infer_detections.py | 6 +- .../inference/fp32/preprocessing.py | 1259 ----------------- .../inference/fp32/ssd_constants.py | 118 -- .../inference/fp32/ssd_dataloader.py | 382 ----- .../ssd-resnet34/inference/fp32/ssd_model.py | 171 --- .../ssd-resnet34/inference/int8/__init__.py | 20 + .../inference/int8/infer_detections.py | 211 +++ 16 files changed, 534 insertions(+), 2389 deletions(-) create mode 100644 benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/__init__.py create mode 100644 benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/config.json create mode 100644 benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/model_init.py delete mode 100644 
models/object_detection/tensorflow/ssd-resnet34/inference/fp32/coco_metric.py delete mode 100644 models/object_detection/tensorflow/ssd-resnet34/inference/fp32/datasets.py delete mode 100644 models/object_detection/tensorflow/ssd-resnet34/inference/fp32/preprocessing.py delete mode 100644 models/object_detection/tensorflow/ssd-resnet34/inference/fp32/ssd_constants.py delete mode 100644 models/object_detection/tensorflow/ssd-resnet34/inference/fp32/ssd_dataloader.py delete mode 100644 models/object_detection/tensorflow/ssd-resnet34/inference/fp32/ssd_model.py create mode 100644 models/object_detection/tensorflow/ssd-resnet34/inference/int8/__init__.py create mode 100644 models/object_detection/tensorflow/ssd-resnet34/inference/int8/infer_detections.py diff --git a/benchmarks/README.md b/benchmarks/README.md index 0875f2baa..25cea61b6 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -36,8 +36,8 @@ dependencies to be installed: | Object Detection | TensorFlow | [R-FCN](https://arxiv.org/pdf/1605.06409.pdf) | Inference | [Int8](object_detection/tensorflow/rfcn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/rfcn/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [Faster R-CNN](https://arxiv.org/pdf/1506.01497.pdf) | Inference | [Int8](object_detection/tensorflow/faster_rcnn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/faster_rcnn/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [SSD-MobileNet](https://arxiv.org/pdf/1704.04861.pdf) | Inference | [Int8](object_detection/tensorflow/ssd-mobilenet/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-mobilenet/README.md#fp32-inference-instructions) | +| Object Detection | TensorFlow | [SSD-ResNet34](https://arxiv.org/pdf/1512.02325.pdf) | Inference | [INT8](object_detection/tensorflow/ssd-resnet34/README.md#int8-inference-instructions) 
[FP32](object_detection/tensorflow/ssd-resnet34/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [SSD-VGG16](https://arxiv.org/pdf/1512.02325.pdf) | Inference | [Int8](object_detection/tensorflow/ssd_vgg16/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd_vgg16/README.md#fp32-inference-instructions) | -| Object Detection | TensorFlow | [SSD-ResNet34](https://arxiv.org/pdf/1512.02325.pdf) | Inference | [FP32](object_detection/tensorflow/ssd-resnet34/README.md#fp32-inference-instructions) | | Recommendation | TensorFlow | [NCF](https://arxiv.org/pdf/1708.05031.pdf) | Inference | [FP32](recommendation/tensorflow/ncf/README.md#fp32-inference-instructions) | | Recommendation | TensorFlow | [Wide & Deep Large Dataset](https://arxiv.org/pdf/1606.07792.pdf) | Inference | [Int8](recommendation/tensorflow/wide_deep_large_ds/README.md#int8-inference-instructions) [FP32](recommendation/tensorflow/wide_deep_large_ds/README.md#fp32-inference-instructions) | | Recommendation | TensorFlow | [Wide & Deep](https://arxiv.org/pdf/1606.07792.pdf) | Inference | [FP32](recommendation/tensorflow/wide_deep/README.md#fp32-inference-instructions) | diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index ac6297d93..be2d30f5c 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -627,12 +627,18 @@ function ssd_mobilenet() { # SSD-ResNet34 model function ssd-resnet34() { - if [ ${PRECISION} == "fp32" ]; then + if [ ${PRECISION} == "fp32" ] || [ ${PRECISION} == "int8" ]; then if [ ${NOINSTALL} != "True" ]; then for line in $(cat ${MOUNT_BENCHMARK}/object_detection/tensorflow/ssd-resnet34/requirements.txt) do pip install $line done + old_dir=${PWD} + cd /tmp + git clone --single-branch https://github.com/tensorflow/benchmarks.git + cd benchmarks + git checkout 1e7d788042dfc6d5e5cd87410c57d5eccee5c664 + cd ${old_dir} fi CMD=${CMD} run_model diff --git 
a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md index c5dac8657..a16d716ae 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md @@ -3,6 +3,7 @@ This document has instructions for how to run SSD-ResNet34 for the following modes/precisions: * [FP32 inference](#fp32-inference-instructions) +* [INT8 inference](#int8-inference-instructions) Benchmarking instructions and scripts for model training and inference other precisions are coming later. @@ -96,7 +97,11 @@ $ git checkout f505cecde2d8ebf6fe15f40fb8bc350b2b1ed5dc The `coco_val.record` file is what we will use in this inference example. -5. A link to download the pre-trained model is coming soon. +5. Download the pretrained model: + +``` +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/ssd_resnet34_fp32_bs1_pretrained_model.pb +``` 6. Clone the [intelai/models](https://github.com/intelai/models) repo. This repo has the launch script for running benchmarking, which we will @@ -110,20 +115,18 @@ $ git clone https://github.com/IntelAI/models.git [intelai/models](https://github.com/intelai/models) repo that was just cloned in the previous step. SSD-ResNet34 can be run for benchmarking throughput and latency, or testing accuracy. Note that we are running -SSD-ResNet34 with a TensorFlow 1.13 docker image. +SSD-ResNet34 with a TensorFlow 1.14 docker image. 
To benchmarking for throughput and latency, use the following command, -but replace in your path to the unzipped coco dataset images from step 3 -for the `--dataset-location`, the path to the frozen graph that you -downloaded in step 5 as the `--in-graph`, and use the `--benchmark-only` +the path to the frozen graph that you downloaded in step 5 as +the `--in-graph`, and use the `--benchmark-only` flag: ``` $ cd /home//models/benchmarks $ python launch_benchmark.py \ - --data-location /home//coco/output/ \ - --in-graph /home//ssd_resnet34_coco_pretained_model/ssd_resnet34_bs1.pb \ + --in-graph /home//ssd_resnet34_fp32_bs1_pretrained_model.pb \ --model-source-dir /home//tensorflow/models \ --model-name ssd-resnet34 \ --framework tensorflow \ @@ -143,7 +146,7 @@ the path to the frozen graph that you downloaded in step 5 as the ``` $ python launch_benchmark.py \ --data-location /home//coco/output/ \ - --in-graph /home//ssd_resnet34_coco_pretained_model/ssd_resnet34_bs1.pb \ + --in-graph /home//ssd_resnet34_fp32_bs1_pretrained_model.pb \ --model-source-dir /home//tensorflow/models \ --model-name ssd-resnet34 \ --framework tensorflow \ @@ -181,6 +184,181 @@ Below is a sample log file tail when testing accuracy: Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.334 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.494 Current AP: 0.21082 -Ran inference with batch size -1 -Log location outside container: {--output-dir value}/benchmark_ssd-mobilenet_inference_fp32_20190123_225145.log +``` + +## INT8 Inference Instructions + +1. Clone the `tensorflow/models` repository with the specified SHA, +since we are using an older version of the models repo for +SSD-ResNet34. 
+ +``` +$ git clone https://github.com/tensorflow/models.git +$ cd models +$ git checkout f505cecde2d8ebf6fe15f40fb8bc350b2b1ed5dc +$ git clone https://github.com/cocodataset/cocoapi.git +``` + +The TensorFlow models repo will be used for running inference as well as +converting the coco dataset to the TF records format. + +2. Follow the TensorFlow models object detection +[installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md#installation) +to get your environment set up with the required dependencies. + +3. Download the 2017 validation +[COCO dataset](http://cocodataset.org/#home) and annotations: + +``` +$ mkdir val +$ cd val +$ wget http://images.cocodataset.org/zips/val2017.zip +$ unzip val2017.zip +$ cd .. + +$ mkdir annotations +$ cd annotations +$ wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip +$ unzip annotations_trainval2017.zip +$ cd .. +``` + +Since we are only using the validation dataset in this example, we will +create an empty directory and empty annotations json file to pass as the +train and test directories in the next step. + +``` +$ mkdir empty_dir + +$ cd annotations +$ echo "{ \"images\": {}, \"categories\": {}}" > empty.json +$ cd .. +``` + +4. Now that you have the raw COCO dataset, we need to convert it to the +TF records format in order to use it with the inference script. We will +do this by running the `create_coco_tf_record.py` file in the TensorFlow +models repo. + +Follow the steps below to navigate to the proper directory and point the +script to the raw COCO dataset files that you have downloaded in step 3. +The `--output_dir` is the location where the TF record files will be +located after the script has completed.
+ +``` + +# We are going to use an older version of the conversion script, so check out the following git commit +$ cd models +$ git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 + +$ cd research/object_detection/dataset_tools/ +$ python create_coco_tf_record.py --logtostderr \ + --train_image_dir="/home//coco/empty_dir" \ + --val_image_dir="/home//coco/val/val2017" \ + --test_image_dir="/home//coco/empty_dir" \ + --train_annotations_file="/home//coco/annotations/empty.json" \ + --val_annotations_file="/home//coco/annotations/instances_val2017.json" \ + --testdev_annotations_file="/home//coco/annotations/empty.json" \ + --output_dir="/home//coco/output" + +$ ll /home//coco/output +total 1598276 +-rw-rw-r--. 1 0 Nov 2 21:46 coco_testdev.record +-rw-rw-r--. 1 0 Nov 2 21:46 coco_train.record +-rw-rw-r--. 1 818336740 Nov 2 21:46 coco_val.record + +# Go back to the main models directory and check out the SHA that we are using for SSD-ResNet34 +$ cd /home//models +$ git checkout f505cecde2d8ebf6fe15f40fb8bc350b2b1ed5dc +``` + +The `coco_val.record` file is what we will use in this inference example. + +5. Download the pretrained model: + +``` +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/ssd_resnet34_int8_bs1_pretrained_model.pb +``` + +6. Clone the [intelai/models](https://github.com/intelai/models) repo. +This repo has the launch script for running benchmarking, which we will +use in the next step. + +``` +$ git clone https://github.com/IntelAI/models.git +``` + +7. Next, navigate to the `benchmarks` directory of the +[intelai/models](https://github.com/intelai/models) repo that was just +cloned in the previous step. SSD-ResNet34 can be run for benchmarking +throughput and latency, or testing accuracy. Note that we are running +SSD-ResNet34 with a TensorFlow 1.14 docker image.
+To benchmark throughput and latency, use the following command, +but replace in your path to the frozen graph that you downloaded in step 5 as +the `--in-graph`, and use the `--benchmark-only` +flag: + +``` +$ cd /home//models/benchmarks + +$ python launch_benchmark.py \ + --in-graph /home//ssd_resnet34_int8_bs1_pretrained_model.pb \ + --model-source-dir /home//tensorflow/models \ + --model-name ssd-resnet34 \ + --framework tensorflow \ + --precision int8 \ + --mode inference \ + --socket-id 0 \ + --batch-size=1 \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-devel-mkl-py3 \ + --benchmark-only +``` + +To test accuracy, use the following command but replace in your path to +the tf record file that you generated in step 4 for the `--data-location`, +the path to the frozen graph that you downloaded in step 5 as the +`--in-graph`, and use the `--accuracy-only` flag: + +``` +$ python launch_benchmark.py \ + --data-location /home//coco/output/ \ + --in-graph /home//ssd_resnet34_int8_bs1_pretrained_model.pb \ + --model-source-dir /home//tensorflow/models \ + --model-name ssd-resnet34 \ + --framework tensorflow \ + --precision int8 \ + --mode inference \ + --socket-id 0 \ + --batch-size=1 \ + --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-devel-mkl-py3 \ + --accuracy-only +``` + +8. The log file is saved to the value of `--output-dir`.
+ +Below is a sample log file tail when running benchmarking: + +``` +Batchsize: 1 +Time spent per BATCH: 12.0245 ms +Total samples/sec: 83.1635 samples/s +``` + +Below is a sample log file tail when testing accuracy: + +``` + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.204 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.360 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.208 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.051 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.213 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.335 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.210 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.294 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.301 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.083 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.327 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.484 +Current AP: 0.20408 ``` diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py index 20bfcccf5..0b53a0112 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/fp32/model_init.py @@ -69,8 +69,8 @@ def __init__(self, args, custom_args, platform_util): self.run_cmd += " --data-location {0}".format(self.args.data_location) def run(self): - print(self.run_cmd) old_python_path = os.environ["PYTHONPATH"] os.environ["PYTHONPATH"] = os.path.join(self.args.model_source_dir, "research") + os.environ["PYTHONPATH"] += ":/tmp/benchmarks/scripts/tf_cnn_benchmarks/" self.run_command(self.run_cmd) os.environ["PYTHONPATH"] = 
old_python_path diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/__init__.py b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/config.json b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/model_init.py b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/model_init.py new file mode 100644 index 000000000..0b53a0112 --- /dev/null +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/inference/int8/model_init.py @@ -0,0 +1,76 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use 
this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +import os +import sys + +from common.base_model_init import BaseModelInitializer +from common.base_model_init import set_env_var + + +class ModelInitializer(BaseModelInitializer): + def run_inference_sanity_checks(self, args, custom_args): + if not args.input_graph: + sys.exit("Please provide a path to the frozen graph directory" + " via the '--in-graph' flag.") + if not args.data_location and self.args.accuracy_only: + sys.exit("Please provide a path to the data directory via the " + "'--data-location' flag.") + if args.socket_id == -1 and args.num_cores == -1: + print("***Warning***: Running inference on all cores could degrade" + " performance. 
Pass a '--socket-id' to specify running on a" + " single socket instead.\n") + + def __init__(self, args, custom_args, platform_util): + super(ModelInitializer, self).__init__(args, custom_args, platform_util) + + self.run_inference_sanity_checks(self.args, self.custom_args) + + # Set KMP env vars, if they haven't already been set + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path) + + self.set_num_inter_intra_threads() + + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) + + self.model_dir = os.path.join(self.args.intelai_models, self.args.mode, self.args.precision) + + # get benchmark command + benchmark_script = os.path.join(self.model_dir, "infer_detections.py") + + # get command with numactl + self.run_cmd = self.get_command_prefix(self.args.socket_id) + self.run_cmd += "{0} {1}".format(self.python_exe, benchmark_script) + self.run_cmd += " --input-graph {0}".format(self.args.input_graph) + self.run_cmd += " --batch-size {0}".format(args.batch_size) + self.run_cmd += " --inter-op-parallelism-threads {0}".format(self.args.num_inter_threads) + self.run_cmd += " --intra-op-parallelism-threads {0}".format(self.args.num_intra_threads) + + if self.args.accuracy_only: + self.run_cmd += " --accuracy-only " + self.run_cmd += " --data-location {0}".format(self.args.data_location) + + def run(self): + old_python_path = os.environ["PYTHONPATH"] + os.environ["PYTHONPATH"] = os.path.join(self.args.model_source_dir, "research") + os.environ["PYTHONPATH"] += ":/tmp/benchmarks/scripts/tf_cnn_benchmarks/" + self.run_command(self.run_cmd) + os.environ["PYTHONPATH"] = old_python_path diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/coco_metric.py b/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/coco_metric.py deleted file mode 100644 index 08f3b7e5a..000000000 --- a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/coco_metric.py +++ 
/dev/null @@ -1,193 +0,0 @@ -# Copyright 2018 Google. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""COCO-style evaluation metrics. - -Forked from reference model implementation. - -COCO API: github.com/cocodataset/cocoapi/ -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import atexit -import tempfile - -from absl import flags - -import numpy as np -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval -import six - -import tensorflow as tf - -import ssd_constants - -FLAGS = flags.FLAGS - - -# https://github.com/cocodataset/cocoapi/issues/49 -if six.PY3: - import pycocotools.coco - pycocotools.coco.unicode = str - - -def async_eval_runner(queue_predictions, queue_results, val_json_file): - """Load intermediate eval results and get COCO metrics.""" - while True: - message = queue_predictions.get() - if message == 'STOP': # poison pill - break - step, predictions = message - results = compute_map(predictions, val_json_file) - queue_results.put((step, results)) - - -def compute_map(predictions, val_json_file): - """Use model predictions to compute mAP. 
- - Args: - predictions: a list of tuples returned by decoded_predictions function, - each containing the following elements: - image source_id, box coordinates in XYWH order, probability score, label - val_json_file: path to COCO annotation file - Returns: - A dictionary that maps all COCO metrics (keys) to their values - """ - - if val_json_file.startswith("gs://"): - _, local_val_json = tempfile.mkstemp(suffix=".json") - tf.gfile.Remove(local_val_json) - - tf.gfile.Copy(val_json_file, local_val_json) - atexit.register(tf.gfile.Remove, local_val_json) - else: - local_val_json = val_json_file - - cocoGt = COCO(local_val_json) - cocoDt = cocoGt.loadRes(np.array(predictions)) - E = COCOeval(cocoGt, cocoDt, iouType='bbox') - E.evaluate() - E.accumulate() - E.summarize() - print("Current AP: {:.5f}".format(E.stats[0])) - metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1', - 'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl'] - - # Prefix with "COCO" to group in TensorBoard. - return {"COCO/" + key: value for key, value in zip(metric_names, E.stats)} - - -def calc_iou(target, candidates): - target_tiled = np.tile(target[np.newaxis, :], (candidates.shape[0], 1)) - # Left Top & Right Bottom - lt = np.maximum(target_tiled[:,:2], candidates[:,:2]) - - rb = np.minimum(target_tiled[:,2:], candidates[:,2:]) - - delta = np.maximum(rb - lt, 0) - - intersect = delta[:,0] * delta[:,1] - - delta1 = target_tiled[:,2:] - candidates[:,:2] - area1 = delta1[:,0] * delta1[:,1] - delta2 = target_tiled[:,2:] - candidates[:,:2] - area2 = delta2[:,0] * delta2[:,1] - - iou = intersect/(area1 + area2 - intersect) - return iou - - -# TODO(haoyuzhang): Rewrite this NumPy based implementation to TensorFlow based -# implementation under ssd_model.py accuracy_function. 
-def decode_predictions(labels_and_predictions): - """Decode predictions and remove unused boxes and labels.""" - predictions = [] - for example in labels_and_predictions: - source_id = int(example[ssd_constants.SOURCE_ID]) - pred_box = example[ssd_constants.PRED_BOXES] - pred_scores = example[ssd_constants.PRED_SCORES] - - locs, labels, probs = decode_single( - pred_box, pred_scores, ssd_constants.OVERLAP_CRITERIA, - ssd_constants.MAX_NUM_EVAL_BOXES, ssd_constants.MAX_NUM_EVAL_BOXES) - - raw_height, raw_width, _ = example[ssd_constants.RAW_SHAPE] - for loc, label, prob in zip(locs, labels, probs): - # Ordering convention differs, hence [1], [0] rather than [0], [1] - x, y = loc[1] * raw_width, loc[0] * raw_height - w, h = (loc[3] - loc[1]) * raw_width, (loc[2] - loc[0]) * raw_height - predictions.append( - [source_id, x, y, w, h, prob, ssd_constants.CLASS_INV_MAP[label]]) - return predictions - - -def decode_single(bboxes_in, scores_in, criteria, max_output, max_num=200): - # Reference to https://github.com/amdegroot/ssd.pytorch - - bboxes_out = [] - scores_out = [] - labels_out = [] - - for i, score in enumerate(np.split(scores_in, scores_in.shape[1], 1)): - score = np.squeeze(score, 1) - - # skip background - if i == 0: - continue - - mask = score > ssd_constants.MIN_SCORE - if not np.any(mask): - continue - - bboxes, score = bboxes_in[mask, :], score[mask] - - score_idx_sorted = np.argsort(score) - score_sorted = score[score_idx_sorted] - - score_idx_sorted = score_idx_sorted[-max_num:] - candidates = [] - - # perform non-maximum suppression - while len(score_idx_sorted): - idx = score_idx_sorted[-1] - bboxes_sorted = bboxes[score_idx_sorted, :] - bboxes_idx = bboxes[idx, :] - iou = calc_iou(bboxes_idx, bboxes_sorted) - - score_idx_sorted = score_idx_sorted[iou < criteria] - candidates.append(idx) - - bboxes_out.append(bboxes[candidates, :]) - scores_out.append(score[candidates]) - labels_out.extend([i]*len(candidates)) - - if len(scores_out) == 0: - 
tf.logging.info("No objects detected. Returning dummy values.") - return ( - np.zeros(shape=(1, 4), dtype=np.float32), - np.zeros(shape=(1,), dtype=np.int32), - np.ones(shape=(1,), dtype=np.float32) * ssd_constants.DUMMY_SCORE, - ) - - bboxes_out = np.concatenate(bboxes_out, axis=0) - scores_out = np.concatenate(scores_out, axis=0) - labels_out = np.array(labels_out) - - max_ids = np.argsort(scores_out)[-max_output:] - - return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids] diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/datasets.py b/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/datasets.py deleted file mode 100644 index 58c0f0dff..000000000 --- a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/datasets.py +++ /dev/null @@ -1,251 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Benchmark dataset utilities. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from abc import abstractmethod -import os - -import numpy as np -import six -from six.moves import cPickle -from six.moves import xrange # pylint: disable=redefined-builtin -import tensorflow as tf - -from tensorflow.python.platform import gfile -import preprocessing - -IMAGENET_NUM_TRAIN_IMAGES = 1281167 -IMAGENET_NUM_VAL_IMAGES = 50000 - -COCO_NUM_TRAIN_IMAGES = 118287 -COCO_NUM_VAL_IMAGES = 4952 - - -class Dataset(object): - """Abstract class for cnn benchmarks dataset.""" - - def __init__(self, - name, - data_dir=None, - queue_runner_required=False, - num_classes=None): - self.name = name - self.data_dir = data_dir - self._queue_runner_required = queue_runner_required - self._num_classes = num_classes - - def tf_record_pattern(self, subset): - return os.path.join(self.data_dir, '%s-*-of-*' % subset) - - def reader(self): - return tf.TFRecordReader() - - @property - def num_classes(self): - return self._num_classes - - @num_classes.setter - def num_classes(self, val): - self._num_classes = val - - @abstractmethod - def num_examples_per_epoch(self, subset): - pass - - def __str__(self): - return self.name - - def get_input_preprocessor(self, input_preprocessor='default'): - assert not self.use_synthetic_gpu_inputs() - return _SUPPORTED_INPUT_PREPROCESSORS[self.name][input_preprocessor] - - def queue_runner_required(self): - return self._queue_runner_required - - def use_synthetic_gpu_inputs(self): - return not self.data_dir - - -class LibrispeechDataset(Dataset): - """Configuration for LibriSpeech dataset.""" - - def __init__(self, data_dir=None): - super(LibrispeechDataset, self).__init__( - 'librispeech', data_dir, num_classes=29) - - def tf_record_pattern(self, subset): - if subset == 'train': - return os.path.join(self.data_dir, 'train-clean-*.tfrecords') - elif subset == 'validation': - return os.path.join(self.data_dir, 
'test-clean.tfrecords') - else: - return '' - - def num_examples_per_epoch(self, subset='train'): - del subset - return 2 # TODO(laigd): currently this is an arbitrary number. - - -class ImageDataset(Dataset): - """Abstract class for image datasets.""" - - def __init__(self, - name, - height, - width, - depth=None, - data_dir=None, - queue_runner_required=False, - num_classes=1001): - super(ImageDataset, self).__init__(name, data_dir, queue_runner_required, - num_classes) - self.height = height - self.width = width - self.depth = depth or 3 - - -class ImagenetDataset(ImageDataset): - """Configuration for Imagenet dataset.""" - - def __init__(self, data_dir=None): - super(ImagenetDataset, self).__init__( - 'imagenet', 300, 300, data_dir=data_dir) - - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return IMAGENET_NUM_TRAIN_IMAGES - elif subset == 'validation': - return IMAGENET_NUM_VAL_IMAGES - else: - raise ValueError('Invalid data subset "%s"' % subset) - - -class Cifar10Dataset(ImageDataset): - """Configuration for cifar 10 dataset. - - It will mount all the input images to memory. 
- """ - - def __init__(self, data_dir=None): - super(Cifar10Dataset, self).__init__( - 'cifar10', - 32, - 32, - data_dir=data_dir, - queue_runner_required=True, - num_classes=11) - - def read_data_files(self, subset='train'): - """Reads from data file and returns images and labels in a numpy array.""" - assert self.data_dir, ('Cannot call `read_data_files` when using synthetic ' - 'data') - if subset == 'train': - filenames = [ - os.path.join(self.data_dir, 'data_batch_%d' % i) - for i in xrange(1, 6) - ] - elif subset == 'validation': - filenames = [os.path.join(self.data_dir, 'test_batch')] - else: - raise ValueError('Invalid data subset "%s"' % subset) - - inputs = [] - for filename in filenames: - with gfile.Open(filename, 'rb') as f: - # python2 does not have the encoding parameter - encoding = {} if six.PY2 else {'encoding': 'bytes'} - inputs.append(cPickle.load(f, **encoding)) - # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the - # input format. - all_images = np.concatenate( - [each_input[b'data'] for each_input in inputs]).astype(np.float32) - all_labels = np.concatenate( - [each_input[b'labels'] for each_input in inputs]) - return all_images, all_labels - - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return 50000 - elif subset == 'validation': - return 10000 - else: - raise ValueError('Invalid data subset "%s"' % subset) - - -class COCODataset(ImageDataset): - """COnfiguration for COCO dataset.""" - - def __init__(self, data_dir=None, image_size=300): - super(COCODataset, self).__init__( - 'coco', image_size, image_size, data_dir=data_dir, num_classes=81) - - def num_examples_per_epoch(self, subset='train'): - if subset == 'train': - return COCO_NUM_TRAIN_IMAGES - elif subset == 'validation': - return COCO_NUM_VAL_IMAGES - else: - raise ValueError('Invalid data subset "%s"' % subset) - - -_SUPPORTED_DATASETS = { - 'imagenet': ImagenetDataset, - 'cifar10': Cifar10Dataset, - 'librispeech': 
LibrispeechDataset, - 'coco': COCODataset, -} - -_SUPPORTED_INPUT_PREPROCESSORS = { - 'imagenet': { - 'default': preprocessing.RecordInputImagePreprocessor, - 'official_models_imagenet': preprocessing.ImagenetPreprocessor, - }, - 'cifar10': { - 'default': preprocessing.Cifar10ImagePreprocessor - }, - 'librispeech': { - 'default': preprocessing.LibrispeechPreprocessor - }, - 'coco': { - 'default': preprocessing.COCOPreprocessor - }, -} - - -def create_dataset(data_dir, data_name): - """Create a Dataset instance based on data_dir and data_name.""" - if not data_dir and not data_name: - # When using synthetic data, use synthetic imagenet images by default. - data_name = 'imagenet' - - # Infere dataset name from data_dir if data_name is not provided. - if data_name is None: - for supported_name in _SUPPORTED_DATASETS: - if supported_name in data_dir: - data_name = supported_name - break - else: # Failed to identify dataset name from data dir. - raise ValueError('Could not identify name of dataset. ' - 'Please specify with --data_name option.') - if data_name not in _SUPPORTED_DATASETS: - raise ValueError('Unknown dataset. 
Must be one of %s' % ', '.join( - [key for key in sorted(_SUPPORTED_DATASETS.keys())])) - - return _SUPPORTED_DATASETS[data_name](data_dir) diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/infer_detections.py b/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/infer_detections.py index f2666a94c..657469658 100644 --- a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/infer_detections.py +++ b/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/infer_detections.py @@ -23,9 +23,10 @@ from argparse import ArgumentParser +import benchmark_cnn import datasets import ssd_constants -import ssd_model +from models import ssd_model from preprocessing import COCOPreprocessor IMAGE_SIZE = 300 @@ -168,7 +169,8 @@ def accuracy_check(self): ds_init = tf.get_collection(tf.GraphKeys.TABLE_INITIALIZERS) ds_sess = tf.Session() - self.model = ssd_model.SSD300Model(self.args.data_location) + params = benchmark_cnn.make_params(data_dir=self.args.data_location) + self.model = ssd_model.SSD300Model(params=params) print("Inference for accuracy check.") with tf.Session(graph=self.freeze_graph, config=self.config) as sess: diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/preprocessing.py b/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/preprocessing.py deleted file mode 100644 index 6814a48cd..000000000 --- a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/preprocessing.py +++ /dev/null @@ -1,1259 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Image pre-processing utilities. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from six.moves import xrange # pylint: disable=redefined-builtin -import tensorflow as tf - -from tensorflow.contrib.data.python.ops import threadpool -from tensorflow.contrib.image.python.ops import distort_image_ops -from tensorflow.contrib.data.python.ops import interleave_ops -from tensorflow.contrib.data.python.ops import batching -from tensorflow.python.framework import function -from tensorflow.python.layers import utils -from tensorflow.python.ops import data_flow_ops -from tensorflow.python.platform import gfile - - -def parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - - The output of the build_image_data.py image preprocessing script is a dataset - containing serialized Example protocol buffers. 
Each Example proto contains - the following fields: - - image/height: 462 - image/width: 581 - image/colorspace: 'RGB' - image/channels: 3 - image/class/label: 615 - image/class/synset: 'n03623198' - image/class/text: 'knee pad' - image/object/bbox/xmin: 0.1 - image/object/bbox/xmax: 0.9 - image/object/bbox/ymin: 0.2 - image/object/bbox/ymax: 0.6 - image/object/bbox/label: 615 - image/format: 'JPEG' - image/filename: 'ILSVRC2012_val_00041207.JPEG' - image/encoded: - - Args: - example_serialized: scalar Tensor tf.string containing a serialized - Example protocol buffer. - - Returns: - image_buffer: Tensor tf.string containing the contents of a JPEG file. - label: Tensor tf.int32 containing the label. - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. - text: Tensor tf.string containing the human-readable label. - """ - # Dense features in Example proto. - feature_map = { - 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, - default_value=-1), - 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, - default_value=''), - } - sparse_float32 = tf.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. 
- feature_map.update( - {k: sparse_float32 for k in ['image/object/bbox/xmin', - 'image/object/bbox/ymin', - 'image/object/bbox/xmax', - 'image/object/bbox/ymax']}) - - features = tf.parse_single_example(example_serialized, feature_map) - label = tf.cast(features['image/class/label'], dtype=tf.int32) - - xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) - ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) - xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) - ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) - - # Note that we impose an ordering of (y, x) just to make life difficult. - bbox = tf.concat([ymin, xmin, ymax, xmax], 0) - - # Force the variable number of bounding boxes into the shape - # [1, num_boxes, coords]. - bbox = tf.expand_dims(bbox, 0) - bbox = tf.transpose(bbox, [0, 2, 1]) - - return features['image/encoded'], label, bbox, features['image/class/text'] - - -_RESIZE_METHOD_MAP = { - 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, - 'bilinear': tf.image.ResizeMethod.BILINEAR, - 'bicubic': tf.image.ResizeMethod.BICUBIC, - 'area': tf.image.ResizeMethod.AREA -} - - -def get_image_resize_method(resize_method, batch_position=0): - """Get tensorflow resize method. - - If resize_method is 'round_robin', return different methods based on batch - position in a round-robin fashion. NOTE: If the batch size is not a multiple - of the number of methods, then the distribution of methods will not be - uniform. - - Args: - resize_method: (string) nearest, bilinear, bicubic, area, or round_robin. - batch_position: position of the image in a batch. NOTE: this argument can - be an integer or a tensor - Returns: - one of resize type defined in tf.image.ResizeMethod. - """ - - if resize_method != 'round_robin': - return _RESIZE_METHOD_MAP[resize_method] - - # return a resize method based on batch position in a round-robin fashion. 
- resize_methods = list(_RESIZE_METHOD_MAP.values()) - def lookup(index): - return resize_methods[index] - - def resize_method_0(): - return utils.smart_cond(batch_position % len(resize_methods) == 0, - lambda: lookup(0), resize_method_1) - - def resize_method_1(): - return utils.smart_cond(batch_position % len(resize_methods) == 1, - lambda: lookup(1), resize_method_2) - - def resize_method_2(): - return utils.smart_cond(batch_position % len(resize_methods) == 2, - lambda: lookup(2), lambda: lookup(3)) - - # NOTE(jsimsa): Unfortunately, we cannot use a single recursive function here - # because TF would not be able to construct a finite graph. - - return resize_method_0() - - -def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): - """Decode a JPEG string into one 3-D float image Tensor. - - Args: - image_buffer: scalar string Tensor. - scope: Optional scope for op_scope. - Returns: - 3-D float Tensor with values ranging from [0, 1). - """ - # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): - # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): - with tf.name_scope(scope or 'decode_jpeg'): - # Decode the string as an RGB JPEG. - # Note that the resulting image contains an unknown height and width - # that is set dynamically by decode_jpeg. In other words, the height - # and width of image is unknown at compile-time. - image = tf.image.decode_jpeg(image_buffer, channels=3, - fancy_upscaling=False, - dct_method='INTEGER_FAST') - - # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') - - return image - - -_R_MEAN = 123.68 -_G_MEAN = 116.78 -_B_MEAN = 103.94 -_CHANNEL_MEANS = [_R_MEAN, _G_MEAN, _B_MEAN] - - -def normalized_image(images): - # Rescale from [0, 255] to [0, 2] - images = tf.multiply(images, 1. / 127.5) - # Rescale to [-1, 1] - return tf.subtract(images, 1.0) - - -def eval_image(image, - height, - width, - batch_position, - resize_method, - summary_verbosity=0): - """Get the image for model evaluation. 
- - We preprocess the image simiarly to Slim, see - https://github.com/tensorflow/models/blob/master/research/slim/preprocessing/vgg_preprocessing.py - Validation images do not have bounding boxes, so to crop the image, we first - resize the image such that the aspect ratio is maintained and the resized - height and width are both at least 1.145 times `height` and `width` - respectively. Then, we do a central crop to size (`height`, `width`). - - Args: - image: 3-D float Tensor representing the image. - height: The height of the image that will be returned. - width: The width of the image that will be returned. - batch_position: position of the image in a batch, which affects how images - are distorted and resized. NOTE: this argument can be an integer or a - tensor - resize_method: one of the strings 'round_robin', 'nearest', 'bilinear', - 'bicubic', or 'area'. - summary_verbosity: Verbosity level for summary ops. Pass 0 to disable both - summaries and checkpoints. - Returns: - An image of size (output_height, output_width, 3) that is resized and - cropped as described above. - """ - # TODO(reedwm): Currently we resize then crop. Investigate if it's faster to - # crop then resize. - with tf.name_scope('eval_image'): - if summary_verbosity >= 3: - tf.summary.image( - 'original_image', tf.expand_dims(image, 0)) - - shape = tf.shape(image) - image_height = shape[0] - image_width = shape[1] - image_height_float = tf.cast(image_height, tf.float32) - image_width_float = tf.cast(image_width, tf.float32) - - # This value is chosen so that in resnet, images are cropped to a size of - # 256 x 256, which matches what other implementations do. The final image - # size for resnet is 224 x 224, and floor(224 * 1.145) = 256. - scale_factor = 1.145 - - # Compute resize_height and resize_width to be the minimum values such that - # 1. The aspect ratio is maintained (i.e. resize_height / resize_width is - # image_height / image_width), and - # 2. 
resize_height >= height * `scale_factor`, and - # 3. resize_width >= width * `scale_factor` - max_ratio = tf.maximum(height / image_height_float, - width / image_width_float) - resize_height = tf.cast(image_height_float * max_ratio * scale_factor, - tf.int32) - resize_width = tf.cast(image_width_float * max_ratio * scale_factor, - tf.int32) - - # Resize the image to shape (`resize_height`, `resize_width`) - image_resize_method = get_image_resize_method(resize_method, batch_position) - distorted_image = tf.image.resize_images(image, - [resize_height, resize_width], - image_resize_method, - align_corners=False) - - # Do a central crop of the image to size (height, width). - # MLPerf requires us to log (height, width) with two different keys. - total_crop_height = (resize_height - height) - crop_top = total_crop_height // 2 - total_crop_width = (resize_width - width) - crop_left = total_crop_width // 2 - distorted_image = tf.slice(distorted_image, [crop_top, crop_left, 0], - [height, width, 3]) - - distorted_image.set_shape([height, width, 3]) - if summary_verbosity >= 3: - tf.summary.image( - 'cropped_resized_image', tf.expand_dims(distorted_image, 0)) - image = distorted_image - return image - - -def train_image(image_buffer, - height, - width, - bbox, - batch_position, - resize_method, - distortions, - scope=None, - summary_verbosity=0, - distort_color_in_yiq=False, - fuse_decode_and_crop=False): - """Distort one image for training a network. - - Distorting images provides a useful technique for augmenting the data - set during training in order to make the network invariant to aspects - of the image that do not effect the label. - - Args: - image_buffer: scalar string Tensor representing the raw JPEG image buffer. - height: integer - width: integer - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. 
- batch_position: position of the image in a batch, which affects how images - are distorted and resized. NOTE: this argument can be an integer or a - tensor - resize_method: round_robin, nearest, bilinear, bicubic, or area. - distortions: If true, apply full distortions for image colors. - scope: Optional scope for op_scope. - summary_verbosity: Verbosity level for summary ops. Pass 0 to disable both - summaries and checkpoints. - distort_color_in_yiq: distort color of input images in YIQ space. - fuse_decode_and_crop: fuse the decode/crop operation. - Returns: - 3-D float Tensor of distorted image used for training. - """ - # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): - # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): - with tf.name_scope(scope or 'distort_image'): - # A large fraction of image datasets contain a human-annotated bounding box - # delineating the region of the image containing the object of interest. We - # choose to create a new bounding box for the object which is a randomly - # distorted version of the human-annotated bounding box that obeys an - # allowed range of aspect ratios, sizes and overlap with the human-annotated - # bounding box. If no box is supplied, then we assume the bounding box is - # the entire image. 
- min_object_covered = 0.1 - aspect_ratio_range = [0.75, 1.33] - area_range = [0.05, 1.0] - max_attempts = 100 - - sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.image.extract_jpeg_shape(image_buffer), - bounding_boxes=bbox, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - max_attempts=max_attempts, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box - if summary_verbosity >= 3: - image = tf.image.decode_jpeg(image_buffer, channels=3, - dct_method='INTEGER_FAST') - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - image_with_distorted_box = tf.image.draw_bounding_boxes( - tf.expand_dims(image, 0), distort_bbox) - tf.summary.image( - 'images_with_distorted_bounding_box', - image_with_distorted_box) - - # Crop the image to the specified bounding box. - if fuse_decode_and_crop: - offset_y, offset_x, _ = tf.unstack(bbox_begin) - target_height, target_width, _ = tf.unstack(bbox_size) - crop_window = tf.stack([offset_y, offset_x, target_height, target_width]) - image = tf.image.decode_and_crop_jpeg( - image_buffer, crop_window, channels=3) - else: - image = tf.image.decode_jpeg(image_buffer, channels=3, - dct_method='INTEGER_FAST') - image = tf.slice(image, bbox_begin, bbox_size) - - distorted_image = tf.image.random_flip_left_right(image) - - # This resizing operation may distort the images because the aspect - # ratio is not respected. - image_resize_method = get_image_resize_method(resize_method, batch_position) - distorted_image = tf.image.resize_images( - distorted_image, [height, width], - image_resize_method, - align_corners=False) - # Restore the shape since the dynamic slice based upon the bbox_size loses - # the third dimension. 
- distorted_image.set_shape([height, width, 3]) - if summary_verbosity >= 3: - tf.summary.image('cropped_resized_maybe_flipped_image', - tf.expand_dims(distorted_image, 0)) - - if distortions: - distorted_image = tf.cast(distorted_image, dtype=tf.float32) - # Images values are expected to be in [0,1] for color distortion. - distorted_image /= 255. - # Randomly distort the colors. - distorted_image = distort_color(distorted_image, batch_position, - distort_color_in_yiq=distort_color_in_yiq) - - # Note: This ensures the scaling matches the output of eval_image - distorted_image *= 255 - - if summary_verbosity >= 3: - tf.summary.image( - 'final_distorted_image', - tf.expand_dims(distorted_image, 0)) - return distorted_image - - -def distort_color(image, batch_position=0, distort_color_in_yiq=False, - scope=None): - """Distort the color of the image. - - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. - Rather then adding that level of complication, we select a distinct ordering - of color ops based on the position of the image in a batch. - - Args: - image: float32 Tensor containing single image. Tensor values should be in - range [0, 1]. - batch_position: the position of the image in a batch. NOTE: this argument - can be an integer or a tensor - distort_color_in_yiq: distort color of input images in YIQ space. - scope: Optional scope for op_scope. - Returns: - color-distorted image - """ - with tf.name_scope(scope or 'distort_color'): - - def distort_fn_0(image=image): - """Variant 0 of distort function.""" - image = tf.image.random_brightness(image, max_delta=32. / 255.) 
- if distort_color_in_yiq: - image = distort_image_ops.random_hsv_in_yiq( - image, lower_saturation=0.5, upper_saturation=1.5, - max_delta_hue=0.2 * math.pi) - else: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - return image - - def distort_fn_1(image=image): - """Variant 1 of distort function.""" - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - if distort_color_in_yiq: - image = distort_image_ops.random_hsv_in_yiq( - image, lower_saturation=0.5, upper_saturation=1.5, - max_delta_hue=0.2 * math.pi) - else: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - return image - - image = utils.smart_cond(batch_position % 2 == 0, distort_fn_0, - distort_fn_1) - # The random_* ops do not necessarily clamp. - image = tf.clip_by_value(image, 0.0, 1.0) - return image - - -class InputPreprocessor(object): - """Base class for all model preprocessors.""" - - def __init__(self, batch_size, output_shapes): - self.batch_size = batch_size - self.output_shapes = output_shapes - - def supports_datasets(self): - """Whether this preprocessor supports dataset.""" - return False - - def minibatch(self, dataset, subset, params, shift_ratio=-1): - """Returns tensors representing a minibatch of all the input.""" - raise NotImplementedError('Must be implemented by subclass.') - - # The methods added below are only supported/used if supports_datasets() - # returns True. - # TODO(laigd): refactor benchmark_cnn.py and put the logic of - # _build_input_processing() into InputPreprocessor. 
- - def parse_and_preprocess(self, value, batch_position): - """Function to parse and preprocess an Example proto in input pipeline.""" - raise NotImplementedError('Must be implemented by subclass.') - - def build_prefetch_input_processing(self, batch_size, model_input_shapes, - num_splits, cpu_device, params, - gpu_devices, model_input_data_types, - dataset, doing_eval): - """"Returns FunctionBufferingResources that do input pre(processing).""" - assert self.supports_datasets() - with tf.device(cpu_device): - if doing_eval: - subset = 'validation' - else: - subset = 'train' - - function_buffering_resources = [] - remote_fn, args = self.minibatch_fn( - batch_size=batch_size, - model_input_shapes=model_input_shapes, - num_splits=num_splits, - dataset=dataset, - subset=subset, - train=(not doing_eval), - datasets_repeat_cached_sample=params.datasets_repeat_cached_sample, - num_threads=params.datasets_num_private_threads, - datasets_use_caching=params.datasets_use_caching, - datasets_parallel_interleave_cycle_length=( - params.datasets_parallel_interleave_cycle_length), - datasets_sloppy_parallel_interleave=( - params.datasets_sloppy_parallel_interleave), - datasets_parallel_interleave_prefetch=( - params.datasets_parallel_interleave_prefetch)) - for device_num in range(len(gpu_devices)): - with tf.device(gpu_devices[device_num]): - buffer_resource_handle = prefetching_ops.function_buffering_resource( - f=remote_fn, - output_types=model_input_data_types, - target_device=cpu_device, - string_arg=args[0], - buffer_size=params.datasets_prefetch_buffer_size, - shared_name=None) - function_buffering_resources.append(buffer_resource_handle) - return function_buffering_resources - - # TODO(laigd): figure out how to remove these parameters, since the - # preprocessor itself has self.batch_size, self.num_splits, etc defined. 
- def build_multi_device_iterator(self, batch_size, num_splits, cpu_device, - params, gpu_devices, dataset, doing_eval): - """Creates a MultiDeviceIterator.""" - assert self.supports_datasets() - assert num_splits == len(gpu_devices) - with tf.name_scope('batch_processing'): - if doing_eval: - subset = 'validation' - else: - subset = 'train' - batch_size_per_split = batch_size // num_splits - ds = self.create_dataset( - batch_size, - num_splits, - batch_size_per_split, - dataset, - subset, - train=(not doing_eval), - datasets_repeat_cached_sample=params.datasets_repeat_cached_sample, - num_threads=params.datasets_num_private_threads, - datasets_use_caching=params.datasets_use_caching, - datasets_parallel_interleave_cycle_length=( - params.datasets_parallel_interleave_cycle_length), - datasets_sloppy_parallel_interleave=( - params.datasets_sloppy_parallel_interleave), - datasets_parallel_interleave_prefetch=( - params.datasets_parallel_interleave_prefetch)) - multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator( - ds, - gpu_devices, - source_device=cpu_device, - max_buffer_size=params.multi_device_iterator_max_buffer_size) - tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, - multi_device_iterator.initializer) - return multi_device_iterator - - def create_dataset(self, - batch_size, - num_splits, - batch_size_per_split, - dataset, - subset, - train, - datasets_repeat_cached_sample, - num_threads=None, - datasets_use_caching=False, - datasets_parallel_interleave_cycle_length=None, - datasets_sloppy_parallel_interleave=False, - datasets_parallel_interleave_prefetch=None): - """Creates a dataset for the benchmark.""" - raise NotImplementedError('Must be implemented by subclass.') - - def create_iterator(self, ds): - ds_iterator = ds.make_initializable_iterator() - tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, - ds_iterator.initializer) - return ds_iterator - - def minibatch_fn(self, batch_size, model_input_shapes, num_splits, - dataset, 
subset, train, datasets_repeat_cached_sample, - num_threads, datasets_use_caching, - datasets_parallel_interleave_cycle_length, - datasets_sloppy_parallel_interleave, - datasets_parallel_interleave_prefetch): - """Returns a function and list of args for the fn to create a minibatch.""" - assert self.supports_datasets() - batch_size_per_split = batch_size // num_splits - assert batch_size_per_split == model_input_shapes[0][0] - with tf.name_scope('batch_processing'): - ds = self.create_dataset(batch_size, num_splits, batch_size_per_split, - dataset, subset, train, - datasets_repeat_cached_sample, num_threads, - datasets_use_caching, - datasets_parallel_interleave_cycle_length, - datasets_sloppy_parallel_interleave, - datasets_parallel_interleave_prefetch) - ds_iterator = self.create_iterator(ds) - - ds_iterator_string_handle = ds_iterator.string_handle() - - @function.Defun(tf.string) - def _fn(h): - remote_iterator = tf.data.Iterator.from_string_handle( - h, ds_iterator.output_types, ds_iterator.output_shapes) - input_list = remote_iterator.get_next() - reshaped_input_list = [ - tf.reshape(input_list[i], shape=model_input_shapes[i]) - for i in range(len(input_list)) - ] - return reshaped_input_list - - return _fn, [ds_iterator_string_handle] - - -class BaseImagePreprocessor(InputPreprocessor): - """Base class for all image model preprocessors.""" - - def __init__(self, - batch_size, - output_shapes, - num_splits, - dtype, - train, - distortions, - resize_method, - shift_ratio=-1, - summary_verbosity=0, - distort_color_in_yiq=True, - fuse_decode_and_crop=True, - match_mlperf=False): - super(BaseImagePreprocessor, self).__init__(batch_size, output_shapes) - image_shape = output_shapes[0] - # image_shape is in form (batch_size, height, width, depth) - self.height = image_shape[1] - self.width = image_shape[2] - self.depth = image_shape[3] - self.num_splits = num_splits - self.dtype = dtype - self.train = train - self.resize_method = resize_method - self.shift_ratio = 
shift_ratio - self.distortions = distortions - self.distort_color_in_yiq = distort_color_in_yiq - self.fuse_decode_and_crop = fuse_decode_and_crop - if self.batch_size % self.num_splits != 0: - raise ValueError( - ('batch_size must be a multiple of num_splits: ' - 'batch_size %d, num_splits: %d') % - (self.batch_size, self.num_splits)) - self.batch_size_per_split = self.batch_size // self.num_splits - self.summary_verbosity = summary_verbosity - self.match_mlperf = match_mlperf - - def parse_and_preprocess(self, value, batch_position): - assert self.supports_datasets() - image_buffer, label_index, bbox, _ = parse_example_proto(value) - if self.match_mlperf: - bbox = tf.zeros((1, 0, 4), dtype=bbox.dtype) - image = self.preprocess(image_buffer, bbox, batch_position) - return (image, label_index) - - def preprocess(self, image_buffer, bbox, batch_position): - raise NotImplementedError('Must be implemented by subclass.') - - def create_dataset(self, - batch_size, - num_splits, - batch_size_per_split, - dataset, - subset, - train, - datasets_repeat_cached_sample, - num_threads=None, - datasets_use_caching=False, - datasets_parallel_interleave_cycle_length=None, - datasets_sloppy_parallel_interleave=False, - datasets_parallel_interleave_prefetch=None): - """Creates a dataset for the benchmark.""" - assert self.supports_datasets() - glob_pattern = dataset.tf_record_pattern(subset) - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError('Found no files in --data_dir matching: {}' - .format(glob_pattern)) - ds = tf.data.TFRecordDataset.list_files(file_names) - ds = ds.apply( - interleave_ops.parallel_interleave( - tf.data.TFRecordDataset, - cycle_length=datasets_parallel_interleave_cycle_length or 10, - sloppy=datasets_sloppy_parallel_interleave, - prefetch_input_elements=datasets_parallel_interleave_prefetch)) - if datasets_repeat_cached_sample: - # Repeat a single sample element indefinitely to emulate memory-speed IO. 
- ds = ds.take(1).cache().repeat() - counter = tf.data.Dataset.range(batch_size) - counter = counter.repeat() - ds = tf.data.Dataset.zip((ds, counter)) - ds = ds.prefetch(buffer_size=batch_size) - if datasets_use_caching: - ds = ds.cache() - if train: - buffer_size = 10000 - ds = ds.apply( - tf.data.experimental.shuffle_and_repeat(buffer_size=buffer_size)) - else: - ds = ds.repeat() - ds = ds.apply( - batching.map_and_batch( - map_func=self.parse_and_preprocess, - batch_size=batch_size_per_split, - num_parallel_batches=num_splits)) - ds = ds.prefetch(buffer_size=num_splits) - if num_threads: - ds = threadpool.override_threadpool( - ds, - threadpool.PrivateThreadPool( - num_threads, display_name='input_pipeline_thread_pool')) - return ds - - -class RecordInputImagePreprocessor(BaseImagePreprocessor): - """Preprocessor for images with RecordInput format.""" - - def preprocess(self, image_buffer, bbox, batch_position): - """Preprocessing image_buffer as a function of its batch position.""" - if self.train: - image = train_image(image_buffer, self.height, self.width, bbox, - batch_position, self.resize_method, self.distortions, - None, summary_verbosity=self.summary_verbosity, - distort_color_in_yiq=self.distort_color_in_yiq, - fuse_decode_and_crop=self.fuse_decode_and_crop) - else: - image = tf.image.decode_jpeg( - image_buffer, channels=3, dct_method='INTEGER_FAST') - image = eval_image(image, self.height, self.width, batch_position, - self.resize_method, - summary_verbosity=self.summary_verbosity) - # Note: image is now float32 [height,width,3] with range [0, 255] - - # image = tf.cast(image, tf.uint8) # HACK TESTING - - if self.match_mlperf: - normalized = image - _CHANNEL_MEANS - else: - normalized = normalized_image(image) - return tf.cast(normalized, self.dtype) - - def minibatch(self, - dataset, - subset, - params, - shift_ratio=-1): - if shift_ratio < 0: - shift_ratio = self.shift_ratio - with tf.name_scope('batch_processing'): - # Build final results per 
split. - images = [[] for _ in range(self.num_splits)] - labels = [[] for _ in range(self.num_splits)] - if params.use_datasets: - ds = self.create_dataset( - self.batch_size, self.num_splits, self.batch_size_per_split, - dataset, subset, self.train, - datasets_repeat_cached_sample=params.datasets_repeat_cached_sample, - num_threads=params.datasets_num_private_threads, - datasets_use_caching=params.datasets_use_caching, - datasets_parallel_interleave_cycle_length=( - params.datasets_parallel_interleave_cycle_length), - datasets_sloppy_parallel_interleave=( - params.datasets_sloppy_parallel_interleave), - datasets_parallel_interleave_prefetch=( - params.datasets_parallel_interleave_prefetch)) - ds_iterator = self.create_iterator(ds) - for d in xrange(self.num_splits): - images[d], labels[d] = ds_iterator.get_next() - - # TODO(laigd): consider removing the --use_datasets option, it should - # always use datasets. - else: - record_input = data_flow_ops.RecordInput( - file_pattern=dataset.tf_record_pattern(subset), - seed=301, - parallelism=64, - buffer_size=10000, - batch_size=self.batch_size, - shift_ratio=shift_ratio, - name='record_input') - records = record_input.get_yield_op() - records = tf.split(records, self.batch_size, 0) - records = [tf.reshape(record, []) for record in records] - for idx in xrange(self.batch_size): - value = records[idx] - (image, label) = self.parse_and_preprocess(value, idx) - split_index = idx % self.num_splits - labels[split_index].append(label) - images[split_index].append(image) - - for split_index in xrange(self.num_splits): - if not params.use_datasets: - images[split_index] = tf.parallel_stack(images[split_index]) - labels[split_index] = tf.concat(labels[split_index], 0) - images[split_index] = tf.reshape( - images[split_index], - shape=[self.batch_size_per_split, self.height, self.width, - self.depth]) - labels[split_index] = tf.reshape(labels[split_index], - [self.batch_size_per_split]) - return images, labels - - def 
supports_datasets(self): - return True - - -class ImagenetPreprocessor(RecordInputImagePreprocessor): - - def preprocess(self, image_buffer, bbox, batch_position): - # pylint: disable=g-import-not-at-top - try: - from official.resnet.imagenet_preprocessing import preprocess_image - except ImportError: - tf.logging.fatal('Please include tensorflow/models to the PYTHONPATH.') - raise - if self.train: - image = preprocess_image( - image_buffer, bbox, self.height, self.width, self.depth, - is_training=True) - else: - image = preprocess_image( - image_buffer, bbox, self.height, self.width, self.depth, - is_training=False) - return tf.cast(image, self.dtype) - - -class Cifar10ImagePreprocessor(BaseImagePreprocessor): - """Preprocessor for Cifar10 input images.""" - - def _distort_image(self, image): - """Distort one image for training a network. - - Adopted the standard data augmentation scheme that is widely used for - this dataset: the images are first zero-padded with 4 pixels on each side, - then randomly cropped to again produce distorted images; half of the images - are then horizontally mirrored. - - Args: - image: input image. - Returns: - distorted image. - """ - image = tf.image.resize_image_with_crop_or_pad( - image, self.height + 8, self.width + 8) - distorted_image = tf.random_crop(image, - [self.height, self.width, self.depth]) - # Randomly flip the image horizontally. 
- distorted_image = tf.image.random_flip_left_right(distorted_image) - if self.summary_verbosity >= 3: - tf.summary.image('distorted_image', tf.expand_dims(distorted_image, 0)) - return distorted_image - - def _eval_image(self, image): - """Get the image for model evaluation.""" - distorted_image = tf.image.resize_image_with_crop_or_pad( - image, self.width, self.height) - if self.summary_verbosity >= 3: - tf.summary.image('cropped.image', tf.expand_dims(distorted_image, 0)) - return distorted_image - - def preprocess(self, raw_image): - """Preprocessing raw image.""" - if self.summary_verbosity >= 3: - tf.summary.image('raw.image', tf.expand_dims(raw_image, 0)) - if self.train and self.distortions: - image = self._distort_image(raw_image) - else: - image = self._eval_image(raw_image) - normalized = normalized_image(image) - return tf.cast(normalized, self.dtype) - - def minibatch(self, - dataset, - subset, - params, - shift_ratio=-1): - # TODO(jsimsa): Implement datasets code path - del shift_ratio, params - with tf.name_scope('batch_processing'): - all_images, all_labels = dataset.read_data_files(subset) - all_images = tf.constant(all_images) - all_labels = tf.constant(all_labels) - input_image, input_label = tf.train.slice_input_producer( - [all_images, all_labels]) - input_image = tf.cast(input_image, self.dtype) - input_label = tf.cast(input_label, tf.int32) - # Ensure that the random shuffling has good mixing properties. - min_fraction_of_examples_in_queue = 0.4 - min_queue_examples = int(dataset.num_examples_per_epoch(subset) * - min_fraction_of_examples_in_queue) - raw_images, raw_labels = tf.train.shuffle_batch( - [input_image, input_label], batch_size=self.batch_size, - capacity=min_queue_examples + 3 * self.batch_size, - min_after_dequeue=min_queue_examples) - - images = [[] for i in range(self.num_splits)] - labels = [[] for i in range(self.num_splits)] - - # Create a list of size batch_size, each containing one image of the - # batch. 
Without the unstack call, raw_images[i] would still access the - # same image via a strided_slice op, but would be slower. - raw_images = tf.unstack(raw_images, axis=0) - raw_labels = tf.unstack(raw_labels, axis=0) - for i in xrange(self.batch_size): - split_index = i % self.num_splits - # The raw image read from data has the format [depth, height, width] - # reshape to the format returned by minibatch. - raw_image = tf.reshape(raw_images[i], - [dataset.depth, dataset.height, dataset.width]) - raw_image = tf.transpose(raw_image, [1, 2, 0]) - image = self.preprocess(raw_image) - images[split_index].append(image) - - labels[split_index].append(raw_labels[i]) - - for split_index in xrange(self.num_splits): - images[split_index] = tf.parallel_stack(images[split_index]) - labels[split_index] = tf.parallel_stack(labels[split_index]) - return images, labels - - -class COCOPreprocessor(BaseImagePreprocessor): - """Preprocessor for COCO dataset input images, boxes, and labels.""" - - def minibatch(self, - dataset, - subset, - params, - shift_ratio=-1): - del shift_ratio # Not used when using datasets instead of data_flow_ops - with tf.name_scope('batch_processing'): - ds = self.create_dataset( - self.batch_size, self.num_splits, self.batch_size_per_split, - dataset, subset, self.train, params.datasets_repeat_cached_sample) - ds_iterator = self.create_iterator(ds) - - # Training data: 4 tuple - # Validation data: 5 tuple - # See get_input_shapes in models/ssd_model.py for details. 
- input_len = 4 if subset == 'train' else 5 - input_lists = [[None for _ in range(self.num_splits)] - for _ in range(input_len)] - for d in xrange(self.num_splits): - input_list = ds_iterator.get_next() - for i in range(input_len): - input_lists[i][d] = input_list[i] - return input_lists - - def preprocess(self, data): - try: - import ssd_dataloader # pylint: disable=g-import-not-at-top - import ssd_constants # pylint: disable=g-import-not-at-top - from object_detection.core import preprocessor # pylint: disable=g-import-not-at-top - except ImportError: - raise ImportError('To use the COCO dataset, you must clone the ' - 'repo https://github.com/tensorflow/models and add ' - 'tensorflow/models and tensorflow/models/research to ' - 'the PYTHONPATH, and compile the protobufs by ' - 'following https://github.com/tensorflow/models/blob/' - 'master/research/object_detection/g3doc/installation.md' - '#protobuf-compilation') - image_buffer = data['image_buffer'] - boxes = data['groundtruth_boxes'] - classes = tf.reshape(data['groundtruth_classes'], [-1, 1]) - source_id = tf.string_to_number(data['source_id']) - raw_shape = data['raw_shape'] - - ssd_encoder = ssd_dataloader.Encoder() - - # Only 80 of the 90 COCO classes are used. - class_map = tf.convert_to_tensor(ssd_constants.CLASS_MAP) - classes = tf.gather(class_map, classes) - classes = tf.cast(classes, dtype=tf.float32) - - if self.train: - image, boxes, classes = ssd_dataloader.ssd_decode_and_crop( - image_buffer, boxes, classes, raw_shape) - # ssd_crop resizes and returns image of dtype float32 and does not change - # its range (i.e., value in between 0--255). Divide by 255. converts it - # to [0, 1] range. Not doing this before cropping to avoid dtype cast - # (which incurs additional memory copy). - image /= 255. 
- - image, boxes = preprocessor.random_horizontal_flip( - image=image, boxes=boxes) - # Random horizontal flip probability is 50% - # See https://github.com/tensorflow/models/blob/master/research/object_detection/core/preprocessor.py # pylint: disable=line-too-long - - image = ssd_dataloader.color_jitter( - image, brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05) - image = ssd_dataloader.normalize_image(image) - image = tf.cast(image, self.dtype) - - encoded_returns = ssd_encoder.encode_labels(boxes, classes) - encoded_classes, encoded_boxes, num_matched_boxes = encoded_returns - - # Shape of image: [width, height, channel] - # Shape of encoded_boxes: [NUM_SSD_BOXES, 4] - # Shape of encoded_classes: [NUM_SSD_BOXES, 1] - # Shape of num_matched_boxes: [1] - return (image, encoded_boxes, encoded_classes, num_matched_boxes) - - else: - image = tf.image.decode_jpeg(image_buffer) - image = tf.image.resize_images( - image, size=(ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE)) - # resize_image returns image of dtype float32 and does not change its - # range. Divide by 255 to convert image to [0, 1] range. - image /= 255. 
- - image = ssd_dataloader.normalize_image(image) - image = tf.cast(image, self.dtype) - - def trim_and_pad(inp_tensor): - """Limit the number of boxes, and pad if necessary.""" - inp_tensor = inp_tensor[:ssd_constants.MAX_NUM_EVAL_BOXES] - num_pad = ssd_constants.MAX_NUM_EVAL_BOXES - tf.shape(inp_tensor)[0] - inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]]) - return tf.reshape(inp_tensor, [ssd_constants.MAX_NUM_EVAL_BOXES, - inp_tensor.get_shape()[1]]) - - boxes, classes = trim_and_pad(boxes), trim_and_pad(classes) - - # Shape of boxes: [MAX_NUM_EVAL_BOXES, 4] - # Shape of classes: [MAX_NUM_EVAL_BOXES, 1] - # Shape of source_id: [] (scalar tensor) - # Shape of raw_shape: [3] - return (image, boxes, classes, source_id, raw_shape) - - def create_dataset(self, - batch_size, - num_splits, - batch_size_per_split, - dataset, - subset, - train, - datasets_repeat_cached_sample, - num_threads=None, - datasets_use_caching=False, - datasets_parallel_interleave_cycle_length=None, - datasets_sloppy_parallel_interleave=False, - datasets_parallel_interleave_prefetch=None): - """Creates a dataset for the benchmark.""" - try: - import ssd_dataloader # pylint: disable=g-import-not-at-top - except ImportError: - raise ImportError('To use the COCO dataset, you must clone the ' - 'repo https://github.com/tensorflow/models and add ' - 'tensorflow/models and tensorflow/models/research to ' - 'the PYTHONPATH, and compile the protobufs by ' - 'following https://github.com/tensorflow/models/blob/' - 'master/research/object_detection/g3doc/installation.md' - '#protobuf-compilation') - assert self.supports_datasets() - - glob_pattern = dataset.tf_record_pattern(subset) - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError('Found no files in --data_dir matching: {}' - .format(glob_pattern)) - - ds = tf.data.TFRecordDataset.list_files(file_names) - # TODO(haoyuzhang): Enable map+filter fusion after cl/218399112 in release - # options = tf.data.Options() - # 
options.experimental_map_and_filter_fusion = True - # ds = ds.with_options(options) - - ds = ds.apply( - interleave_ops.parallel_interleave( - tf.data.TFRecordDataset, - cycle_length=datasets_parallel_interleave_cycle_length or 10, - sloppy=datasets_sloppy_parallel_interleave)) - if datasets_repeat_cached_sample: - # Repeat a single sample element indefinitely to emulate memory-speed IO. - ds = ds.take(1).cache().repeat() - ds = ds.prefetch(buffer_size=batch_size) - if datasets_use_caching: - ds = ds.cache() - if train: - ds = ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=10000)) - else: - ds = ds.repeat() - - ds = ds.map(ssd_dataloader.ssd_parse_example_proto, num_parallel_calls=64) - ds = ds.filter( - lambda data: tf.greater(tf.shape(data['groundtruth_boxes'])[0], 0)) - ds = ds.apply( - batching.map_and_batch( - map_func=self.preprocess, - batch_size=batch_size_per_split, - num_parallel_batches=num_splits, - drop_remainder=train)) - ds = ds.prefetch(buffer_size=num_splits) - if num_threads: - ds = threadpool.override_threadpool( - ds, - threadpool.PrivateThreadPool( - num_threads, display_name='input_pipeline_thread_pool')) - return ds - - def supports_datasets(self): - return True - - -class LibrispeechPreprocessor(InputPreprocessor): - """Preprocessor for librispeech class for all image model preprocessors.""" - - def __init__(self, batch_size, output_shapes, num_splits, dtype, train, - **kwargs): - del kwargs - super(LibrispeechPreprocessor, self).__init__(batch_size, output_shapes) - self.num_splits = num_splits - self.dtype = dtype - self.is_train = train - if self.batch_size % self.num_splits != 0: - raise ValueError(('batch_size must be a multiple of num_splits: ' - 'batch_size %d, num_splits: %d') % (self.batch_size, - self.num_splits)) - self.batch_size_per_split = self.batch_size // self.num_splits - - def create_dataset(self, - batch_size, - num_splits, - batch_size_per_split, - dataset, - subset, - train, - 
datasets_repeat_cached_sample, - num_threads=None, - datasets_use_caching=False, - datasets_parallel_interleave_cycle_length=None, - datasets_sloppy_parallel_interleave=False, - datasets_parallel_interleave_prefetch=None): - """Creates a dataset for the benchmark.""" - # TODO(laigd): currently the only difference between this and the one in - # BaseImagePreprocessor is, this uses map() and padded_batch() while the - # latter uses tf.data.experimental.map_and_batch(). Try to merge them. - assert self.supports_datasets() - glob_pattern = dataset.tf_record_pattern(subset) - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError('Found no files in --data_dir matching: {}' - .format(glob_pattern)) - ds = tf.data.TFRecordDataset.list_files(file_names) - ds = ds.apply( - tf.data.experimental.parallel_interleave( - tf.data.TFRecordDataset, - cycle_length=datasets_parallel_interleave_cycle_length or 10, - sloppy=datasets_sloppy_parallel_interleave, - prefetch_input_elements=datasets_parallel_interleave_prefetch)) - if datasets_repeat_cached_sample: - # Repeat a single sample element indefinitely to emulate memory-speed IO. 
- ds = ds.take(1).cache().repeat() - counter = tf.data.Dataset.range(batch_size) - counter = counter.repeat() - ds = tf.data.Dataset.zip((ds, counter)) - ds = ds.prefetch(buffer_size=batch_size) - if datasets_use_caching: - ds = ds.cache() - if train: - ds = ds.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=10000)) - else: - ds = ds.repeat() - ds = ds.map(map_func=self.parse_and_preprocess, - num_parallel_calls=batch_size_per_split*num_splits) - ds = ds.padded_batch( - batch_size=batch_size_per_split, - padded_shapes=tuple([ - tf.TensorShape(output_shape[1:]) - for output_shape in self.output_shapes - ]), - drop_remainder=True) - ds = ds.prefetch(buffer_size=num_splits) - if num_threads: - ds = threadpool.override_threadpool( - ds, - threadpool.PrivateThreadPool( - num_threads, display_name='input_pipeline_thread_pool')) - return ds - - def minibatch(self, dataset, subset, params, shift_ratio=-1): - assert params.use_datasets - # TODO(laigd): unify this with CNNModel's minibatch() - # TODO(laigd): in distributed mode we use shift_ratio so different workers - # won't work on same inputs, so we should respect that. 
- del shift_ratio - with tf.name_scope('batch_processing'): - ds = self.create_dataset( - self.batch_size, - self.num_splits, - self.batch_size_per_split, - dataset, - subset, - self.is_train, - datasets_repeat_cached_sample=params.datasets_repeat_cached_sample, - num_threads=params.datasets_num_private_threads, - datasets_use_caching=params.datasets_use_caching, - datasets_parallel_interleave_cycle_length=( - params.datasets_parallel_interleave_cycle_length), - datasets_sloppy_parallel_interleave=( - params.datasets_sloppy_parallel_interleave), - datasets_parallel_interleave_prefetch=( - params.datasets_parallel_interleave_prefetch)) - ds_iterator = self.create_iterator(ds) - - # The four lists are: input spectrogram feature, labels, input lengths, - # label lengths - input_lists = [[None for _ in range(self.num_splits)] for _ in range(4)] - for d in xrange(self.num_splits): - input_list = ds_iterator.get_next() - for i in range(4): - input_lists[i][d] = input_list[i] - - assert self.output_shapes == [ - input_lists[i][0].shape.as_list() for i in range(4) - ] - return tuple(input_lists) - - def supports_datasets(self): - return True - - def parse_and_preprocess(self, value, batch_position): - """Parse an TFRecord.""" - del batch_position - assert self.supports_datasets() - context_features = { - 'labels': tf.VarLenFeature(dtype=tf.int64), - 'input_length': tf.FixedLenFeature([], dtype=tf.int64), - 'label_length': tf.FixedLenFeature([], dtype=tf.int64), - } - sequence_features = { - 'features': tf.FixedLenSequenceFeature([161], dtype=tf.float32) - } - context_parsed, sequence_parsed = tf.parse_single_sequence_example( - serialized=value, - context_features=context_features, - sequence_features=sequence_features, - ) - - return [ - # Input - tf.expand_dims(sequence_parsed['features'], axis=2), - # Label - tf.cast( - tf.reshape( - tf.sparse_tensor_to_dense(context_parsed['labels']), [-1]), - dtype=tf.int32), - # Input length - tf.cast( - 
tf.reshape(context_parsed['input_length'], [1]), - dtype=tf.int32), - # Label length - tf.cast( - tf.reshape(context_parsed['label_length'], [1]), - dtype=tf.int32), - ] diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/ssd_constants.py b/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/ssd_constants.py deleted file mode 100644 index 77fa0149b..000000000 --- a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/ssd_constants.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright 2018 Google. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Central location for all constants related to MLPerf SSD.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# ============================================================================== -# == Model ===================================================================== -# ============================================================================== -IMAGE_SIZE = 300 - -# TODO(taylorrobie): MLPerf uses 80, but COCO documents 90. (RetinaNet uses 90) -# Update(taylorrobie): Labels > 81 show up in the pipeline. This will need to -# be resolved. -NUM_CLASSES = 81 # Including "no class". Not all COCO classes are used. - -# Note: Zero is special. (Background class) CLASS_INV_MAP[0] must be zero. 
-CLASS_INV_MAP = ( - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, - 88, 89, 90) -_MAP = {j: i for i, j in enumerate(CLASS_INV_MAP)} -CLASS_MAP = tuple(_MAP.get(i, -1) for i in range(max(CLASS_INV_MAP) + 1)) - -NUM_SSD_BOXES = 8732 - -RESNET_DEPTH = 34 - -"""SSD specific""" -MIN_LEVEL = 3 -MAX_LEVEL = 8 - -FEATURE_SIZES = (38, 19, 10, 5, 3, 1) -STEPS = (8, 16, 32, 64, 100, 300) - -# https://github.com/amdegroot/ssd.pytorch/blob/master/data/config.py -SCALES = (21, 45, 99, 153, 207, 261, 315) -ASPECT_RATIOS = ((2,), (2, 3), (2, 3), (2, 3), (2,), (2,)) -NUM_DEFAULTS = (4, 6, 6, 6, 4, 4) -NUM_DEFAULTS_BY_LEVEL = {3: 4, 4: 6, 5: 6, 6: 6, 7: 4, 8: 4} -SCALE_XY = 0.1 -SCALE_HW = 0.2 -BOX_CODER_SCALES = (1 / SCALE_XY, 1 / SCALE_XY, 1 / SCALE_HW, 1 / SCALE_HW) -MATCH_THRESHOLD = 0.5 - -# https://discuss.pytorch.org/t/how-to-preprocess-input-for-pre-trained-networks/683 -NORMALIZATION_MEAN = (0.485, 0.456, 0.406) -NORMALIZATION_STD = (0.229, 0.224, 0.225) - -# SSD Cropping -NUM_CROP_PASSES = 50 -CROP_MIN_IOU_CHOICES = (0, 0.1, 0.3, 0.5, 0.7, 0.9) -P_NO_CROP_PER_PASS = 1 / (len(CROP_MIN_IOU_CHOICES) + 1) - -# Hard example mining -NEGS_PER_POSITIVE = 3 - -# Batch normalization -BATCH_NORM_DECAY = 0.997 -BATCH_NORM_EPSILON = 1e-4 - - -# ============================================================================== -# == Optimizer ================================================================= -# ============================================================================== -LEARNING_RATE_SCHEDULE = ( - (0, 1e-3), - (160000, 1e-4), - (200000, 1e-5), -) -MOMENTUM = 0.9 -WEIGHT_DECAY = 5e-4 - - -# ============================================================================== -# == Keys 
====================================================================== -# ============================================================================== -BOXES = "boxes" -CLASSES = "classes" -NUM_MATCHED_BOXES = "num_matched_boxes" -IMAGE = "image" -SOURCE_ID = "source_id" -RAW_SHAPE = "raw_shape" -PRED_BOXES = "pred_boxes" -PRED_SCORES = "pred_scores" - - -# ============================================================================== -# == Evaluation ================================================================ -# ============================================================================== - -# Note: This is based on a batch size of 32 -# https://github.com/mlperf/reference/blob/master/single_stage_detector/ssd/train.py#L21-L37 -CHECKPOINT_FREQUENCY = 20000 -MAX_NUM_EVAL_BOXES = 200 -OVERLAP_CRITERIA = 0.5 # Used for nonmax supression -MIN_SCORE = 0.05 # Minimum score to be considered during evaluation. -DUMMY_SCORE = -1e5 # If no boxes are matched. - -ANNOTATION_FILE = "annotations/instances_val2017.json" -COCO_NUM_TRAIN_IMAGES = 118287 -COCO_NUM_VAL_IMAGES = 4952 diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/ssd_dataloader.py b/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/ssd_dataloader.py deleted file mode 100644 index 2f291fd85..000000000 --- a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/ssd_dataloader.py +++ /dev/null @@ -1,382 +0,0 @@ -# Copyright 2018 Google. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Data loader and processing.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import itertools as it -import math - -import numpy as np -import tensorflow as tf - -from object_detection.box_coders import faster_rcnn_box_coder -from object_detection.core import box_list -from object_detection.core import region_similarity_calculator -from object_detection.core import target_assigner -from object_detection.matchers import argmax_matcher -import ssd_constants - - -class DefaultBoxes(object): - """Default bounding boxes for 300x300 5 layer SSD. - - Default bounding boxes generation follows the order of (W, H, anchor_sizes). - Therefore, the tensor converted from DefaultBoxes has a shape of - [anchor_sizes, H, W, 4]. The last dimension is the box coordinates; 'ltrb' - is [ymin, xmin, ymax, xmax] while 'xywh' is [cy, cx, h, w]. 
- """ - - def __init__(self): - fk = ssd_constants.IMAGE_SIZE / np.array(ssd_constants.STEPS) - - self.default_boxes = [] - # size of feature and number of feature - for idx, feature_size in enumerate(ssd_constants.FEATURE_SIZES): - sk1 = ssd_constants.SCALES[idx] / ssd_constants.IMAGE_SIZE - sk2 = ssd_constants.SCALES[idx+1] / ssd_constants.IMAGE_SIZE - sk3 = math.sqrt(sk1*sk2) - all_sizes = [(sk1, sk1), (sk3, sk3)] - - for alpha in ssd_constants.ASPECT_RATIOS[idx]: - w, h = sk1 * math.sqrt(alpha), sk1 / math.sqrt(alpha) - all_sizes.append((w, h)) - all_sizes.append((h, w)) - - assert len(all_sizes) == ssd_constants.NUM_DEFAULTS[idx] - - for w, h in all_sizes: - for i, j in it.product(range(feature_size), repeat=2): - cx, cy = (j + 0.5) / fk[idx], (i + 0.5) / fk[idx] - box = tuple(np.clip(k, 0, 1) for k in (cy, cx, h, w)) - self.default_boxes.append(box) - - assert len(self.default_boxes) == ssd_constants.NUM_SSD_BOXES - - def to_ltrb(cy, cx, h, w): - return cy - h / 2, cx - w / 2, cy + h / 2, cx + w / 2 - - # For IoU calculation - self.default_boxes_ltrb = tuple(to_ltrb(*i) for i in self.default_boxes) - - def __call__(self, order='ltrb'): - if order == 'ltrb': return self.default_boxes_ltrb - if order == 'xywh': return self.default_boxes - - -def calc_iou_tensor(boxes1, boxes2): - """Calculation of IoU based on two boxes tensor. 
- - Reference to https://github.com/kuangliu/pytorch-ssd - - Args: - boxes1: shape (N, 4), four coordinates of N boxes - boxes2: shape (M, 4), four coordinates of M boxes - Returns: - IoU: shape (N, M), IoU of the i-th box in `boxes1` and j-th box in `boxes2` - """ - b1_left, b1_top, b1_right, b1_bottom = tf.split(boxes1, 4, axis=1) - b2_left, b2_top, b2_right, b2_bottom = tf.split(boxes2, 4, axis=1) - - # Shape of intersect_* (N, M) - intersect_left = tf.maximum(b1_left, tf.transpose(b2_left)) - intersect_top = tf.maximum(b1_top, tf.transpose(b2_top)) - intersect_right = tf.minimum(b1_right, tf.transpose(b2_right)) - intersect_bottom = tf.minimum(b1_bottom, tf.transpose(b2_bottom)) - - boxes1_area = (b1_right - b1_left) * (b1_bottom - b1_top) - boxes2_area = (b2_right - b2_left) * (b2_bottom - b2_top) - - intersect = tf.multiply(tf.maximum((intersect_right - intersect_left), 0), - tf.maximum((intersect_bottom - intersect_top), 0)) - union = boxes1_area + tf.transpose(boxes2_area) - intersect - iou = intersect / union - - return iou - - -def ssd_parse_example_proto(example_serialized): - """Parses an Example proto containing a training example of an image. - - Each Example proto contains the following fields that we care about: - - image/encoded: - image/source_id: tf.string - image/height: tf.int64 - image/width: tf.int64 - image/object/bbox/xmin: tf.VarLenFeature(tf.float32) - image/object/bbox/xmax: tf.VarLenFeature(tf.float32) - image/object/bbox/ymin: tf.VarLenFeature(tf.float32 - image/object/bbox/ymax: tf.VarLenFeature(tf.float32) - image/object/class/label: tf.VarLenFeature(tf.int64) - image/object/class/text: tf.VarLenFeature(tf.string) - - Complete decoder can be found in: - https://github.com/tensorflow/models/blob/master/research/object_detection/data_decoders/tf_example_decoder.py - - Args: - example_serialized: scalar Tensor tf.string containing a serialized - Example protocol buffer. 
- - Returns: - A dictionary with the following key-values: - image_buffer: Tensor tf.string containing the contents of a JPEG file. - groundtruth_boxes: Tensor tf.float32 of shape [num_boxes, 4], containing - coordinates of object bounding boxes. - groundtruth_classeS: Tensor tf.int64 of shape [num_boxes, 1], containing - class labels of objects. - source_id: unique image identifier. - raw_shape: [height, width, 3]. - """ - feature_map = { - 'image/encoded': tf.FixedLenFeature( - (), dtype=tf.string, default_value=''), - 'image/source_id': tf.FixedLenFeature((), tf.string, default_value=''), - 'image/height': tf.FixedLenFeature((), tf.int64, default_value=1), - 'image/width': tf.FixedLenFeature((), tf.int64, default_value=1), - 'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32), - 'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32), - 'image/object/class/label': tf.VarLenFeature(dtype=tf.int64), - } - features = tf.parse_single_example(example_serialized, feature_map) - - xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 1) - ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 1) - xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 1) - ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 1) - - image_buffer = features['image/encoded'] - # Bounding box coordinates should be in ltrb order - boxes = tf.concat([ymin, xmin, ymax, xmax], 1) - classes = tf.expand_dims(features['image/object/class/label'].values, 1) - source_id = features['image/source_id'] - raw_shape = tf.stack([features['image/height'], features['image/width'], 3]) - - return {'image_buffer': image_buffer, - 'groundtruth_boxes': boxes, - 'groundtruth_classes': classes, - 'source_id': source_id, - 'raw_shape': raw_shape} - - -def ssd_decode_and_crop(image_buffer, boxes, classes, raw_shape): - """Crop image 
randomly and decode the cropped region. - - This function will crop an image to meet the following requirements: - 1. height to width ratio between 0.5 and 2; - 2. IoUs of some boxes exceed specified threshold; - 3. At least one box center is in the cropped region. - We defer the jpeg decoding task until after the crop to avoid wasted work. - - Reference: https://github.com/chauhan-utk/ssd.DomainAdaptation - - Args: - image_buffer: Tensor tf.string containing the contents of a JPEG file. - boxes: Tensor tf.float32 of shape [num_boxes, 4], containing coordinates of - object bounding boxes. - classes: Tensor tf.int64 of shape [num_boxes, 1], containing class labels - of objects. - raw_shape: [height, width, 3]. - - Returns: - resized_image: decoded, cropped, and resized image Tensor tf.float32 of - shape [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE, 3], value - range 0--255. - cropped_boxes: box coordinates for objects in the cropped region. - cropped_classes: class labels for objects in the cropped region. 
- """ - - num_boxes = tf.shape(boxes)[0] - - def no_crop_check(): - return (tf.random_uniform(shape=(), minval=0, maxval=1, dtype=tf.float32) - < ssd_constants.P_NO_CROP_PER_PASS) - - def no_crop_proposal(): - return ( - tf.ones((), tf.bool), - tf.convert_to_tensor([0, 0, 1, 1], dtype=tf.float32), - tf.ones((num_boxes,), tf.bool), - ) - - def crop_proposal(): - rand_vec = lambda minval, maxval: tf.random_uniform( - shape=(ssd_constants.NUM_CROP_PASSES, 1), minval=minval, maxval=maxval, - dtype=tf.float32) - - width, height = rand_vec(0.3, 1), rand_vec(0.3, 1) - left, top = rand_vec(0, 1-width), rand_vec(0, 1-height) - - right = left + width - bottom = top + height - - ltrb = tf.concat([left, top, right, bottom], axis=1) - - min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0] - ious = calc_iou_tensor(ltrb, boxes) - - # discard any bboxes whose center not in the cropped image - xc, yc = [tf.tile(0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :], - (ssd_constants.NUM_CROP_PASSES, 1)) for i in range(2)] - - masks = tf.reduce_all(tf.stack([ - tf.greater(xc, tf.tile(left, (1, num_boxes))), - tf.less(xc, tf.tile(right, (1, num_boxes))), - tf.greater(yc, tf.tile(top, (1, num_boxes))), - tf.less(yc, tf.tile(bottom, (1, num_boxes))), - ], axis=2), axis=2) - - # Checks of whether a crop is valid. - valid_aspect = tf.logical_and(tf.less(height/width, 2), - tf.less(width/height, 2)) - valid_ious = tf.reduce_all(tf.greater(ious, min_iou), axis=1, keepdims=True) - valid_masks = tf.reduce_any(masks, axis=1, keepdims=True) - - valid_all = tf.cast(tf.reduce_all(tf.concat( - [valid_aspect, valid_ious, valid_masks], axis=1), axis=1), tf.int32) - - # One indexed, as zero is needed for the case of no matches. - index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32) - - # Either one-hot, or zeros if there is no valid crop. 
- selection = tf.equal(tf.reduce_max(index * valid_all), index) - - use_crop = tf.reduce_any(selection) - output_ltrb = tf.reduce_sum(tf.multiply(ltrb, tf.tile(tf.cast( - selection, tf.float32)[:, tf.newaxis], (1, 4))), axis=0) - output_masks = tf.reduce_any(tf.logical_and(masks, tf.tile( - selection[:, tf.newaxis], (1, num_boxes))), axis=0) - - return use_crop, output_ltrb, output_masks - - def proposal(*args): - return tf.cond( - pred=no_crop_check(), - true_fn=no_crop_proposal, - false_fn=crop_proposal, - ) - - _, crop_bounds, box_masks = tf.while_loop( - cond=lambda x, *_: tf.logical_not(x), - body=proposal, - loop_vars=[tf.zeros((), tf.bool), tf.zeros((4,), tf.float32), tf.zeros((num_boxes,), tf.bool)], - ) - - filtered_boxes = tf.boolean_mask(boxes, box_masks, axis=0) - - # Clip boxes to the cropped region. - filtered_boxes = tf.stack([ - tf.maximum(filtered_boxes[:, 0], crop_bounds[0]), - tf.maximum(filtered_boxes[:, 1], crop_bounds[1]), - tf.minimum(filtered_boxes[:, 2], crop_bounds[2]), - tf.minimum(filtered_boxes[:, 3], crop_bounds[3]), - ], axis=1) - - left = crop_bounds[0] - top = crop_bounds[1] - width = crop_bounds[2] - left - height = crop_bounds[3] - top - - cropped_boxes = tf.stack([ - (filtered_boxes[:, 0] - left) / width, - (filtered_boxes[:, 1] - top) / height, - (filtered_boxes[:, 2] - left) / width, - (filtered_boxes[:, 3] - top) / height, - ], axis=1) - - # crop_window containing integer coordinates of cropped region. A normalized - # coordinate value of y should be mapped to the image coordinate at - # y * (height - 1). 
- raw_shape = tf.cast(raw_shape, tf.float32) - crop_window = tf.stack([left * (raw_shape[0] - 1), - top * (raw_shape[1] - 1), - width * raw_shape[0], - height * raw_shape[1]]) - crop_window = tf.cast(crop_window, tf.int32) - - # Fused op only decodes the cropped portion of an image - cropped_image = tf.image.decode_and_crop_jpeg( - image_buffer, crop_window, channels=3) - - # Resize converts image dtype from uint8 to float32, without rescaling values. - resized_image = tf.image.resize_images( - cropped_image, [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE]) - - cropped_classes = tf.boolean_mask(classes, box_masks, axis=0) - - return resized_image, cropped_boxes, cropped_classes - - -def color_jitter(image, brightness=0, contrast=0, saturation=0, hue=0): - """Distort the color of the image.""" - with tf.name_scope('distort_color'): - if brightness > 0: - image = tf.image.random_brightness(image, max_delta=brightness) - if contrast > 0: - image = tf.image.random_contrast( - image, lower=1-contrast, upper=1+contrast) - if saturation > 0: - image = tf.image.random_saturation( - image, lower=1-saturation, upper=1+saturation) - if hue > 0: - image = tf.image.random_hue(image, max_delta=hue) - return image - - -def normalize_image(image): - """Normalize the image to zero mean and unit variance. - - Args: - image: 3D tensor of type float32, value in [0, 1] - Returns: - image normalized by mean and stdev. 
- """ - image = tf.subtract(image, ssd_constants.NORMALIZATION_MEAN) - image = tf.divide(image, ssd_constants.NORMALIZATION_STD) - - return image - - -class Encoder(object): - """Encoder for SSD boxes and labels.""" - - def __init__(self): - similarity_calc = region_similarity_calculator.IouSimilarity() - matcher = argmax_matcher.ArgMaxMatcher( - matched_threshold=ssd_constants.MATCH_THRESHOLD, - unmatched_threshold=ssd_constants.MATCH_THRESHOLD, - negatives_lower_than_unmatched=True, - force_match_for_each_row=True) - - box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder( - scale_factors=ssd_constants.BOX_CODER_SCALES) - - self.default_boxes = DefaultBoxes()('ltrb') - self.default_boxes = box_list.BoxList( - tf.convert_to_tensor(self.default_boxes)) - self.assigner = target_assigner.TargetAssigner( - similarity_calc, matcher, box_coder) - - def encode_labels(self, gt_boxes, gt_labels): - target_boxes = box_list.BoxList(gt_boxes) - encoded_classes, _, encoded_boxes, _, matches = self.assigner.assign( - self.default_boxes, target_boxes, gt_labels) - num_matched_boxes = tf.reduce_sum( - tf.cast(tf.not_equal(matches.match_results, -1), tf.float32)) - return encoded_classes, encoded_boxes, num_matched_boxes diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/ssd_model.py b/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/ssd_model.py deleted file mode 100644 index c8d67c24d..000000000 --- a/models/object_detection/tensorflow/ssd-resnet34/inference/fp32/ssd_model.py +++ /dev/null @@ -1,171 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# SPDX-License-Identifier: EPL-2.0 -# - -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -"""SSD300 Model Configuration. - -References: - Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, - Cheng-Yang Fu, Alexander C. 
Berg - SSD: Single Shot MultiBox Detector - arXiv:1512.02325 - -Ported from MLPerf reference implementation: - https://github.com/mlperf/reference/tree/ssd/single_stage_detector/ssd - -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import multiprocessing -import os -import re -import threading -import tensorflow as tf - -import ssd_constants - - -class SSD300Model(): - """Single Shot Multibox Detection (SSD) model for 300x300 image datasets.""" - - def __init__(self, data_dir, label_num=ssd_constants.NUM_CLASSES): - # For COCO dataset, 80 categories + 1 background = 81 labels - self.label_num = label_num - self.data_dir = data_dir - - # Collected predictions for eval stage. It maps each image id in eval - # dataset to a dict containing the following information: - # source_id: raw ID of image - # raw_shape: raw shape of image - # pred_box: encoded box coordinates of prediction - # pred_scores: scores of classes in prediction - self.predictions = {} - - # Global step when predictions are collected. - self.eval_global_step = 0 - - # Average precision. In asynchronous eval mode, this is the latest AP we - # get so far and may not be the results at current eval step. - self.eval_coco_ap = 0 - - # Process, queues, and thread for asynchronous evaluation. When enabled, - # create a separte process (async_eval_process) that continously pull - # intermediate results from the predictions queue (a multiprocessing queue), - # process them, and push final results into results queue (another - # multiprocessing queue). The main thread is responsible to push message - # into predictions queue, and start a separate thread to continuously pull - # messages from results queue to update final results. 
- # Message in predictions queue should be a tuple of two elements: - # (evaluation step, predictions) - # Message in results queue should be a tuple of two elements: - # (evaluation step, final results) - self.async_eval_process = None - self.async_eval_predictions_queue = None - self.async_eval_results_queue = None - self.async_eval_results_getter_thread = None - - # The MLPerf reference uses a starting lr of 1e-3 at bs=32. - self.base_lr_batch_size = 32 - - def skip_final_affine_layer(self): - return True - - def postprocess(self, results): - """Postprocess results returned from model.""" - try: - import coco_metric # pylint: disable=g-import-not-at-top - except ImportError: - raise ImportError('To use the COCO dataset, you must clone the ' - 'repo https://github.com/tensorflow/models and add ' - 'tensorflow/models and tensorflow/models/research to ' - 'the PYTHONPATH, and compile the protobufs by ' - 'following https://github.com/tensorflow/models/blob/' - 'master/research/object_detection/g3doc/installation.md' - '#protobuf-compilation ; To evaluate using COCO' - 'metric, download and install Python COCO API from' - 'https://github.com/cocodataset/cocoapi') - - pred_boxes = results[ssd_constants.PRED_BOXES] - pred_scores = results[ssd_constants.PRED_SCORES] - # TODO(haoyuzhang): maybe use these values for visualization. - # gt_boxes = results['gt_boxes'] - # gt_classes = results['gt_classes'] - source_id = results[ssd_constants.SOURCE_ID] - raw_shape = results[ssd_constants.RAW_SHAPE] - - # COCO evaluation requires processing COCO_NUM_VAL_IMAGES exactly once. Due - # to rounding errors (i.e., COCO_NUM_VAL_IMAGES % batch_size != 0), setting - # `num_eval_epochs` to 1 is not enough and will often miss some images. We - # expect user to set `num_eval_epochs` to >1, which will leave some unused - # images from previous steps in `predictions`. Here we check if we are doing - # eval at a new global step. 
- if results['global_step'] > self.eval_global_step: - self.eval_global_step = results['global_step'] - self.predictions.clear() - - for i, sid in enumerate(source_id): - self.predictions[int(sid)] = { - ssd_constants.PRED_BOXES: pred_boxes[i], - ssd_constants.PRED_SCORES: pred_scores[i], - ssd_constants.SOURCE_ID: source_id[i], - ssd_constants.RAW_SHAPE: raw_shape[i] - } - - # COCO metric calculates mAP only after a full epoch of evaluation. Return - # dummy results for top_N_accuracy to be compatible with benchmar_cnn.py. - if len(self.predictions) >= ssd_constants.COCO_NUM_VAL_IMAGES: - print('Got results for all {:d} eval examples. Calculate mAP...'.format( - ssd_constants.COCO_NUM_VAL_IMAGES)) - - annotation_file = os.path.join(self.data_dir, - ssd_constants.ANNOTATION_FILE) - # Size of predictions before decoding about 15--30GB, while size after - # decoding is 100--200MB. When using async eval mode, decoding takes - # 20--30 seconds of main thread time but is necessary to avoid OOM during - # inter-process communication. - decoded_preds = coco_metric.decode_predictions(self.predictions.values()) - self.predictions.clear() - - eval_results = coco_metric.compute_map(decoded_preds, annotation_file) - self.eval_coco_ap = eval_results['COCO/AP'] - ret = {'top_1_accuracy': self.eval_coco_ap, 'top_5_accuracy': 0.} - return ret - print('Got {:d} out of {:d} eval examples.' 
- ' Waiting for the remaining to calculate mAP...'.format( - len(self.predictions), ssd_constants.COCO_NUM_VAL_IMAGES)) - return {'top_1_accuracy': self.eval_coco_ap, 'top_5_accuracy': 0.} diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/int8/__init__.py b/models/object_detection/tensorflow/ssd-resnet34/inference/int8/__init__.py new file mode 100644 index 000000000..159180624 --- /dev/null +++ b/models/object_detection/tensorflow/ssd-resnet34/inference/int8/__init__.py @@ -0,0 +1,20 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + diff --git a/models/object_detection/tensorflow/ssd-resnet34/inference/int8/infer_detections.py b/models/object_detection/tensorflow/ssd-resnet34/inference/int8/infer_detections.py new file mode 100644 index 000000000..657469658 --- /dev/null +++ b/models/object_detection/tensorflow/ssd-resnet34/inference/int8/infer_detections.py @@ -0,0 +1,211 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +import tensorflow as tf +import time + +from argparse import ArgumentParser + +import benchmark_cnn +import datasets +import ssd_constants +from models import ssd_model +from preprocessing import COCOPreprocessor + +IMAGE_SIZE = 300 + +import os + +class ssd_resnet34_infer: + + def __init__(self): + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-b', "--batch-size", + help="Specify the batch size. If this " \ + "parameter is not specified or is -1, the " \ + "largest ideal batch size for the model will " \ + "be used.", + dest="batch_size", type=int, default=-1) + + arg_parser.add_argument('-e', "--inter-op-parallelism-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + + arg_parser.add_argument('-a', "--intra-op-parallelism-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph.', + dest='input_graph') + + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data. 
' + 'If this parameter is not specified, ' + 'the benchmark will use random/dummy data.', + dest="data_location", default=None) + + arg_parser.add_argument('-r', "--accuracy-only", + help='For accuracy measurement only.', + dest='accuracy_only', action='store_true') + + arg_parser.add_argument("--results-file-path", + help="File path for the inference results", + dest="results_file_path", default=None) + + # parse the arguments + self.args = arg_parser.parse_args() + + self.freeze_graph = self.load_graph(self.args.input_graph) + self.config = tf.ConfigProto() + self.config.intra_op_parallelism_threads = self.args.num_intra_threads + self.config.inter_op_parallelism_threads = self.args.num_inter_threads + + if self.args.batch_size == -1: + self.args.batch_size = 64 + + self.num_batches = (ssd_constants.COCO_NUM_VAL_IMAGES // self.args.batch_size) + \ + (ssd_constants.COCO_NUM_VAL_IMAGES % self.args.batch_size > 0) + + input_layer = 'input' + output_layers = ['v/stack', 'v/Softmax'] + self.input_tensor = self.freeze_graph.get_tensor_by_name(input_layer + ":0") + self.output_tensors = [self.freeze_graph.get_tensor_by_name(x + ":0") for x in output_layers] + + + def load_graph(self, frozen_graph_filename): + print('load graph from: ' + frozen_graph_filename) + with tf.gfile.GFile(frozen_graph_filename, "rb") as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + + # Then, we import the graph_def into a new Graph and returns it + with tf.Graph().as_default() as graph: + # Since we load everything in a new graph, this is not needed + tf.import_graph_def(graph_def, name='') + return graph + + def run_benchmark(self): + print("Inference with dummy data.") + with tf.Session(graph=self.freeze_graph, config=self.config) as sess: + + input_images = sess.run(tf.truncated_normal( + [self.args.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3], + dtype=tf.float32, + stddev=10, + name='synthetic_images')) + + total_iter = 1000 + warmup_iter = 200 + ttime = 0.0 + + 
print('total iteration is {0}'.format(str(total_iter))) + print('warm up iteration is {0}'.format(str(warmup_iter))) + + for step in range(total_iter): + start_time = time.time() + _ = sess.run(self.output_tensors, {self.input_tensor: input_images}) + end_time = time.time() + + duration = end_time - start_time + if (step + 1) % 10 == 0: + print('steps = {0}, {1} sec'.format(str(step), str(duration))) + + if step + 1 > warmup_iter: + ttime += duration + + total_batches = total_iter - warmup_iter + print ('Batchsize: {0}'.format(str(self.args.batch_size))) + print ('Time spent per BATCH: {0:10.4f} ms'.format(ttime / total_batches * 1000)) + print ('Total samples/sec: {0:10.4f} samples/s'.format(total_batches * self.args.batch_size / ttime)) + + + def __get_input(self): + preprocessor = COCOPreprocessor( + batch_size=self.args.batch_size, + output_shapes=[[self.args.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3]], + num_splits=1, + dtype=tf.float32, + train=False, + distortions=True, + resize_method=None, + shift_ratio=0 + ) + + class params: + datasets_repeat_cached_sample = False + + self.params = params() + self.dataset = datasets.create_dataset(self.args.data_location, 'coco') + + return preprocessor.minibatch( + self.dataset, + subset='validation', + params=self.params, + shift_ratio=0) + + + def accuracy_check(self): + print(self.args) + input_list = self.__get_input() + ds_init = tf.get_collection(tf.GraphKeys.TABLE_INITIALIZERS) + + ds_sess = tf.Session() + params = benchmark_cnn.make_params(data_dir=self.args.data_location) + self.model = ssd_model.SSD300Model(params=params) + + print("Inference for accuracy check.") + with tf.Session(graph=self.freeze_graph, config=self.config) as sess: + ds_sess.run(ds_init) + global_step = 0 + + for _ in range(self.num_batches): + results = {} + input_lists = ds_sess.run(input_list) + input_images = input_lists[0][0] + input_ids = input_lists[3][0] + input_raw_shapes = input_lists[4][0] + + result = sess.run(self.output_tensors, 
{self.input_tensor: input_images}) + # Make global_step available in results for postprocessing. + results['global_step'] = global_step + results[ssd_constants.SOURCE_ID] = input_ids + results[ssd_constants.RAW_SHAPE] = input_raw_shapes + + results[ssd_constants.PRED_BOXES] = result[0] + results[ssd_constants.PRED_SCORES] = result[1] + + results = self.model.postprocess(results) + + + + def run(self): + if self.args.accuracy_only: + self.accuracy_check() + else: + self.run_benchmark() + + + +if __name__ == "__main__": + infer = ssd_resnet34_infer() + infer.run() + From 1ecd87bebcfdcbd7e5522e112aba7f0643cb6cfa Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Thu, 23 May 2019 11:59:05 -0700 Subject: [PATCH 37/62] Enabling ResNet50v1.5 model for FP32 and INT8 (#309) * fixed docker build command in installation guide (#303) * Revert "fixed docker build command in installation guide (#303)" (#305) This reverts commit 6c17f8e45a09a6867328ee419791c204e1aaf0f1. * Adding ResNet50v1.5 model (and steps to get FP32 numbers) * Adding INT8 support * Addressing review comments This commit addresses review comments. It also adds unit tests. 
* Updating README for ResNet50v1.5 with latest perf numbers * Adding URL for INT8 model --- benchmarks/README.md | 1 + benchmarks/common/base_benchmark_util.py | 3 +- benchmarks/common/tensorflow/start.sh | 2 + .../tensorflow/resnet50v1_5/README.md | 295 ++++++++++++ .../tensorflow/resnet50v1_5/__init__.py | 19 + .../resnet50v1_5/inference/__init__.py | 19 + .../resnet50v1_5/inference/fp32/__init__.py | 19 + .../resnet50v1_5/inference/fp32/config.json | 7 + .../resnet50v1_5/inference/fp32/model_init.py | 115 +++++ .../resnet50v1_5/inference/int8/__init__.py | 19 + .../resnet50v1_5/inference/int8/config.json | 7 + .../resnet50v1_5/inference/int8/model_init.py | 123 +++++ .../tensorflow/resnet50v1_5/__init__.py | 20 + .../resnet50v1_5/inference/__init__.py | 20 + .../resnet50v1_5/inference/datasets.py | 96 ++++ .../eval_image_classifier_inference.py | 268 +++++++++++ .../resnet50v1_5/inference/preprocessing.py | 177 ++++++++ .../tensorflow/resnet50v1_5/int8/__init__.py | 20 + .../tensorflow/resnet50v1_5/int8/benchmark.py | 213 +++++++++ .../tensorflow/resnet50v1_5/int8/cnn_util.py | 51 +++ .../tensorflow/resnet50v1_5/int8/datasets.py | 114 +++++ .../int8/generate_calibration_data.py | 183 ++++++++ .../resnet50v1_5/int8/preprocessing.py | 419 ++++++++++++++++++ .../int8/preprocessing_benchmark.py | 173 ++++++++ .../tf_model_args/tf_resnet50v1_5_args.json | 40 ++ 25 files changed, 2422 insertions(+), 1 deletion(-) create mode 100644 benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md create mode 100644 benchmarks/image_recognition/tensorflow/resnet50v1_5/__init__.py create mode 100644 benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/__init__.py create mode 100644 benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/fp32/__init__.py create mode 100644 benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/fp32/config.json create mode 100644 benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/fp32/model_init.py create 
mode 100644 benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/int8/__init__.py create mode 100644 benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/int8/config.json create mode 100644 benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/int8/model_init.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/__init__.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/inference/__init__.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/inference/datasets.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/inference/preprocessing.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/int8/__init__.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/int8/benchmark.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/int8/cnn_util.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/int8/datasets.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/int8/generate_calibration_data.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing.py create mode 100644 models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing_benchmark.py create mode 100644 tests/unit/common/tensorflow/tf_model_args/tf_resnet50v1_5_args.json diff --git a/benchmarks/README.md b/benchmarks/README.md index 25cea61b6..414e344e5 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -26,6 +26,7 @@ dependencies to be installed: | Image Recognition | TensorFlow | [MobileNet V1](https://arxiv.org/pdf/1704.04861.pdf) | Inference | [Int8](image_recognition/tensorflow/mobilenet_v1/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/mobilenet_v1/README.md#fp32-inference-instructions) | | Image Recognition | TensorFlow | [ResNet 
101](https://arxiv.org/pdf/1512.03385.pdf) | Inference | [Int8](image_recognition/tensorflow/resnet101/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet101/README.md#fp32-inference-instructions) | | Image Recognition | TensorFlow | [ResNet 50](https://arxiv.org/pdf/1512.03385.pdf) | Inference | [Int8](image_recognition/tensorflow/resnet50/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet50/README.md#fp32-inference-instructions) | +| Image Recognition | TensorFlow | [ResNet 50v1.5](https://github.com/tensorflow/models/tree/master/official/resnet) | Inference | [Int8](image_recognition/tensorflow/resnet50v1_5/README.md#int8-inference-instructions) [FP32](image_recognition/tensorflow/resnet50v1_5/README.md#fp32-inference-instructions) | | Image Recognition | TensorFlow | [SqueezeNet](https://arxiv.org/pdf/1602.07360.pdf) | Inference | [FP32](image_recognition/tensorflow/squeezenet/README.md#fp32-inference-instructions) | | Image Segmentation | TensorFlow | [Mask R-CNN](https://arxiv.org/pdf/1703.06870.pdf) | Inference | [FP32](image_segmentation/tensorflow/maskrcnn/README.md#fp32-inference-instructions) | | Image Segmentation | TensorFlow | [UNet](https://arxiv.org/pdf/1505.04597.pdf) | Inference | [FP32](image_segmentation/tensorflow/unet/README.md#fp32-inference-instructions) | diff --git a/benchmarks/common/base_benchmark_util.py b/benchmarks/common/base_benchmark_util.py index e4c92639d..1aefdebd0 100644 --- a/benchmarks/common/base_benchmark_util.py +++ b/benchmarks/common/base_benchmark_util.py @@ -228,7 +228,8 @@ def _validate_args(self): raise ValueError("Number of cores exceeds system core number: {}". 
format(system_num_cores)) - if args.output_results and (args.model_name != "resnet50" or args.precision != "fp32"): + if args.output_results and ((args.model_name != "resnet50" and + args.model_name != "resnet50v1_5") or args.precision != "fp32"): raise ValueError("--output-results is currently only supported for resnet50 FP32 inference.") elif args.output_results and (args.mode != "inference" or not args.data_location): raise ValueError("--output-results can only be used when running inference with a dataset.") diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index be2d30f5c..5884cfac9 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -891,6 +891,8 @@ elif [ ${MODEL_NAME} == "resnet101" ]; then resnet50_101_inceptionv3 elif [ ${MODEL_NAME} == "resnet50" ]; then resnet50_101_inceptionv3 +elif [ ${MODEL_NAME} == "resnet50v1_5" ]; then + resnet50_101_inceptionv3 elif [ ${MODEL_NAME} == "rfcn" ]; then rfcn elif [ ${MODEL_NAME} == "squeezenet" ]; then diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md b/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md new file mode 100644 index 000000000..cc1f255a6 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md @@ -0,0 +1,295 @@ +# ResNet50 + +This document has instructions for how to run ResNet50 (v1.5) for the +following precisions: +* [Int8 inference](#int8-inference-instructions) +* [FP32 inference](#fp32-inference-instructions) + +Original ResNet model has multiple versions which have shown better accuracy +and/or throughput performance. As mentioned in TensorFlow's [official ResNet +model page](https://github.com/tensorflow/models/tree/master/official/resnet), 3 different +versions of the original ResNet model exists - ResNet50v1, ResNet50v1.5, and ResNet50v2. 
+As a side note, ResNet50v1.5 is also in MLPerf's [cloud inference benchmark for +image classification](https://github.com/mlperf/inference/tree/master/cloud/image_classification). + +## Int8 Inference Instructions + +1. Download the full ImageNet dataset and convert to the TF records format. + +* Clone the tensorflow/models repository: +``` +$ git clone https://github.com/tensorflow/models.git +``` +The TensorFlow models repo provides +[scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) +to download, process and convert the ImageNet dataset to the TF records format. + +* The ImageNet dataset directory location is only required to calculate the model accuracy. + +2. Download the pre-trained model. +``` +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/resnet50v1_5_int8_pretrained_model.pb +``` + +3. Clone the +[intelai/models](https://github.com/intelai/models) +repository +``` +$ git clone https://github.com/IntelAI/models.git +``` + +4. Run the inference script `launch_benchmark.py` with the appropriate parameters to evaluate the model performance and/or calculate the accuracy. +The optimized ResNet50v1.5 model files are attached to the [intelai/models](https://github.com/intelai/models) repo and +located at `models/models/image_recognition/tensorflow/resnet50v1_5/`. + + The docker image (`intelaipg/intel-optimized-tensorflow:1.14`) + used in the commands below was built using + [TensorFlow](https://github.com/tensorflow/tensorflow) master for TensorFlow + version 1.14. + +* Calculate the model accuracy. The required parameters include: the `ImageNet` dataset location (from step 1), +the pre-trained `resnet50v1_5_int8_pretrained_model.pb` input graph file (from step 2), and the `--accuracy-only` flag. 
+``` +$ cd /home//models/benchmarks + +$ python launch_benchmark.py \ + --data-location /home//dataset/FullImageNetData_directory \ + --in-graph resnet50v1_5_int8_pretrained_model.pb \ + --model-name resnet50v1_5 \ + --framework tensorflow \ + --precision int8 \ + --mode inference \ + --batch-size=100 \ + --accuracy-only \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 +``` +The log file is saved to the value of `--output-dir`. + +The tail of the log output when the benchmarking completes should look +something like this: +``` +Processed 49700 images. (Top1 accuracy, Top5 accuracy) = (0.7622, 0.9296) +Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7621, 0.9295) +Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7622, 0.9296) +Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7623, 0.9296) +Ran inference with batch size 100 +Log location outside container: {--output-dir value}/benchmark_resnet50_inference_int8_{timestamp}.log +``` + +* Evaluate the model performance: To evaluate performance with dummy data only, the `--data-location` argument is not needed. +Otherwise, the `--data-location` argument needs to be specified. +To calculate the model throughput (`images/sec`), the required parameters to run the inference script include: +the pre-trained `resnet50v1_5_int8_pretrained_model.pb` input graph file (from step +2), and the `--benchmark-only` flag. It is +optional to specify the number of `warmup_steps` and `steps` as extra +args, as shown in the command below. If these values are not specified, +the script will default to use `warmup_steps=10` and `steps=50`. 
+ +``` +$ cd /home//models/benchmarks + +$ python launch_benchmark.py \ + --in-graph resnet50v1_5_int8_pretrained_model.pb \ + --model-name resnet50v1_5 \ + --framework tensorflow \ + --precision int8 \ + --mode inference \ + --batch-size=128 \ + --benchmark-only \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + -- warmup_steps=50 steps=500 +``` +The tail of the log output when the benchmarking completes should look +something like this: +``` +... +Iteration 490: 0.249899 sec +Iteration 500: 0.249110 sec +Average time: 0.251280 sec +Batch size = 128 +Throughput: 509.392 images/sec +Ran inference with batch size 128 +Log location outside container: {--output-dir value}/benchmark_resnet50_inference_int8_{timestamp}.log +``` + +Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands +to get additional debug output or change the default output location. + +## FP32 Inference Instructions + +1. Download the pre-trained model. + +To get a pre-trained model for ResNet50v1.5, run: +``` +$ wget https://zenodo.org/record/2535873/files/resnet50_v1.pb +``` + +2. Clone the [intelai/models](https://github.com/intelai/models) repository +``` +$ git clone https://github.com/IntelAI/models.git +``` + +3. If running ResNet50v1.5 for accuracy, the ImageNet dataset will be +required (if running benchmarking for throughput/latency, then dummy +data will be used). + +The TensorFlow models repo provides +[scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) +to download, process, and convert the ImageNet dataset to the TF records format. + +4. Run the inference script `launch_benchmark.py` with the appropriate parameters to evaluate the model performance. +The optimized ResNet50v1.5 model files are attached to the [intelai/models](https://github.com/intelai/models) repo and +located at `models/models/image_recognition/tensorflow/resnet50v1_5/`. 
+If benchmarking uses dummy data for inference, the `--data-location` flag is not required. Otherwise, +`--data-location` needs to point to the ImageNet dataset location. + +* To measure the model latency, set `--batch-size=1` and run the benchmark script as shown: +``` +$ cd /home/<user>/models/benchmarks + +$ python launch_benchmark.py \ + --in-graph resnet50_v1.pb \ + --model-name resnet50v1_5 \ + --framework tensorflow \ + --precision fp32 \ + --mode inference \ + --batch-size=1 \ + --socket-id 0 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 +``` + +The log file is saved to the value of `--output-dir`. + +The tail of the log output when the benchmarking completes should look +something like this: +``` +Inference with dummy data. +Iteration 1: 2.761204 sec +Iteration 2: 0.011155 sec +Iteration 3: 0.009289 sec +... +Iteration 48: 0.009315 sec +Iteration 49: 0.009343 sec +Iteration 50: 0.009278 sec +Average time: 0.009481 sec +Batch size = 1 +Latency: 9.481 ms +Throughput: 105.470 images/sec +lscpu_path_cmd = command -v lscpu +lscpu located here: /usr/bin/lscpu +Ran inference with batch size 1 +Log location outside container: {--output-dir value}/benchmark_resnet50_inference_fp32_{timestamp}.log +``` + +* To measure the model throughput, set `--batch-size=128` and run the benchmark script as shown: +``` +$ cd /home/<user>/models/benchmarks + +$ python launch_benchmark.py \ + --in-graph resnet50_v1.pb \ + --model-name resnet50v1_5 \ + --framework tensorflow \ + --precision fp32 \ + --mode inference \ + --batch-size=128 \ + --socket-id 0 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 +``` + +The log file is saved to the value of `--output-dir`. + +The tail of the log output when the benchmarking completes should look +something like this: +``` +Inference with dummy data. +Iteration 1: 3.013918 sec +Iteration 2: 0.543498 sec +Iteration 3: 0.536187 sec +Iteration 4: 0.532568 sec +... 
+Iteration 46: 0.532444 sec +Iteration 47: 0.535652 sec +Iteration 48: 0.532158 sec +Iteration 49: 0.538117 sec +Iteration 50: 0.532411 sec +Average time: 0.534427 sec +Batch size = 128 +Throughput: 239.509 images/sec +Ran inference with batch size 128 +Log location outside container: {--output-dir value}/benchmark_resnet50_inference_fp32_{timestamp}.log +``` + +* To measure the model accuracy, use the `--accuracy-only` flag and pass +the ImageNet dataset directory from step 3 as the `--data-location`: +``` +$ cd /home/<user>/models/benchmarks + +$ python launch_benchmark.py \ + --in-graph resnet50_v1.pb \ + --model-name resnet50v1_5 \ + --framework tensorflow \ + --precision fp32 \ + --mode inference \ + --accuracy-only \ + --batch-size 100 \ + --socket-id 0 \ + --data-location /home/<user>/dataset/ImageNetData_directory \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 +``` + +The log file is saved to the value of `--output-dir`. +The tail of the log output when the accuracy run completes should look +something like this: +``` +... +Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7651, 0.9307) +lscpu_path_cmd = command -v lscpu +lscpu located here: /usr/bin/lscpu +Ran inference with batch size 100 +Log location outside container: {--output-dir value}/benchmark_resnet50_inference_fp32_{timestamp}.log +``` + +* The `--output-results` flag can be used along with the above benchmarking +or accuracy tests, in order to also output a file with the inference +results (file name, actual label, and the predicted label). The results +output can only be used with real data. 
+ +For example, the command below is the same as the accuracy test above, +except with the `--output-results` flag added: +``` +$ cd /home/<user>/models/benchmarks + +$ python launch_benchmark.py \ + --in-graph resnet50_v1.pb \ + --model-name resnet50v1_5 \ + --framework tensorflow \ + --precision fp32 \ + --mode inference \ + --accuracy-only \ + --output-results \ + --batch-size 100 \ + --socket-id 0 \ + --data-location /home/<user>/dataset/ImageNetData_directory \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14 +``` +The results file will be written to the +`models/benchmarks/common/tensorflow/logs` directory, unless another +output directory is specified by the `--output-dir` arg. Below is an +example of what the inference results file will look like: +``` +filename,actual,prediction +ILSVRC2012_val_00033870.JPEG,592,592 +ILSVRC2012_val_00045598.JPEG,258,258 +ILSVRC2012_val_00047428.JPEG,736,736 +ILSVRC2012_val_00003341.JPEG,344,344 +ILSVRC2012_val_00037069.JPEG,192,192 +ILSVRC2012_val_00029701.JPEG,440,440 +ILSVRC2012_val_00016918.JPEG,286,737 +ILSVRC2012_val_00015545.JPEG,5,5 +ILSVRC2012_val_00016713.JPEG,274,274 +ILSVRC2012_val_00014735.JPEG,31,31 +... +``` + +Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands +to get additional debug output or change the default output location. diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/__init__.py b/benchmarks/image_recognition/tensorflow/resnet50v1_5/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/__init__.py b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/fp32/__init__.py b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/fp32/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/fp32/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/fp32/config.json b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/fp32/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/fp32/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/fp32/model_init.py b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/fp32/model_init.py new file mode 100644 index 000000000..7231243b8 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/fp32/model_init.py @@ -0,0 +1,115 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from common.base_model_init import BaseModelInitializer +from common.base_model_init import set_env_var + +import os +from argparse import ArgumentParser +import time + + +class ModelInitializer(BaseModelInitializer): + """initialize mode and run benchmark""" + + def __init__(self, args, custom_args=[], platform_util=None): + super(ModelInitializer, self).__init__(args, custom_args, platform_util) + + self.benchmark_command = "" + if not platform_util: + raise ValueError("Did not find any platform info.") + + # use default batch size if -1 + if self.args.batch_size == -1: + self.args.batch_size = 128 + + # set num_inter_threads and num_intra_threads + self.set_num_inter_intra_threads() + + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument("--warmup-steps", dest='warmup_steps', + type=int, default=10, + help="number of warmup steps") + arg_parser.add_argument("--steps", dest='steps', + type=int, default=50, + help="number of steps") + arg_parser.add_argument( + '--kmp-blocktime', dest='kmp_blocktime', + help='number of kmp block time', + type=int, default=1) + + self.args = arg_parser.parse_args(self.custom_args, namespace=self.args) + + # Set KMP env vars, if they haven't already been set, but override the default KMP_BLOCKTIME value + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path, kmp_blocktime=str(self.args.kmp_blocktime)) + + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) + + benchmark_script = os.path.join( + self.args.intelai_models, self.args.mode, + "eval_image_classifier_inference.py") + + self.benchmark_command = self.get_command_prefix(args.socket_id) + \ + self.python_exe + " " + benchmark_script + + num_cores = self.platform_util.num_cores_per_socket if self.args.num_cores 
== -1 \ + else self.args.num_cores + + self.benchmark_command = \ + self.benchmark_command + \ + " --input-graph=" + self.args.input_graph + \ + " --num-inter-threads=" + str(self.args.num_inter_threads) + \ + " --num-intra-threads=" + str(self.args.num_intra_threads) + \ + " --num-cores=" + str(num_cores) + \ + " --batch-size=" + str(self.args.batch_size) + \ + " --warmup-steps=" + str(self.args.warmup_steps) + \ + " --steps=" + str(self.args.steps) + + if self.args.data_num_inter_threads: + self.benchmark_command += " --data-num-inter-threads=" + str(self.args.data_num_inter_threads) + if self.args.data_num_intra_threads: + self.benchmark_command += " --data-num-intra-threads=" + str(self.args.data_num_intra_threads) + + # if the data location directory is not empty, then include the arg + if self.args.data_location and os.listdir(self.args.data_location): + self.benchmark_command += " --data-location=" + \ + self.args.data_location + if self.args.accuracy_only: + self.benchmark_command += " --accuracy-only" + + # if output results is enabled, generate a results file name and pass it to the inference script + if self.args.output_results: + self.results_filename = "{}_{}_{}_results_{}.txt".format( + self.args.model_name, self.args.precision, self.args.mode, + time.strftime("%Y%m%d_%H%M%S", time.gmtime())) + self.results_file_path = os.path.join(self.args.output_dir, self.results_filename) + self.benchmark_command += " --results-file-path {}".format(self.results_file_path) + + def run(self): + if self.benchmark_command: + self.run_command(self.benchmark_command) + + if self.args.output_results: + print("Inference results file in the output directory: {}".format(self.results_filename)) diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/int8/__init__.py b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/int8/__init__.py new file mode 100644 index 000000000..d9c4123de --- /dev/null +++ 
b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/int8/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/int8/config.json b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/int8/config.json new file mode 100644 index 000000000..273b45b40 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/int8/config.json @@ -0,0 +1,7 @@ +{ + "optimization_parameters": { + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0", + "KMP_BLOCKTIME": 1, + "KMP_SETTINGS": 1 + } +} diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/int8/model_init.py new file mode 100644 index 000000000..03b523829 --- /dev/null +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/inference/int8/model_init.py @@ -0,0 +1,123 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from common.base_model_init import BaseModelInitializer +from common.base_model_init import set_env_var + +import argparse +import os + + +class ModelInitializer(BaseModelInitializer): + """Model initializer for resnet50 int8 inference""" + + def __init__(self, args, custom_args=[], platform_util=None): + super(ModelInitializer, self).__init__(args, custom_args, platform_util) + + # Set the num_inter_threads and num_intra_threads + self.set_num_inter_intra_threads() + # Set env vars, if they haven't already been set + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads, overwrite_existing=True) + + def parse_args(self): + parser = argparse.ArgumentParser() + parser.add_argument( + "--warmup-steps", dest="warmup_steps", + help="number of warmup steps", + type=int, default=10) + parser.add_argument( + "--steps", dest="steps", + help="number of steps", + type=int, default=50) + parser.add_argument( + '--kmp-blocktime', dest='kmp_blocktime', + help='number of kmp block time', + type=int, default=1) + parser.add_argument( + "--calibration-only", + help="Calibrate the accuracy.", + dest="calibration_only", action="store_true") + parser.add_argument( + "--calibrate", dest="calibrate", + help=" run accuracy with calibration data, " + "to generate min_max ranges, calibrate=[True/False]", + type=bool, default=False) + + self.args = parser.parse_args(self.custom_args, + namespace=self.args) + + # Set KMP 
env vars, if they haven't already been set, but override the default KMP_BLOCKTIME value + config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.json") + self.set_kmp_vars(config_file_path, kmp_blocktime=str(self.args.kmp_blocktime)) + + set_env_var("OMP_NUM_THREADS", self.args.num_intra_threads) + + def run_benchmark_or_accuracy(self): + cmd = os.path.join( + self.args.intelai_models, self.args.mode, + "eval_image_classifier_inference.py") + + cmd = self.get_command_prefix(self.args.socket_id) + self.python_exe + " " + cmd + + cmd += " --input-graph=" + self.args.input_graph + \ + " --num-inter-threads=" + str(self.args.num_inter_threads) + \ + " --num-intra-threads=" + str(self.args.num_intra_threads) + \ + " --batch-size=" + str(self.args.batch_size) + \ + " --warmup-steps=" + str(self.args.warmup_steps) + \ + " --steps=" + str(self.args.steps) + + if self.args.calibrate: + cmd += " --calibrate=" + str(self.args.calibrate) + if self.args.data_num_inter_threads: + cmd += " --data-num-inter-threads=" + str(self.args.data_num_inter_threads) + if self.args.data_num_intra_threads: + cmd += " --data-num-intra-threads=" + str(self.args.data_num_intra_threads) + + # if the data location directory is not empty, then include the arg + if self.args.data_location and os.listdir(self.args.data_location): + cmd += " --data-location=" + self.args.data_location + if self.args.accuracy_only: + cmd += " --accuracy-only" + + self.run_command(cmd) + + def run_calibration(self): + calibration_script = os.path.join(self.args.intelai_models, + self.args.precision, + "generate_calibration_data.py") + script_args_list = [ + "input_graph", "data_location", + "batch_size", + "num_inter_threads", "num_intra_threads"] + cmd_prefix = self.get_command_prefix(self.args.socket_id) + \ + self.python_exe + " " + calibration_script + cmd = self.add_args_to_command(cmd_prefix, script_args_list) + self.run_command(cmd) + + def run(self): + # Parse custom arguments and 
append to self.args + self.parse_args() + if self.args.accuracy_only and self.args.calibration_only: + self.run_calibration() + else: + self.run_benchmark_or_accuracy() diff --git a/models/image_recognition/tensorflow/resnet50v1_5/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/__init__.py new file mode 100644 index 000000000..159180624 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/__init__.py @@ -0,0 +1,20 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + diff --git a/models/image_recognition/tensorflow/resnet50v1_5/inference/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/inference/__init__.py new file mode 100644 index 000000000..159180624 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/inference/__init__.py @@ -0,0 +1,20 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + diff --git a/models/image_recognition/tensorflow/resnet50v1_5/inference/datasets.py b/models/image_recognition/tensorflow/resnet50v1_5/inference/datasets.py new file mode 100644 index 000000000..cb848e467 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/inference/datasets.py @@ -0,0 +1,96 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Benchmark dataset utilities. 
+""" + +from abc import abstractmethod +import os + +import tensorflow as tf + +import preprocessing + +IMAGENET_NUM_TRAIN_IMAGES = 1281167 +IMAGENET_NUM_VAL_IMAGES = 50000 +IMAGENET_NUM_CLASSES = 1000 + +class Dataset(object): + """Abstract class for cnn benchmarks dataset.""" + + def __init__(self, name, data_dir=None): + self.name = name + if data_dir is None: + raise ValueError('Data directory not specified') + self.data_dir = data_dir + + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) + + def reader(self): + return tf.TFRecordReader() + + @abstractmethod + def num_classes(self): + pass + + @abstractmethod + def num_examples_per_epoch(self, subset): + pass + + def __str__(self): + return self.name + + +class ImagenetData(Dataset): + + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('ImageNet', data_dir) + + def num_classes(self): + return IMAGENET_NUM_CLASSES + + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + elif subset == 'calibrate' or subset == 'calibration': + return 100 + else: + raise ValueError('Invalid data subset "%s"' % subset) + + def get_image_preprocessor(self): + return preprocessing.RecordInputImagePreprocessor diff --git a/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py b/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py new file mode 100644 index 000000000..c8fe46a11 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py @@ -0,0 +1,268 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +import time +from argparse import ArgumentParser + +import tensorflow as tf +import tensorflow.tools.graph_transforms as graph_transforms + +import datasets +import numpy as np + +INPUTS = 'input_tensor:0' +OUTPUTS = 'softmax_tensor:0' +OPTIMIZATION = 'strip_unused_nodes remove_nodes(op=Identity, op=CheckNumerics) fold_constants(ignore_errors=true) fold_batch_norms fold_old_batch_norms' + +RESNET_IMAGE_SIZE = 224 + + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph""" + + def __init__(self): + + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-b', "--batch-size", + help="Specify the batch size. If this " \ + "parameter is not specified or is -1, the " \ + "largest ideal batch size for the model will " \ + "be used.", + dest="batch_size", type=int, default=-1) + + arg_parser.add_argument('-e', "--num-inter-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + + arg_parser.add_argument('-a', "--num-intra-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + + arg_parser.add_argument('-m', "--model-name", + help='Specify the model name to run benchmark for', + dest='model_name') + + arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') + + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data. 
' + 'If this parameter is not specified, ' + 'the benchmark will use random/dummy data.', + dest="data_location", default=None) + + arg_parser.add_argument('-r', "--accuracy-only", + help='For accuracy measurement only.', + dest='accuracy_only', action='store_true') + arg_parser.add_argument('--calibrate', dest='calibrate', + help='Run accuracy with calibration data,' + 'to generate min_max ranges, calibrate=[True/False]', + type=bool, default=False) + arg_parser.add_argument("--results-file-path", + help="File path for the inference results", + dest="results_file_path", default=None) + arg_parser.add_argument("--warmup-steps", type=int, default=10, + help="number of warmup steps") + arg_parser.add_argument("--steps", type=int, default=50, + help="number of steps") + + arg_parser.add_argument( + '--data-num-inter-threads', dest='data_num_inter_threads', + help='number threads across operators', + type=int, default=32) + arg_parser.add_argument( + '--data-num-intra-threads', dest='data_num_intra_threads', + help='number threads for data layer operator', + type=int, default=14) + arg_parser.add_argument( + '--num-cores', dest='num_cores', + help='number of cores', + type=int, default=28) + + self.args = arg_parser.parse_args() + # validate the arguements + self.validate_args() + + def write_results_output(self, predictions, filenames, labels): + # If a results_file_path is provided, write the predictions to the file + if self.args.results_file_path: + top_predictions = np.argmax(predictions, 1) + with open(self.args.results_file_path, "a") as fp: + for filename, expected_label, top_prediction in zip(filenames, labels, top_predictions): + fp.write("{},{},{}\n".format(filename, expected_label, top_prediction)) + + def run(self): + """run benchmark with optimized graph""" + + print("Run inference") + + data_config = tf.ConfigProto() + data_config.intra_op_parallelism_threads = self.args.data_num_intra_threads + data_config.inter_op_parallelism_threads = 
self.args.data_num_inter_threads + data_config.use_per_session_threads = 1 + + infer_config = tf.ConfigProto() + infer_config.intra_op_parallelism_threads = self.args.num_intra_threads + infer_config.inter_op_parallelism_threads = self.args.num_inter_threads + infer_config.use_per_session_threads = 1 + + data_graph = tf.Graph() + with data_graph.as_default(): + if (self.args.data_location): + print("Inference with real data.") + if self.args.calibrate: + subset = 'calibration' + else: + subset = 'validation' + dataset = datasets.ImagenetData(self.args.data_location) + preprocessor = dataset.get_image_preprocessor()( + RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, self.args.batch_size, + num_cores=self.args.num_cores, + resize_method='crop') + + images, labels, filenames = preprocessor.minibatch(dataset, subset=subset) + + # If a results file path is provided, then start the prediction output file + if self.args.results_file_path: + with open(self.args.results_file_path, "w+") as fp: + fp.write("filename,actual,prediction\n") + else: + print("Inference with dummy data.") + input_shape = [self.args.batch_size, RESNET_IMAGE_SIZE, RESNET_IMAGE_SIZE, 3] + images = tf.random.uniform(input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') + + infer_graph = tf.Graph() + with infer_graph.as_default(): + graph_def = tf.GraphDef() + with tf.gfile.FastGFile(self.args.input_graph, 'rb') as input_file: + input_graph_content = input_file.read() + graph_def.ParseFromString(input_graph_content) + + output_graph = graph_transforms.TransformGraph(graph_def, + [INPUTS], [OUTPUTS], [OPTIMIZATION]) + tf.import_graph_def(output_graph, name='') + + # Definite input and output Tensors for detection_graph + input_tensor = infer_graph.get_tensor_by_name('input_tensor:0') + output_tensor = infer_graph.get_tensor_by_name('softmax_tensor:0') + + data_sess = tf.Session(graph=data_graph, config=data_config) + infer_sess = tf.Session(graph=infer_graph, config=infer_config) + + 
num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset=subset) - num_processed_images \ + if self.args.data_location else datasets.IMAGENET_NUM_VAL_IMAGES + + if (not self.args.accuracy_only): + iteration = 0 + warm_up_iteration = self.args.warmup_steps + total_run = self.args.steps + total_time = 0 + + while num_remaining_images >= self.args.batch_size and iteration < total_run: + iteration += 1 + tf_filenames = None + np_labels = None + data_load_start = time.time() + if self.args.results_file_path: + image_np, np_labels, tf_filenames = data_sess.run([images, labels, filenames]) + else: + image_np = data_sess.run(images) + + data_load_time = time.time() - data_load_start + + num_processed_images += self.args.batch_size + num_remaining_images -= self.args.batch_size + + start_time = time.time() + predictions = infer_sess.run(output_tensor, feed_dict={input_tensor: image_np}) + time_consume = time.time() - start_time + + # Write out the file name, expected label, and top prediction + self.write_results_output(predictions, tf_filenames, np_labels) + + # only add data loading time for real data, not for dummy data + if self.args.data_location: + time_consume += data_load_time + + print('Iteration %d: %.6f sec' % (iteration, time_consume)) + if iteration > warm_up_iteration: + total_time += time_consume + + time_average = total_time / (iteration - warm_up_iteration) + print('Average time: %.6f sec' % (time_average)) + + print('Batch size = %d' % self.args.batch_size) + if (self.args.batch_size == 1): + print('Latency: %.3f ms' % (time_average * 1000)) + # print throughput for both batch size 1 and 128 + print('Throughput: %.3f images/sec' % (self.args.batch_size / time_average)) + + else: # accuracy check + total_accuracy1, total_accuracy5 = (0.0, 0.0) + + while num_remaining_images >= self.args.batch_size: + # Reads and preprocess data + tf_filenames = None + if self.args.results_file_path: + np_images, np_labels, tf_filenames = 
data_sess.run([images, labels, filenames]) + else: + np_images, np_labels = data_sess.run([images, labels]) + num_processed_images += self.args.batch_size + num_remaining_images -= self.args.batch_size + + # Compute inference on the preprocessed data + predictions = infer_sess.run(output_tensor, + {input_tensor: np_images}) + + # Write out the file name, expected label, and top prediction + self.write_results_output(predictions, tf_filenames, np_labels) + + with tf.Graph().as_default() as accu_graph: + accuracy1 = tf.reduce_sum( + tf.cast(tf.nn.in_top_k(tf.constant(predictions), + tf.constant(np_labels), 1), tf.float32)) + + accuracy5 = tf.reduce_sum( + tf.cast(tf.nn.in_top_k(tf.constant(predictions), + tf.constant(np_labels), 5), tf.float32)) + with tf.Session() as accu_sess: + np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5]) + + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1 / num_processed_images, + total_accuracy5 / num_processed_images)) + + def validate_args(self): + """validate the arguments""" + + if not self.args.data_location: + if self.args.accuracy_only: + raise ValueError("You must use real data for accuracy measurement.") + + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/models/image_recognition/tensorflow/resnet50v1_5/inference/preprocessing.py b/models/image_recognition/tensorflow/resnet50v1_5/inference/preprocessing.py new file mode 100644 index 000000000..3c6361584 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/inference/preprocessing.py @@ -0,0 +1,177 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.data.experimental import parallel_interleave +from tensorflow.data.experimental import map_and_batch +from tensorflow.python.platform import gfile + + +def parse_example_proto(example_serialized): + """Parses an Example proto containing a training example of an image. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + 'image/filename': tf.FixedLenFeature([], dtype=tf.string, + default_value="") + } + sparse_float32 = tf.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. 
+ feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.parse_single_example(example_serialized, feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + filename = tf.cast(features['image/filename'], dtype=tf.string) + + return features['image/encoded'], label, filename + + +def eval_image(image, height, width, resize_method, + central_fraction=0.875, scope=None): + + with tf.name_scope('eval_image'): + if resize_method == 'crop': + shape = tf.shape(image) + image = tf.cond(tf.less(shape[0], shape[1]), + lambda: tf.image.resize_images(image, + tf.convert_to_tensor([256, 256 * shape[1] / shape[0]], + dtype=tf.int32)), + lambda: tf.image.resize_images(image, + tf.convert_to_tensor([256 * shape[0] / shape[1], 256], + dtype=tf.int32))) + + shape = tf.shape(image) + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) + distorted_image.set_shape([height, width, 3]) + means = tf.broadcast_to([123.68, 116.78, 103.94], tf.shape(distorted_image)) + return distorted_image - means + else: # bilinear + if image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. 
+ image = tf.expand_dims(image, 0) + image = tf.image.resize_bilinear(image, [height, width], + align_corners=False) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image + +class RecordInputImagePreprocessor(object): + """Preprocessor for images with RecordInput format.""" + + def __init__(self, + height, + width, + batch_size, + num_cores, + resize_method="bilinear"): + + self.height = height + self.width = width + self.batch_size = batch_size + self.num_cores = num_cores + self.resize_method = resize_method + + def parse_and_preprocess(self, value): + # parse + image_buffer, label_index, filename = parse_example_proto(value) + # preprocess + image = tf.image.decode_jpeg( + image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + image = eval_image(image, self.height, self.width, self.resize_method) + return (image, label_index, filename) + + def minibatch(self, dataset, subset, cache_data=False): + + with tf.name_scope('batch_processing'): + + glob_pattern = dataset.tf_record_pattern(subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --data_dir matching: {}' + .format(glob_pattern)) + ds = tf.data.TFRecordDataset.list_files(file_names) + + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, + sloppy=True, + buffer_output_elements=10000, prefetch_input_elements=10000)) + + if cache_data: + ds = ds.take(1).cache().repeat() + + ds = ds.prefetch(buffer_size=10000) + #ds = ds.prefetch(buffer_size=self.batch_size) + + # num of parallel batches not greater than 56 + max_num_parallel_batches = min(56, 2 * self.num_cores) + ds = ds.apply( + map_and_batch( + map_func=self.parse_and_preprocess, + batch_size=self.batch_size, + num_parallel_batches=max_num_parallel_batches, + num_parallel_calls=None)) + + ds = ds.prefetch(buffer_size=tf.contrib.data.AUTOTUNE) + + ds_iterator = 
ds.make_one_shot_iterator() + images, labels, filename = ds_iterator.get_next() + # reshape + labels = tf.reshape(labels, [self.batch_size]) + filename = tf.reshape(filename, [self.batch_size]) + + return images, labels, filename diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/__init__.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/__init__.py new file mode 100644 index 000000000..159180624 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/__init__.py @@ -0,0 +1,20 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/benchmark.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/benchmark.py new file mode 100644 index 000000000..c6d9a9e1f --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/benchmark.py @@ -0,0 +1,213 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import time + +import datasets +import tensorflow as tf + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--data_location", default=None, + help="dataset location") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="predict", + help="name of output layer") + parser.add_argument("--num_cores", default=28, + type=int, help="number of physical cores") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + parser.add_argument( + '--data_num_inter_threads', + help='number threads across data layer operators', + type=int, default=16) + parser.add_argument( + '--data_num_intra_threads', + help='number threads for an data layer operator', + type=int, default=14) + parser.add_argument("--warmup_steps", type=int, default=10, + help="number of warmup steps") + parser.add_argument("--steps", type=int, default=50, help="number of steps") + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + if args.input_height: + input_height = args.input_height + else: + input_height = 224 + if args.input_width: + input_width = args.input_width + else: + 
input_width = 224 + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + warmup_steps = args.warmup_steps + steps = args.steps + assert steps > 10, "Benchmark steps should be at least 10." + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + + data_config = tf.ConfigProto() + data_config.intra_op_parallelism_threads = args.data_num_intra_threads + data_config.inter_op_parallelism_threads = args.data_num_inter_threads + data_config.use_per_session_threads = 1 + + infer_config = tf.ConfigProto() + infer_config.intra_op_parallelism_threads = num_intra_threads + infer_config.inter_op_parallelism_threads = num_inter_threads + infer_config.use_per_session_threads = 1 + + data_graph = tf.Graph() + with data_graph.as_default(): + if args.data_location: + print("inference with real data") + # get the images from dataset + dataset = datasets.ImagenetData(args.data_location) + preprocessor = dataset.get_image_preprocessor(benchmark=True)( + input_height, input_width, batch_size, + num_cores=args.num_cores, + resize_method='crop') + images = preprocessor.minibatch(dataset, subset='validation') + else: + # synthetic images + print("inference with dummy data") + input_shape = [batch_size, input_height, input_width, 3] + images = tf.random.uniform( + input_shape, 0.0, 255.0, dtype=tf.float32, name='synthetic_images') + + infer_graph = tf.Graph() + with infer_graph.as_default(): + graph_def = tf.GraphDef() + with open(model_file, "rb") as f: + graph_def.ParseFromString(f.read()) + tf.import_graph_def(graph_def, name='') + + input_tensor = infer_graph.get_tensor_by_name(input_layer + ":0") + output_tensor = infer_graph.get_tensor_by_name(output_layer + ":0") + tf.global_variables_initializer() + + data_sess = tf.Session(graph=data_graph, config=data_config) + infer_sess = tf.Session(graph=infer_graph, config=infer_config) + + print("[Running warmup steps...]") + step_total_time = 0 + 
step_total_images = 0 + + for t in range(warmup_steps): + data_start_time = time.time() + image_data = data_sess.run(images) + data_load_time = time.time() - data_start_time + + start_time = time.time() + infer_sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + + # only count the data loading and processing time for real data + if args.data_location: + elapsed_time += data_load_time + + step_total_time += elapsed_time + step_total_images += batch_size + + if ((t + 1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t + 1, step_total_images / step_total_time)) + step_total_time = 0 + step_total_images = 0 + + print("[Running benchmark steps...]") + total_time = 0 + total_images = 0 + + step_total_time = 0 + step_total_images = 0 + + for t in range(steps): + try: + data_start_time = time.time() + image_data = data_sess.run(images) + data_load_time = time.time() - data_start_time + + start_time = time.time() + infer_sess.run(output_tensor, {input_tensor: image_data}) + elapsed_time = time.time() - start_time + + # only count the data loading and processing time for real data + if args.data_location: + elapsed_time += data_load_time + + total_time += elapsed_time + total_images += batch_size + + step_total_time += elapsed_time + step_total_images += batch_size + + if ((t + 1) % 10 == 0): + print("steps = {0}, {1} images/sec" + "".format(t + 1, step_total_images / step_total_time)) + step_total_time = 0 + step_total_images = 0 + + except tf.errors.OutOfRangeError: + print("Running out of images from dataset.") + break + + print("Average throughput for batch size {0}: {1} images/sec".format(batch_size, total_images / total_time)) diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/cnn_util.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/cnn_util.py new file mode 100644 index 000000000..fb76f2971 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/cnn_util.py @@ -0,0 
+1,51 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Utilities for CNN benchmarks.""" + +import tensorflow as tf + + +def tensorflow_version_tuple(): + v = tf.__version__ + major, minor, patch = v.split('.') + return (int(major), int(minor), patch) + + +def tensorflow_version(): + vt = tensorflow_version_tuple() + return vt[0] * 1000 + vt[1] + diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/datasets.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/datasets.py new file mode 100644 index 000000000..1a885cb66 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/datasets.py @@ -0,0 +1,114 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import os +from abc import abstractmethod + +import tensorflow as tf + + +IMAGENET_NUM_TRAIN_IMAGES = 1281167 +IMAGENET_NUM_VAL_IMAGES = 50000 + +class Dataset(object): + """Abstract class for cnn benchmarks dataset.""" + + def __init__(self, name, height=None, width=None, depth=None, data_dir=None, + queue_runner_required=False, num_classes=1000): + self.name = name + self.height = height + self.width = width + self.depth = depth or 3 + + self.data_dir = data_dir + self._queue_runner_required = queue_runner_required + self._num_classes = num_classes + + def tf_record_pattern(self, subset): + return os.path.join(self.data_dir, '%s-*-of-*' % subset) + + def reader(self): + return tf.TFRecordReader() + + @property + def num_classes(self): + return self._num_classes + + @num_classes.setter + def num_classes(self, val): + self._num_classes = val + + @abstractmethod + def num_examples_per_epoch(self, subset): + pass + + def __str__(self): + return self.name + + def get_image_preprocessor(self): + return None + + def queue_runner_required(self): + return self._queue_runner_required + + def use_synthetic_gpu_images(self): + return not self.data_dir + + +class ImagenetData(Dataset): + """Configuration for Imagenet dataset.""" + + def __init__(self, data_dir=None): + super(ImagenetData, self).__init__('imagenet', 300, 300, data_dir=data_dir) + + def num_examples_per_epoch(self, subset='train'): + if subset == 'train': + return IMAGENET_NUM_TRAIN_IMAGES + elif subset == 'validation': + return IMAGENET_NUM_VAL_IMAGES + elif subset == 'calibrate' or subset == 'calibration': + return 100 + else: + raise ValueError('Invalid data subset "%s"' % subset) + + def get_image_preprocessor(self, benchmark=False): + if benchmark: + import preprocessing_benchmark + return preprocessing_benchmark.RecordInputImagePreprocessor + else: + import preprocessing + return preprocessing.RecordInputImagePreprocessor 
+ diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/generate_calibration_data.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/generate_calibration_data.py new file mode 100644 index 000000000..abf62345b --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/generate_calibration_data.py @@ -0,0 +1,183 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import os +import time +import numpy as np +from collections import namedtuple +from operator import attrgetter + +from google.protobuf import text_format +import tensorflow as tf +import preprocessing +import datasets + +NUM_TEST_IMAGES = 50000 + +def load_graph(model_file): + graph = tf.Graph() + graph_def = tf.GraphDef() + + import os + file_ext = os.path.splitext(model_file)[1] + + with open(model_file, "rb") as f: + if file_ext == '.pbtxt': + text_format.Merge(f.read(), graph_def) + else: + graph_def.ParseFromString(f.read()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + + return graph + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--input_graph", default=None, + help="graph/model to be executed") + parser.add_argument("--data_location", default=None, + help="full path to the validation data") + parser.add_argument("--input_height", default=224, + type=int, help="input height") + parser.add_argument("--input_width", default=224, + type=int, help="input width") + parser.add_argument("--batch_size", default=32, + type=int, help="batch size") + parser.add_argument("--input_layer", default="input", + help="name of input layer") + parser.add_argument("--output_layer", default="predict", + help="name of output layer") + parser.add_argument( + '--num_inter_threads', + help='number threads across operators', + type=int, default=1) + parser.add_argument( + '--num_intra_threads', + help='number threads for an operator', + type=int, default=1) + args = parser.parse_args() + + if args.input_graph: + model_file = args.input_graph + else: + sys.exit("Please provide a graph file.") + if args.input_height: + input_height = args.input_height + else: + input_height = 224 + if 
args.input_width: + input_width = args.input_width + else: + input_width = 224 + batch_size = args.batch_size + input_layer = args.input_layer + output_layer = args.output_layer + num_inter_threads = args.num_inter_threads + num_intra_threads = args.num_intra_threads + data_location = args.data_location + dataset = datasets.ImagenetData(data_location) + preprocessor = preprocessing.ImagePreprocessor( + input_height, input_width, batch_size, + 1, # device count + tf.float32, # data_type for input fed to the graph + train=False, # doing inference + resize_method='crop') + images, labels, tf_records = preprocessor.minibatch(dataset, subset='train') + graph = load_graph(model_file) + input_tensor = graph.get_tensor_by_name(input_layer + ":0") + output_tensor = graph.get_tensor_by_name(output_layer + ":0") + + config = tf.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + + total_accuracy1, total_accuracy5 = (0.0, 0.0) + num_processed_images = 0 + num_remaining_images = dataset.num_examples_per_epoch(subset='train') \ + - num_processed_images + + CALIBRATION_POOL_SIZE = 1000 + CALIBRATION_SET_SIZE = 100 + calibration_pool = [] + ImageWithConfidence = namedtuple('ImageWithConfidence', + ['tf_record', 'confidence']) + current_pool_size = 0 + with tf.Session() as sess: + sess_graph = tf.Session(graph=graph, config=config) + while num_remaining_images >= batch_size: + # Reads and preprocess data + np_images, np_labels, serialized_images = sess.run( + [images[0], labels[0], tf_records]) + num_processed_images += batch_size + num_remaining_images -= batch_size + # Compute inference on the preprocessed data + predictions = sess_graph.run(output_tensor, + {input_tensor: np_images}) + selected_img_indices = np.where( + predictions.argmax(axis=1) == np_labels)[0].tolist() + current_pool_size += len(selected_img_indices) + for indx in selected_img_indices: + 
calibration_pool.append(ImageWithConfidence( + serialized_images[indx], predictions[indx].max())) + + accuracy1 = tf.reduce_sum( + tf.cast(tf.nn.in_top_k(tf.constant(predictions), + tf.constant(np_labels), 1), tf.float32)) + + accuracy5 = tf.reduce_sum( + tf.cast(tf.nn.in_top_k(tf.constant(predictions), + tf.constant(np_labels), 5), tf.float32)) + np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) + total_accuracy1 += np_accuracy1 + total_accuracy5 += np_accuracy5 + print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ + % (num_processed_images, total_accuracy1/num_processed_images, + total_accuracy5/num_processed_images)) + if current_pool_size >= CALIBRATION_POOL_SIZE: + break + + writer = tf.python_io.TFRecordWriter('calibration-1-of-1') + calibration_pool = sorted(calibration_pool, + key=attrgetter('confidence'), reverse=True) + for i in range(CALIBRATION_SET_SIZE): + writer.write(calibration_pool[i].tf_record) + writer.close() diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing.py new file mode 100644 index 000000000..c4e0a95ce --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing.py @@ -0,0 +1,419 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# + + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Image pre-processing utilities. +""" +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf +from random import randint + +from tensorflow.python.ops import data_flow_ops +import cnn_util + +def parse_example_proto(example_serialized): + """Parses an Example proto containing a training example of an image. + + The output of the build_image_data.py image preprocessing script is a dataset + containing serialized Example protocol buffers. Each Example proto contains + the following fields: + + image/height: 462 + image/width: 581 + image/colorspace: 'RGB' + image/channels: 3 + image/class/label: 615 + image/class/synset: 'n03623198' + image/class/text: 'knee pad' + image/object/bbox/xmin: 0.1 + image/object/bbox/xmax: 0.9 + image/object/bbox/ymin: 0.2 + image/object/bbox/ymax: 0.6 + image/object/bbox/label: 615 + image/format: 'JPEG' + image/filename: 'ILSVRC2012_val_00041207.JPEG' + image/encoded: + + Args: + example_serialized: scalar Tensor tf.string containing a serialized + Example protocol buffer. + + Returns: + image_buffer: Tensor tf.string containing the contents of a JPEG file. + label: Tensor tf.int32 containing the label. 
+ bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged as + [ymin, xmin, ymax, xmax]. + text: Tensor tf.string containing the human-readable label. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + 'image/class/text': tf.FixedLenFeature([], dtype=tf.string, + default_value=''), + } + sparse_float32 = tf.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.parse_single_example(example_serialized, feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) + ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) + xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) + ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) + + # Note that we impose an ordering of (y, x) just to make life difficult. + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(bbox, [0, 2, 1]) + + return features['image/encoded'], label, bbox, features['image/class/text'] + + +def decode_jpeg(image_buffer, scope=None): # , dtype=tf.float32): + """Decode a JPEG string into one 3-D float image Tensor. + + Args: + image_buffer: scalar string Tensor. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor with values ranging from [0, 1). 
+ """ + # with tf.op_scope([image_buffer], scope, 'decode_jpeg'): + # with tf.name_scope(scope, 'decode_jpeg', [image_buffer]): + with tf.name_scope(scope or 'decode_jpeg'): + # Decode the string as an RGB JPEG. + # Note that the resulting image contains an unknown height and width + # that is set dynamically by decode_jpeg. In other words, the height + # and width of image is unknown at compile-time. + image = tf.image.decode_jpeg(image_buffer, channels=3, + fancy_upscaling=False, + dct_method='INTEGER_FAST') + + # image = tf.Print(image, [tf.shape(image)], 'Image shape: ') + + return image + + +def eval_image(image, height, width, bbox, thread_id, resize): + """Get the image for model evaluation.""" + with tf.name_scope('eval_image'): + if not thread_id: + tf.summary.image( + 'original_image', tf.expand_dims(image, 0)) + + if resize == 'crop': + # Note: This is much slower than crop_to_bounding_box + # It seems that the redundant pad step has huge overhead + # distorted_image = tf.image.resize_image_with_crop_or_pad(image, + # height, width) + shape = tf.shape(image) + image = tf.cond(tf.less(shape[0], shape[1]), + lambda: tf.image.resize_images(image, tf.convert_to_tensor([256, 256*shape[1]/shape[0]], dtype=tf.int32)), + lambda: tf.image.resize_images(image, tf.convert_to_tensor([256*shape[0]/shape[1], 256], dtype=tf.int32))) + shape = tf.shape(image) + + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + #y0=tf.random_uniform([],minval=0,maxval=(shape[0] - height + 1), dtype=tf.int32) + #x0=tf.random_uniform([],minval=0,maxval=(shape[1] - width + 1), dtype=tf.int32) + ## distorted_image = tf.slice(image, [y0,x0,0], [height,width,3]) + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, + width) + else: + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + tf.shape(image), + bounding_boxes=bbox, + min_object_covered=0.5, + aspect_ratio_range=[0.90, 1.10], + area_range=[0.10, 1.0], + max_attempts=100, + 
use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, _ = sample_distorted_bounding_box + # Crop the image to the specified bounding box. + distorted_image = tf.slice(image, bbox_begin, bbox_size) + resize_method = { + 'nearest': tf.image.ResizeMethod.NEAREST_NEIGHBOR, + 'bilinear': tf.image.ResizeMethod.BILINEAR, + 'bicubic': tf.image.ResizeMethod.BICUBIC, + 'area': tf.image.ResizeMethod.AREA + }[resize] + # This resizing operation may distort the images because the aspect + # ratio is not respected. + if cnn_util.tensorflow_version() >= 11: + distorted_image = tf.image.resize_images( + distorted_image, [height, width], + resize_method, + align_corners=False) + else: + distorted_image = tf.image.resize_images( + distorted_image, height, width, resize_method, align_corners=False) + distorted_image.set_shape([height, width, 3]) + if not thread_id: + tf.summary.image( + 'cropped_resized_image', tf.expand_dims(distorted_image, 0)) + image = distorted_image + return image + + +def distort_image(image, height, width, bbox, thread_id=0, scope=None): + """Distort one image for training a network. + + Distorting images provides a useful technique for augmenting the data + set during training in order to make the network invariant to aspects + of the image that do not affect the label. + + Args: + image: 3-D float Tensor of image + height: integer + width: integer + bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] + where each coordinate is [0, 1) and the coordinates are arranged + as [ymin, xmin, ymax, xmax]. + thread_id: integer indicating the preprocessing thread. + scope: Optional scope for op_scope. + Returns: + 3-D float Tensor of distorted image used for training. 
+ """ + # with tf.op_scope([image, height, width, bbox], scope, 'distort_image'): + # with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): + with tf.name_scope(scope or 'distort_image'): + # Each bounding box has shape [1, num_boxes, box coords] and + # the coordinates are ordered [ymin, xmin, ymax, xmax]. + + # After this point, all image pixels reside in [0,1) + # until the very end, when they're rescaled to (-1, 1). The various + # adjust_* ops all require this range for dtype float. + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + + # Display the bounding box in the first thread only. + if not thread_id: + image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), + bbox) + tf.summary.image( + 'image_with_bounding_boxes', image_with_box) + + # A large fraction of image datasets contain a human-annotated bounding + # box delineating the region of the image containing the object of interest. + # We choose to create a new bounding box for the object which is a randomly + # distorted version of the human-annotated bounding box that obeys an allowed + # range of aspect ratios, sizes and overlap with the human-annotated + # bounding box. If no box is supplied, then we assume the bounding box is + # the entire image. + sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( + tf.shape(image), + bounding_boxes=bbox, + min_object_covered=0.1, + aspect_ratio_range=[0.99, 1.01], + area_range=[0.05, 1.0], + max_attempts=100, + use_image_if_no_bounding_boxes=True) + bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box + if not thread_id: + image_with_distorted_box = tf.image.draw_bounding_boxes( + tf.expand_dims(image, 0), distort_bbox) + tf.summary.image( + 'images_with_distorted_bounding_box', + image_with_distorted_box) + + # Crop the image to the specified bounding box. 
+ distorted_image = tf.slice(image, bbox_begin, bbox_size) + + # This resizing operation may distort the images because the aspect + # ratio is not respected. We select a resize method in a round robin + # fashion based on the thread number. + # Note that ResizeMethod contains 4 enumerated resizing methods. + resize_method = thread_id % 4 + if cnn_util.tensorflow_version() >= 11: + distorted_image = tf.image.resize_images( + distorted_image, [height, width], resize_method, align_corners=False) + else: + distorted_image = tf.image.resize_images( + distorted_image, height, width, resize_method, align_corners=False) + # Restore the shape since the dynamic slice based upon the bbox_size loses + # the third dimension. + distorted_image.set_shape([height, width, 3]) + if not thread_id: + tf.summary.image( + 'cropped_resized_image', + tf.expand_dims(distorted_image, 0)) + + # Randomly flip the image horizontally. + distorted_image = tf.image.random_flip_left_right(distorted_image) + + # Randomly distort the colors. + distorted_image = distort_color(distorted_image, thread_id) + + # Note: This ensures the scaling matches the output of eval_image + distorted_image *= 256 + + if not thread_id: + tf.summary.image( + 'final_distorted_image', + tf.expand_dims(distorted_image, 0)) + return distorted_image + + +def distort_color(image, thread_id=0, scope=None): + """Distort the color of the image. + + Each color distortion is non-commutative and thus ordering of the color ops + matters. Ideally we would randomly permute the ordering of the color ops. + Rather than adding that level of complication, we select a distinct ordering + of color ops for each preprocessing thread. + + Args: + image: Tensor containing single image. + thread_id: preprocessing thread ID. + scope: Optional scope for op_scope. 
+ Returns: + color-distorted image + """ + # with tf.op_scope([image], scope, 'distort_color'): + # with tf.name_scope(scope, 'distort_color', [image]): + with tf.name_scope(scope or 'distort_color'): + color_ordering = thread_id % 2 + + if color_ordering == 0: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + elif color_ordering == 1: + image = tf.image.random_brightness(image, max_delta=32. / 255.) + image = tf.image.random_contrast(image, lower=0.5, upper=1.5) + image = tf.image.random_saturation(image, lower=0.5, upper=1.5) + image = tf.image.random_hue(image, max_delta=0.2) + + # The random_* ops do not necessarily clamp. + image = tf.clip_by_value(image, 0.0, 1.0) + return image + + +class ImagePreprocessor(object): + """Preprocessor for input images.""" + + def __init__(self, + height, + width, + batch_size, + device_count, + dtype=tf.float32, + train=True, + distortions=None, + resize_method=None): + self.height = height + self.width = width + self.batch_size = batch_size + self.device_count = device_count + self.dtype = dtype + self.train = train + self.resize_method = resize_method + if distortions is None: + distortions = False + self.distortions = distortions + if self.batch_size % self.device_count != 0: + raise ValueError( + ('batch_size must be a multiple of device_count: ' + 'batch_size %d, device_count: %d') % + (self.batch_size, self.device_count)) + self.batch_size_per_device = self.batch_size // self.device_count + + def preprocess(self, image_buffer, bbox, thread_id): + """Preprocessing image_buffer using thread_id.""" + # Note: Width and height of image is known only at runtime. 
+ image = tf.image.decode_jpeg(image_buffer, channels=3, + dct_method='INTEGER_FAST') + if self.train and self.distortions: + image = distort_image(image, self.height, self.width, bbox, thread_id) + else: + image = eval_image(image, self.height, self.width, bbox, thread_id, + self.resize_method) + # Note: image is now float32 [height,width,3] with range [0, 255] + + # image = tf.cast(image, tf.uint8) # HACK TESTING + + return image + + def minibatch(self, dataset, subset): + with tf.name_scope('batch_processing'): + images = [[] for i in range(self.device_count)] + labels = [[] for i in range(self.device_count)] + record_input = data_flow_ops.RecordInput( + file_pattern=dataset.tf_record_pattern(subset), + seed=randint(0, 9000), + parallelism=64, + buffer_size=10000, + batch_size=self.batch_size, + name='record_input') + records = record_input.get_yield_op() + records = tf.split(records, self.batch_size, 0) + records = [tf.reshape(record, []) for record in records] + for i in xrange(self.batch_size): + value = records[i] + image_buffer, label_index, bbox, _ = parse_example_proto(value) + image = self.preprocess(image_buffer, bbox, i % 4) + device_index = i % self.device_count + images[device_index].append(image) + labels[device_index].append(label_index) + label_index_batch = [None] * self.device_count + for device_index in xrange(self.device_count): + images[device_index] = tf.parallel_stack(images[device_index]) + label_index_batch[device_index] = tf.concat(labels[device_index], 0) + + # dynamic_pad=True) # HACK TESTING dynamic_pad=True + images[device_index] = tf.cast(images[device_index], self.dtype) + depth = 3 + images[device_index] = tf.reshape( + images[device_index], + shape=[self.batch_size_per_device, self.height, self.width, depth]) + label_index_batch[device_index] = tf.reshape( + label_index_batch[device_index], [self.batch_size_per_device]) + # Display the training images in the visualizer. 
+ # tf.summary.image('images', images) + + return images, label_index_batch, records diff --git a/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing_benchmark.py b/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing_benchmark.py new file mode 100644 index 000000000..8e3556556 --- /dev/null +++ b/models/image_recognition/tensorflow/resnet50v1_5/int8/preprocessing_benchmark.py @@ -0,0 +1,173 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + + +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.data.experimental import parallel_interleave +from tensorflow.data.experimental import map_and_batch +from tensorflow.python.platform import gfile + + +def parse_example_proto(example_serialized): + """Parses an Example proto containing a training example of an image. + """ + # Dense features in Example proto. + feature_map = { + 'image/encoded': tf.FixedLenFeature([], dtype=tf.string, + default_value=''), + 'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64, + default_value=-1), + } + sparse_float32 = tf.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + {k: sparse_float32 for k in ['image/object/bbox/xmin', + 'image/object/bbox/ymin', + 'image/object/bbox/xmax', + 'image/object/bbox/ymax']}) + + features = tf.parse_single_example(example_serialized, feature_map) + label = tf.cast(features['image/class/label'], dtype=tf.int32) + + return features['image/encoded'], label + + +def eval_image(image, height, width, resize_method, + central_fraction=0.875, scope=None): + with tf.name_scope('eval_image'): + if resize_method == 'crop': + shape = tf.shape(image) + image = tf.cond(tf.less(shape[0], shape[1]), + lambda: tf.image.resize_images(image, + tf.convert_to_tensor([256, 256 * shape[1] / shape[0]], + dtype=tf.int32)), + lambda: tf.image.resize_images(image, + tf.convert_to_tensor([256 * shape[0] / shape[1], 256], + dtype=tf.int32))) + shape = tf.shape(image) + y0 = (shape[0] - height) // 2 + x0 = (shape[1] - width) // 2 + distorted_image = tf.image.crop_to_bounding_box(image, y0, x0, height, width) + distorted_image.set_shape([height, width, 3]) + means = tf.broadcast_to([123.68, 116.78, 103.94], tf.shape(distorted_image)) + return distorted_image - means + else: # bilinear + if 
image.dtype != tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image with an area containing 87.5% of + # the original image. + if central_fraction: + image = tf.image.central_crop(image, central_fraction=central_fraction) + + if height and width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize_bilinear(image, [height, width], + align_corners=False) + image = tf.squeeze(image, [0]) + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + return image + + +class RecordInputImagePreprocessor(object): + """Preprocessor for images with RecordInput format.""" + + def __init__(self, + height, + width, + batch_size, + num_cores, + resize_method): + + self.height = height + self.width = width + self.batch_size = batch_size + self.num_cores = num_cores + self.resize_method = resize_method + + def parse_and_preprocess(self, value): + # parse + image_buffer, label_index = parse_example_proto(value) + # preprocess + image = tf.image.decode_jpeg( + image_buffer, channels=3, fancy_upscaling=False, dct_method='INTEGER_FAST') + image = eval_image(image, self.height, self.width, self.resize_method) + + return (image, label_index) + + def minibatch(self, dataset, subset, cache_data=False): + + with tf.name_scope('batch_processing'): + + glob_pattern = dataset.tf_record_pattern(subset) + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError('Found no files in --data_dir matching: {}' + .format(glob_pattern)) + ds = tf.data.TFRecordDataset.list_files(file_names) + + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, cycle_length=self.num_cores, block_length=5, + sloppy=True, + buffer_output_elements=10000, prefetch_input_elements=10000)) + + if cache_data: + ds = ds.take(1).cache().repeat() + + ds = ds.prefetch(buffer_size=10000) + # ds = ds.prefetch(buffer_size=self.batch_size) + + # num of parallel 
batches not greater than 56 + max_num_parallel_batches = min(56, 2*self.num_cores) + ds = ds.apply( + map_and_batch( + map_func=self.parse_and_preprocess, + batch_size=self.batch_size, + num_parallel_batches=max_num_parallel_batches, + num_parallel_calls=None)) # this number should be tuned + + ds = ds.prefetch(buffer_size=tf.contrib.data.AUTOTUNE) # this number can be tuned + + ds_iterator = ds.make_one_shot_iterator() + images, _ = ds_iterator.get_next() + + return images diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_resnet50v1_5_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_resnet50v1_5_args.json new file mode 100644 index 000000000..271813ed7 --- /dev/null +++ b/tests/unit/common/tensorflow/tf_model_args/tf_resnet50v1_5_args.json @@ -0,0 +1,40 @@ +[ + { "_comment": "FP32 accuracy command", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=fp32 --mode=inference --model-source-dir=/workspace/models --intelai-models=/workspace/intelai_models --batch-size 100 --socket-id 0 --accuracy-only --verbose --in-graph=/in_graph/freezed_resnet50v1_5.pb --accuracy-only --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=100 --data-location=/dataset --accuracy-only --num-cores=28 --warmup-steps=10 --steps=50"}, + + { "_comment": "FP32 command for latency benchmark with default --num-inter-threads, --num-intra-threads.", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50v1_5 --batch-size 128 --in-graph /freezed_resnet50v1_5.pb --intelai-models . 
--socket-id 0 --verbose", + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28"}, + + { "_comment": "FP32 command for latency benchmark with --num-inter-threads 4 --num-intra-threads 16", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50v1_5 --batch-size 1 --in-graph /freezed_resnet50v1_5.pb --intelai-models . --socket-id 0 --verbose --num-inter-threads 4 --num-intra-threads 16", + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50v1_5.pb --num-inter-threads=4 --num-intra-threads=16 --batch-size=1 --warmup-steps=10 --steps=50 --num-cores=28"}, + + { "_comment": "FP32 command for throughput benchmark with --num-inter-threads=1 --num-intra-threads=28", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision fp32 --mode inference --model-name resnet50v1_5 --batch-size 128 --in-graph /freezed_resnet50v1_5.pb --intelai-models . 
--socket-id 0 --verbose", + "output": "numactl --cpunodebind=0 --membind=0 python ./inference/eval_image_classifier_inference.py --input-graph=/freezed_resnet50v1_5.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=10 --steps=50 --num-cores=28"}, + + { "_comment": "Int8 command for throughput benchmark with --output-dir enabled.", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --steps=200 --warmup-steps=20", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200"}, + + { "_comment": "Int8 command for data calibration with --calibration-only", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=int8 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=-1 --output-dir=/workspace/benchmarks/common/tensorflow/logs --accuracy-only --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --data-location=/dataset --calibration-only", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python /workspace/intelai_models/int8/generate_calibration_data.py --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb 
--data_location=/dataset"}, + + { "_comment": "Fp32 command for throughput benchmark with --output-results enabled.", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=fp32 --mode=inference --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=100 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --output-results --verbose --model-source-dir=/workspace/models --in-graph=/in_graph/resnet50v1_5_fp32_pretrained_model.pb --data-location=/dataset", + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50v1_5_fp32_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --num-cores=28 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --results-file-path /workspace/benchmarks/common/tensorflow/logs/resnet50v1_5_fp32_inference_results*.txt"}, + + { "_comment": "Int8 accuracy command", + "input": "run_tf_benchmark.py --framework tensorflow --use-case image_recognition --precision int8 --mode inference --model-name resnet50v1_5 --batch-size 100 --data-location /dataset --in-graph /final_int8_resnet50v1_5.pb --intelai-models . 
--accuracy-only --verbose", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 python ./inference/eval_image_classifier_inference.py --input-graph=/final_int8_resnet50v1_5.pb --num-inter-threads=2 --num-intra-threads=56 --batch-size=100 --warmup-steps=10 --steps=50 --data-location=/dataset --accuracy-only"}, + + { "_comment": "Int8 command for throughput benchmark with --steps=200 --warmup-steps=20", + "input": "run_tf_benchmark.py --framework=tensorflow --use-case=image_recognition --model-name=resnet50v1_5 --precision=int8 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=128 --socket-id=0 --output-dir=/workspace/benchmarks/common/tensorflow/logs --benchmark-only --verbose --in-graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --steps=200 --warmup-steps=20", + "output": "LD_PRELOAD=/usr/lib/libtcmalloc.so.4.2.6 numactl --cpunodebind=0 --membind=0 python /workspace/intelai_models/inference/eval_image_classifier_inference.py --input-graph=/in_graph/resnet50v1_5_int8_pretrained_model.pb --num-inter-threads=1 --num-intra-threads=28 --batch-size=128 --warmup-steps=20 --steps=200" + } +] + + From 3db66e13f5ebc325ca020dc9afe00482b0e7a11f Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Thu, 23 May 2019 13:47:20 -0700 Subject: [PATCH 38/62] Add link download the MobileNet v1 Int8 pretrained model (#313) --- .../tensorflow/mobilenet_v1/README.md | 47 +++++++++---------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index af5947e20..3c8c0d947 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -40,7 +40,10 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. -rw-r--r--. 
1 user 52508270 Jun 20 15:09 validation-00126-of-00128 -rw-r--r--. 1 user 55292089 Jun 20 15:09 validation-00127-of-00128 ``` -2. A link to download the pre-trained model is coming soon. +2. Download the pre-trained model. +``` +$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilenetv1_int8_pretrained_model.pb +``` 3. Clone the [intelai/models](https://github.com/intelai/models) repo and then run the benchmarking scripts for either benchmarking throughput, @@ -113,45 +116,39 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. Example log tail when benchmarking for throughput: ``` - OMP: Info #250: KMP_AFFINITY: pid 682 tid 885 thread 55 bound to OS proc set 83 - OMP: Info #250: KMP_AFFINITY: pid 682 tid 886 thread 56 bound to OS proc set 0 - OMP: Info #250: KMP_AFFINITY: pid 682 tid 884 thread 54 bound to OS proc set 82 [Running warmup steps...] - steps = 10, 1830.24507317 images/sec + steps = 10, 1865.30956528 images/sec [Running benchmark steps...] - steps = 10, 1841.47811007 images/sec - steps = 20, 1848.84108679 images/sec - steps = 30, 1847.84668478 images/sec - steps = 40, 1849.15354305 images/sec - steps = 50, 1840.95611001 images/sec + steps = 10, 1872.92398031 images/sec + steps = 20, 1862.64499512 images/sec + steps = 30, 1857.97283454 images/sec + steps = 40, 1864.70142784 images/sec + steps = 50, 1854.23896906 images/sec Ran inference with batch size 240 - Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_int8_20190409_222536.log + Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_int8_20190523_164626.log ``` Example log tail when benchmarking for latency: ``` - OMP: Info #250: KMP_AFFINITY: pid 681 tid 882 thread 53 bound to OS proc set 81 - OMP: Info #250: KMP_AFFINITY: pid 681 tid 884 thread 55 bound to OS proc set 83 - OMP: Info #250: KMP_AFFINITY: pid 681 tid 885 thread 56 bound to OS proc set 0 [Running warmup steps...] 
- steps = 10, 139.81945463 images/sec + steps = 10, 197.082229114 images/sec [Running benchmark steps...] - steps = 10, 140.212074614 images/sec - steps = 20, 135.230332731 images/sec - steps = 30, 133.508530685 images/sec - steps = 40, 135.724816361 images/sec - steps = 50, 132.714339957 images/sec + steps = 10, 195.201936054 images/sec + steps = 20, 195.693743293 images/sec + steps = 30, 198.999098543 images/sec + steps = 40, 189.256565292 images/sec + steps = 50, 201.252531069 images/sec Ran inference with batch size 1 - Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_int8_20190409_223122.log + Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_int8_20190523_164348.log ``` Example log tail when running for accuracy: ``` - Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7009, 0.8933) - Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7011, 0.8933) - Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7013, 0.8933) + Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7014, 0.8935) + Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7014, 0.8934) + Processed 50000 images. 
(Top1 accuracy, Top5 accuracy) = (0.7014, 0.8934) Ran inference with batch size 100 - Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_int8_20190409_223621.log + Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_int8_20190523_164955.log ``` ## FP32 Inference Instructions From 59dbbda8697b93849e7b5936b5e1267769e415b7 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Fri, 24 May 2019 09:55:33 -0700 Subject: [PATCH 39/62] Trivial update to benchmark README (#315) --- benchmarks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 414e344e5..03f59a89d 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -37,7 +37,7 @@ dependencies to be installed: | Object Detection | TensorFlow | [R-FCN](https://arxiv.org/pdf/1605.06409.pdf) | Inference | [Int8](object_detection/tensorflow/rfcn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/rfcn/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [Faster R-CNN](https://arxiv.org/pdf/1506.01497.pdf) | Inference | [Int8](object_detection/tensorflow/faster_rcnn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/faster_rcnn/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [SSD-MobileNet](https://arxiv.org/pdf/1704.04861.pdf) | Inference | [Int8](object_detection/tensorflow/ssd-mobilenet/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-mobilenet/README.md#fp32-inference-instructions) | -| Object Detection | TensorFlow | [SSD-ResNet34](https://arxiv.org/pdf/1512.02325.pdf) | Inference | [INT8](object_detection/tensorflow/ssd-resnet34/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-resnet34/README.md#fp32-inference-instructions) | +| Object Detection | TensorFlow | [SSD-ResNet34](https://arxiv.org/pdf/1512.02325.pdf) | 
Inference | [Int8](object_detection/tensorflow/ssd-resnet34/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-resnet34/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [SSD-VGG16](https://arxiv.org/pdf/1512.02325.pdf) | Inference | [Int8](object_detection/tensorflow/ssd_vgg16/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd_vgg16/README.md#fp32-inference-instructions) | | Recommendation | TensorFlow | [NCF](https://arxiv.org/pdf/1708.05031.pdf) | Inference | [FP32](recommendation/tensorflow/ncf/README.md#fp32-inference-instructions) | | Recommendation | TensorFlow | [Wide & Deep Large Dataset](https://arxiv.org/pdf/1606.07792.pdf) | Inference | [Int8](recommendation/tensorflow/wide_deep_large_ds/README.md#int8-inference-instructions) [FP32](recommendation/tensorflow/wide_deep_large_ds/README.md#fp32-inference-instructions) | From 59563bec9491647ad67269526255c82effd91c87 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Tue, 28 May 2019 09:58:53 -0700 Subject: [PATCH 40/62] Add link to download the DenseNet 169 pretrained model (#318) --- .../image_recognition/tensorflow/densenet169/README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/benchmarks/image_recognition/tensorflow/densenet169/README.md b/benchmarks/image_recognition/tensorflow/densenet169/README.md index fa02b7a80..f75146e6d 100644 --- a/benchmarks/image_recognition/tensorflow/densenet169/README.md +++ b/benchmarks/image_recognition/tensorflow/densenet169/README.md @@ -31,7 +31,10 @@ following modes/precisions: -rw-r--r--. 1 user 55292089 Jun 20 15:09 validation-00127-of-00128 ``` -2. A link to download the pre-trained model is coming soon. +2. Download the pretrained model: + ``` + $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/densenet169_fp32_pretrained_model.pb + ``` 3. 
Clone the [intelai/models](https://github.com/intelai/models) repo and then run the benchmarking scripts for either benchmarking throughput, From 4adab615b0c0ae5e81e2fd116f18482c8f30c2d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Wencel?= Date: Thu, 30 May 2019 18:30:58 +0200 Subject: [PATCH 41/62] Add iteration time to accuracy scripts (#317) --- .../tensorflow/densenet169/README.md | 4 ++++ .../tensorflow/inceptionv3/README.md | 6 +++++- .../tensorflow/inceptionv4/README.md | 11 ++++++++++- .../tensorflow/mobilenet_v1/README.md | 8 +++++++- .../image_recognition/tensorflow/resnet50/README.md | 6 ++++++ .../tensorflow/resnet50v1_5/README.md | 5 +++++ .../tensorflow/densenet169/inference/fp32/accuracy.py | 3 +++ .../eval_image_classifier_accuracy.py | 3 +++ .../fp32/eval_image_classifier_inference.py | 3 +++ .../tensorflow/inceptionv3/int8/accuracy.py | 3 +++ .../tensorflow/inceptionv4/inference/accuracy.py | 3 +++ .../mobilenet_v1/inference/fp32/accuracy.py | 3 +++ .../mobilenet_v1/inference/int8/accuracy.py | 3 +++ .../inference/eval_image_classifier_inference.py | 3 +++ .../inference/eval_image_classifier_inference.py | 3 +++ .../inference/eval_image_classifier_inference.py | 3 +++ 16 files changed, 67 insertions(+), 3 deletions(-) diff --git a/benchmarks/image_recognition/tensorflow/densenet169/README.md b/benchmarks/image_recognition/tensorflow/densenet169/README.md index f75146e6d..f38be702a 100644 --- a/benchmarks/image_recognition/tensorflow/densenet169/README.md +++ b/benchmarks/image_recognition/tensorflow/densenet169/README.md @@ -131,9 +131,13 @@ following modes/precisions: Example log tail when running for accuracy: ``` + Iteration time: 581.6446 ms 0.757505030181 + Iteration time: 581.5755 ms 0.757489959839 + Iteration time: 581.5709 ms 0.75749498998 + Iteration time: 581.1705 ms 0.75748 Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_densenet169_inference_fp32_20190412_021545.log diff --git 
a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md index e02c73331..f40cdfebd 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md @@ -187,6 +187,7 @@ different configs. Example log tail when running for accuracy: ``` +Iteration time: 357.3781 ms Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7666, 0.9333) Executing command: python /workspace/intelai_models/int8/accuracy.py --input_height=299 --input_width=299 --num_intra_threads=56 --num_inter_threads=2 --batch_size=100 --input_graph=/in_graph/inceptionv3_int8_pretrained_model.pb --data_location=/dataset Ran inference with batch size 100 @@ -329,12 +330,15 @@ python launch_benchmark.py \ ``` Example log tail when benchmarking for accuracy: ``` +Iteration time: 756.7571 ms Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7673, 0.9341) +Iteration time: 757.3781 ms Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7674, 0.9341) +Iteration time: 760.3024 ms Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7675, 0.9342) Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_inceptionv3_inference_fp32_20190104_023816.log ``` Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands -to get additional debug output or change the default output location.. \ No newline at end of file +to get additional debug output or change the default output location.. 
diff --git a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md index e89d13dee..75c6fa102 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md @@ -101,9 +101,13 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. Example log tail when running for accuracy: ``` ... + Iteration time: 685.1976 ms Processed 49700 images. (Top1 accuracy, Top5 accuracy) = (0.7985, 0.9504) + Iteration time: 686.3845 ms Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7983, 0.9504) + Iteration time: 686.7021 ms Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7984, 0.9504) + Iteration time: 685.8914 ms Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7984, 0.9504) Ran inference with batch size 100 Log location outside container: /benchmark_inceptionv4_inference_int8_20190306_221608.log @@ -226,10 +230,15 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. Example log tail when running for accuracy: ``` ... + Iteration time: 1337.8728 ms Processed 49600 images. (Top1 accuracy, Top5 accuracy) = (0.8015, 0.9517) + Iteration time: 1331.8253 ms Processed 49700 images. (Top1 accuracy, Top5 accuracy) = (0.8017, 0.9518) + Iteration time: 1339.1553 ms Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.8017, 0.9518) + Iteration time: 1334.5991 ms Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.8018, 0.9519) + Iteration time: 1336.1905 ms Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.8018, 0.9519) Ran inference with batch size 100 Log location outside container: /benchmark_inceptionv4_inference_fp32_20190308_182729.log @@ -262,4 +271,4 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
Latency: 63.534 ms Ran inference with batch size 1 Log location outside container: /benchmark_inceptionv4_inference_fp32_20190307_221954.log - ``` \ No newline at end of file + ``` diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index 3c8c0d947..93a0d9025 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -144,8 +144,11 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene Example log tail when running for accuracy: ``` + Iteration time: 66.8541 ms Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7014, 0.8935) + Iteration time: 66.7909 ms Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7014, 0.8934) + Iteration time: 66.7001 ms Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7014, 0.8934) Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_int8_20190523_164955.log @@ -287,9 +290,12 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene ``` * Below is a sample lof file snippet when testing accuracy: ``` + Iteration time: 119.1134 ms Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7104, 0.8999) + Iteration time: 118.8375 ms Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7103, 0.8999) + Iteration time: 119.9311 ms Processed 50000 images. 
(Top1 accuracy, Top5 accuracy) = (0.7102, 0.8999) Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_fp32_20190110_211648.log - ``` \ No newline at end of file + ``` diff --git a/benchmarks/image_recognition/tensorflow/resnet50/README.md b/benchmarks/image_recognition/tensorflow/resnet50/README.md index 31c06609a..fa2fb6e65 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50/README.md @@ -65,10 +65,15 @@ The log file is saved to the value of `--output-dir`. The tail of the log output when the benchmarking completes should look something like this: ``` +Iteration time: 233.495 ms Processed 49600 images. (Top1 accuracy, Top5 accuracy) = (0.7361, 0.9155) +Iteration time: 233.231 ms Processed 49700 images. (Top1 accuracy, Top5 accuracy) = (0.7361, 0.9155) +Iteration time: 234.541 ms Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7360, 0.9154) +Iteration time: 233.033 ms Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7361, 0.9155) +Iteration time: 233.013 ms Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7360, 0.9154) Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_resnet50_inference_int8_20190104_212224.log @@ -236,6 +241,7 @@ The tail of the log output when the accuracy run completes should look something like this: ``` ... +Iteration time: 649.252 ms Processed 50000 images. 
(Top1 accuracy, Top5 accuracy) = (0.7430, 0.9188) Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_resnet50_inference_fp32_20190104_213452.log diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md b/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md index cc1f255a6..610eb7cc0 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md @@ -68,9 +68,13 @@ The log file is saved to the value of `--output-dir`. The tail of the log output when the benchmarking completes should look something like this: ``` +Iteration time: 239.899 ms Processed 49700 images. (Top1 accuracy, Top5 accuracy) = (0.7622, 0.9296) +Iteration time: 239.110 ms Processed 49800 images. (Top1 accuracy, Top5 accuracy) = (0.7621, 0.9295) +Iteration time: 239.512 ms Processed 49900 images. (Top1 accuracy, Top5 accuracy) = (0.7622, 0.9296) +Iteration time: 239.989 ms Processed 50000 images. (Top1 accuracy, Top5 accuracy) = (0.7623, 0.9296) Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_resnet50_inference_int8_{timestamp}.log @@ -242,6 +246,7 @@ The tail of the log output when the accuracy run completes should look something like this: ``` ... +Iteration time: 514.427 ms Processed 50000 images. 
(Top1 accuracy, Top5 accuracy) = (0.7651, 0.9307) lscpu_path_cmd = command -v lscpu lscpu located here: /usr/bin/lscpu diff --git a/models/image_recognition/tensorflow/densenet169/inference/fp32/accuracy.py b/models/image_recognition/tensorflow/densenet169/inference/fp32/accuracy.py index 35d598a48..0335ce423 100644 --- a/models/image_recognition/tensorflow/densenet169/inference/fp32/accuracy.py +++ b/models/image_recognition/tensorflow/densenet169/inference/fp32/accuracy.py @@ -121,9 +121,11 @@ def load_graph(model_file): #print(np_labels.shape) num_processed_images += batch_size num_remaining_images -= batch_size + start_time = time.time() # Compute inference on the preprocessed data predictions1 = sess_graph.run(output_tensor, {input_tensor: np_images}) + elapsed_time = time.time() - start_time if(batch_size !=1): predictions1 = sess.run(tf.squeeze(predictions1)) else : @@ -131,4 +133,5 @@ def load_graph(model_file): predictions2 = tf.argmax(predictions1, axis=1) predictions = sess.run(predictions2) top1 += batch_size - (np.count_nonzero(predictions - np_labels)) + print("Iteration time: %0.4f ms" % elapsed_time) print(top1/num_processed_images) diff --git a/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier_accuracy.py b/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier_accuracy.py index 5671f2287..595b252a4 100644 --- a/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier_accuracy.py +++ b/models/image_recognition/tensorflow/inception_resnet_v2/eval_image_classifier_accuracy.py @@ -147,9 +147,11 @@ def load_graph(model_file): np_images, np_labels = sess.run([images[0], labels[0]]) num_processed_images += batch_size num_remaining_images -= batch_size + start_time = time.time() # Compute inference on the preprocessed data predictions = sess_graph.run(output_tensor, {input_tensor: np_images}) + elapsed_time = time.time() - start_time accuracy1 = tf.reduce_sum( 
tf.cast(tf.nn.in_top_k(tf.constant(predictions), tf.constant(np_labels), 1), tf.float32)) @@ -160,6 +162,7 @@ def load_graph(model_file): np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) total_accuracy1 += np_accuracy1 total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) print("Processed %d images. (Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ % (num_processed_images, total_accuracy1 / num_processed_images, total_accuracy5 / num_processed_images)) diff --git a/models/image_recognition/tensorflow/inceptionv3/fp32/eval_image_classifier_inference.py b/models/image_recognition/tensorflow/inceptionv3/fp32/eval_image_classifier_inference.py index 98b826ac9..b84d28ae3 100644 --- a/models/image_recognition/tensorflow/inceptionv3/fp32/eval_image_classifier_inference.py +++ b/models/image_recognition/tensorflow/inceptionv3/fp32/eval_image_classifier_inference.py @@ -189,9 +189,11 @@ def run(self): num_processed_images += self.args.batch_size num_remaining_images -= self.args.batch_size + start_time = time.time() # Compute inference on the preprocessed data predictions = infer_sess.run(output_tensor, {input_tensor: np_images}) + elapsed_time = time.time() - start_time with tf.Graph().as_default() as accu_graph: accuracy1 = tf.reduce_sum( @@ -207,6 +209,7 @@ def run(self): total_accuracy1 += np_accuracy1 total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) print("Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ % (num_processed_images, total_accuracy1 / num_processed_images, total_accuracy5 / num_processed_images)) diff --git a/models/image_recognition/tensorflow/inceptionv3/int8/accuracy.py b/models/image_recognition/tensorflow/inceptionv3/int8/accuracy.py index 7d79593aa..8062bd6be 100644 --- a/models/image_recognition/tensorflow/inceptionv3/int8/accuracy.py +++ b/models/image_recognition/tensorflow/inceptionv3/int8/accuracy.py @@ -120,9 +120,11 @@ def load_graph(model_file): np_images, np_labels = sess.run([images[0], labels[0]]) num_processed_images += batch_size num_remaining_images -= batch_size + start_time = time.time() # Compute inference on the preprocessed data predictions = sess_graph.run(output_tensor, {input_tensor: np_images}) + elapsed_time = time.time() - start_time accuracy1 = tf.reduce_sum( tf.cast(tf.nn.in_top_k(tf.constant(predictions), tf.constant(np_labels), 1), tf.float32)) @@ -133,6 +135,7 @@ def load_graph(model_file): np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) total_accuracy1 += np_accuracy1 total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) print("Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ % (num_processed_images, total_accuracy1/num_processed_images, total_accuracy5/num_processed_images)) diff --git a/models/image_recognition/tensorflow/inceptionv4/inference/accuracy.py b/models/image_recognition/tensorflow/inceptionv4/inference/accuracy.py index 3dc0b90f9..a3bdf7c58 100644 --- a/models/image_recognition/tensorflow/inceptionv4/inference/accuracy.py +++ b/models/image_recognition/tensorflow/inceptionv4/inference/accuracy.py @@ -144,9 +144,11 @@ def load_graph(model_file): np_images, np_labels = sess.run([images[0], labels[0]]) num_processed_images += batch_size num_remaining_images -= batch_size + start_time = time.time() # Compute inference on the preprocessed data predictions = sess_graph.run(output_tensor, {input_tensor: np_images}) + elapsed_time = time.time() - start_time accuracy1 = tf.reduce_sum( tf.cast(tf.nn.in_top_k(tf.constant(predictions), tf.constant(np_labels), 1), tf.float32)) @@ -157,6 +159,7 @@ def load_graph(model_file): np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) total_accuracy1 += np_accuracy1 total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) print( "Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ % ( diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy.py index 7d6a37abc..f5d45fb9f 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/fp32/accuracy.py @@ -143,9 +143,11 @@ def load_graph(model_file): np_images, np_labels = sess.run([images[0], labels[0]]) num_processed_images += batch_size num_remaining_images -= batch_size + start_time = time.time() # Compute inference on the preprocessed data predictions = sess_graph.run(output_tensor, {input_tensor: np_images}) + elapsed_time = time.time() - start_time accuracy1 = tf.reduce_sum( tf.cast(tf.nn.in_top_k(tf.constant(predictions), tf.constant(np_labels), 1), tf.float32)) @@ -156,6 +158,7 @@ def load_graph(model_file): np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) total_accuracy1 += np_accuracy1 total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) print( "Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ % ( diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py index 347c39989..6d7acaf50 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/accuracy.py @@ -114,9 +114,11 @@ def load_graph(model_file): np_images, np_labels = sess.run([images[0], labels[0]]) num_processed_images += batch_size num_remaining_images -= batch_size + start_time = time.time() # Compute inference on the preprocessed data predictions = sess_graph.run(output_tensor, {input_tensor: np_images}) + elapsed_time = time.time() - start_time accuracy1 = tf.reduce_sum( tf.cast(tf.nn.in_top_k(tf.constant(predictions), tf.constant(np_labels), 1), tf.float32)) @@ -127,6 +129,7 @@ def load_graph(model_file): np_accuracy1, np_accuracy5 = sess.run([accuracy1, accuracy5]) total_accuracy1 += np_accuracy1 total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) print("Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ % (num_processed_images, total_accuracy1/num_processed_images, total_accuracy5/num_processed_images)) diff --git a/models/image_recognition/tensorflow/resnet101/inference/eval_image_classifier_inference.py b/models/image_recognition/tensorflow/resnet101/inference/eval_image_classifier_inference.py index a65a54b08..e62b40b3d 100644 --- a/models/image_recognition/tensorflow/resnet101/inference/eval_image_classifier_inference.py +++ b/models/image_recognition/tensorflow/resnet101/inference/eval_image_classifier_inference.py @@ -200,9 +200,11 @@ def run(self): num_processed_images += self.args.batch_size num_remaining_images -= self.args.batch_size + start_time = time.time() # Compute inference on the preprocessed data predictions = infer_sess.run(output_tensor, {input_tensor: np_images}) + elapsed_time = time.time() - start_time with tf.Graph().as_default() as accu_graph: # Putting all code within this make things faster. accuracy1 = tf.reduce_sum( @@ -216,6 +218,7 @@ def run(self): np_accuracy1, np_accuracy5 = accu_sess.run([accuracy1, accuracy5]) total_accuracy1 += np_accuracy1 total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) print("Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ % (num_processed_images, total_accuracy1 / num_processed_images, total_accuracy5 / num_processed_images)) diff --git a/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py b/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py index 791c1b761..21a1b465e 100644 --- a/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py +++ b/models/image_recognition/tensorflow/resnet50/inference/eval_image_classifier_inference.py @@ -230,9 +230,11 @@ def run(self): num_processed_images += self.args.batch_size num_remaining_images -= self.args.batch_size + start_time = time.time() # Compute inference on the preprocessed data predictions = infer_sess.run(output_tensor, {input_tensor: np_images}) + elapsed_time = time.time() - start_time # Write out the file name, expected label, and top prediction self.write_results_output(predictions, tf_filenames, np_labels) @@ -251,6 +253,7 @@ def run(self): total_accuracy1 += np_accuracy1 total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) print("Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ % (num_processed_images, total_accuracy1 / num_processed_images, total_accuracy5 / num_processed_images)) diff --git a/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py b/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py index c8fe46a11..e1e6133e1 100644 --- a/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py +++ b/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py @@ -230,9 +230,11 @@ def run(self): num_processed_images += self.args.batch_size num_remaining_images -= self.args.batch_size + start_time = time.time() # Compute inference on the preprocessed data predictions = infer_sess.run(output_tensor, {input_tensor: np_images}) + elapsed_time = time.time() - start_time # Write out the file name, expected label, and top prediction self.write_results_output(predictions, tf_filenames, np_labels) @@ -251,6 +253,7 @@ def run(self): total_accuracy1 += np_accuracy1 total_accuracy5 += np_accuracy5 + print("Iteration time: %0.4f ms" % elapsed_time) print("Processed %d images. 
(Top1 accuracy, Top5 accuracy) = (%0.4f, %0.4f)" \ % (num_processed_images, total_accuracy1 / num_processed_images, total_accuracy5 / num_processed_images)) From a2b26ee4dbb8df799ec1e90e91543a91219271d3 Mon Sep 17 00:00:00 2001 From: Melanie Buehler Date: Thu, 30 May 2019 10:41:43 -0700 Subject: [PATCH 42/62] Adds TF Serving Transformer-LT Tutorial (#302) * Initial draft of tutorial and model export script * Added benchmarking script * Improved settings, benchmarking, and other sections * Added PYTHONPATH update for needed utils * Updated for reviewer suggestions * Added * to bold heading * Replaced '&' with '-d' for detached mode --- docs/README.md | 1 + .../tensorflow_serving/Tutorial.md | 211 ++++++++++++++++++ .../transformer_benchmark.py | 181 +++++++++++++++ .../transformer_graph_to_saved_model.py | 87 ++++++++ 4 files changed, 480 insertions(+) create mode 100644 docs/language_translation/tensorflow_serving/Tutorial.md create mode 100644 docs/language_translation/tensorflow_serving/transformer_benchmark.py create mode 100644 docs/language_translation/tensorflow_serving/transformer_graph_to_saved_model.py diff --git a/docs/README.md b/docs/README.md index 11e99bf97..3fd8677db 100644 --- a/docs/README.md +++ b/docs/README.md @@ -18,6 +18,7 @@ * Inference with IntelĀ® Optimization of Tensorflow Serving: * [Image Recognition](/docs/image_recognition/tensorflow_serving/Tutorial.md) (ResNet50 and InceptionV3) * [Object Detection](/docs/object_detection/tensorflow_serving/Tutorial.md) (R-FCN) + * [Language Translation](/docs/language_translation/tensorflow_serving/Tutorial.md) (Transformer-LT) * Model Quantization and Optimization * [Image Recognition](/docs/image_recognition/quantization/Tutorial.md) (ResNet50) diff --git a/docs/language_translation/tensorflow_serving/Tutorial.md b/docs/language_translation/tensorflow_serving/Tutorial.md new file mode 100644 index 000000000..c0a690e3e --- /dev/null +++ b/docs/language_translation/tensorflow_serving/Tutorial.md @@ 
-0,0 +1,211 @@ + +# Language Translation with TensorFlow Serving on CPU using Transformer-LT + +## Goal + +This tutorial will introduce you to the CPU performance considerations for language translation and how to use [Intel® Optimizations for TensorFlow Serving](https://www.tensorflow.org/serving/) to improve inference time on CPUs. +This tutorial uses a pre-trained [Transformer-LT](https://arxiv.org/pdf/1706.03762.pdf) model for translating English to German and a sample of English news excerpts. +We provide sample code that you can use to get your optimized TensorFlow model server and GRPC client up and running quickly. +In this tutorial using Transformer-LT, you will measure inference performance in two situations: +* **Online inference**, where batch_size=1. In this case, a lower number means better runtime performance. +* **Batch inference**, where batch_size>1. In this case, a higher number means better runtime performance. + +**NOTE about GRPC vs. REST**: It [has been suggested](https://medium.com/@avidaneran/tensorflow-serving-rest-vs-grpc-e8cef9d4ff62) that GRPC has faster client-side serialization and de-serialization than REST, especially if you are optimizing for batch inference. +Please note however that this tutorial is focused on optimizing the model server, not the client that sends requests. +We use GRPC in this tutorial for illustration, not as a best practice, and offer another [tutorial](/docs/object_detection/tensorflow_serving/Tutorial.md) that illustrates the use of the REST API with TensorFlow Serving, if you are interested in that protocol. 
+ +## Prerequisites + +This tutorial assumes you have already: +* [Installed TensorFlow Serving](/docs/general/tensorflow_serving/InstallationGuide.md) +* Read and understood the [General Best Practices](/docs/general/tensorflow_serving/GeneralBestPractices.md), + especially these sections: + * [Performance Metrics](/docs/general/tensorflow_serving/GeneralBestPractices.md#performance-metrics) + * [TensorFlow Serving Configuration Settings](/docs/general/tensorflow_serving/GeneralBestPractices.md#tensorflow-serving-configuration-settings) +* Run an example end-to-end using a GRPC client, such as the [one in the Installation Guide](/docs/general/tensorflow_serving/InstallationGuide.md#option-2-query-using-grpc) + +## Background + +The Transformer-LT model is a popular solution for language translation. +It is based on an encoder-decoder architecture with an added attention mechanism. +The encoder is used to encode the original sentence to a meaningful fixed-length vector, and the decoder is responsible for extracting the context data from the vector. +The encoder and decoder process the inputs and outputs, which are in the form of a time sequence. + +In a traditional encoder/decoder model, each element in the context vector is treated equally, but this is typically not the ideal solution. +For instance, when you translate the phrase “I travel by train” from English into Chinese, the word “I” has a greater influence than other words when producing its counterpart in Chinese. +Thus, the attention mechanism was introduced to differentiate contributions of each element in the source sequence to their counterpart in the destination sequence, through the use of a hidden matrix. +This matrix contains weights of each element in the source sequence when producing elements in the destination sequence. 
+ +[Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN)](https://github.com/intel/mkl-dnn) offers significant performance improvements for many neural network operations. +Tuning TensorFlow Serving to take full advantage of your hardware for language translation inference involves: +1. Running a TensorFlow Serving docker container configured for performance given your hardware resources +2. Running a GRPC client to verify prediction accuracy and measure online and batch inference performance +3. Experimenting with the TensorFlow Serving settings on your own to further optimize for your model and use case + +## Hands-on Tutorial with pre-trained Transformer-LT (Official) model + +1. **Clone this repository**: Clone the [intelai/models](https://github.com/intelai/models) repository into your home directory. + + ``` + cd ~ + git clone https://github.com/IntelAI/models.git + ``` + +2. **Clone the tensorflow/models repository**: Tokenization of the input data requires utility functions in a specific commit of the tensorflow/models repository. + + ``` + cd ~ + mkdir tensorflow-models + cd tensorflow-models + git clone https://github.com/tensorflow/models.git + cd models + git checkout 8367cf6dabe11adf7628541706b660821f397dce + ``` + + Now add the required directory to the `PYTHONPATH` variable: + + ``` + export PYTHONPATH=$PYTHONPATH:$(pwd)/official/transformer + ``` + +3. **Set up the client environment**: We need to create a virtual environment for this tutorial. + + - We will use a virtual environment to install the required packages. If you do not have pip or virtualenv, you will need to get them first: + + ``` + sudo apt-get install -y python python-pip virtualenv + ``` + + - Create and activate the python virtual environment in your home directory and install the `grpc`, `tensorflow`, `pandas`, and `tensorflow-serving-api` packages. 
+ + ``` + cd ~ + virtualenv lt_venv + source lt_venv/bin/activate + pip install grpc intel-tensorflow pandas tensorflow-serving-api + ``` + +4. **Download the pre-trained model and test data**: Download and extract the packaged pre-trained model and dataset ```transformer_lt_official_fp32_pretrained_model.tar.gz``` + (refer to the [model README](/benchmarks/language_translation/tensorflow/transformer_lt_official) to get the latest location of this archive). + + ``` + wget https://storage.googleapis.com/intel-optimized-tensorflow/models/transformer_lt_official_fp32_pretrained_model.tar.gz + tar -xzvf transformer_lt_official_fp32_pretrained_model.tar.gz + ``` + + After extraction, you should see the following folders and files in the `transformer_lt_official_fp32_pretrained_model` directory: + + ``` + $ ls -l transformer_lt_official_fp32_pretrained_model/* + + transformer_lt_official_fp32_pretrained_model/data: + total 1064 + -rw-r--r--. 1 359898 Feb 20 16:05 newstest2014.en + -rw-r--r--. 1 399406 Feb 20 16:05 newstest2014.de + -rw-r--r--. 1 324025 Mar 15 17:31 vocab.txt + + transformer_lt_official_fp32_pretrained_model/graph: + total 241540 + -rwx------. 1 247333269 Mar 15 17:29 fp32_graphdef.pb + ``` + + - `newstest2014.en`: Input file with English text + - `newstest2014.de`: German translation of the input file for measuring accuracy + - `vocab.txt`: Dictionary of vocabulary + - `fp32_graphdef.pb`: Pre-trained model + +5. **Create a SavedModel**: Using the conversion script `transformer_graph_to_saved_model.py`, convert the pre-trained model graph to a SavedModel. + + ``` + cd ~/models/docs/language_translation/tensorflow_serving + python transformer_graph_to_saved_model.py --import_path ~/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb + ``` + + This will create a `/tmp/1/` directory with a `saved_model.pb` file in it. This is the file we will serve from TensorFlow Serving. 
+ The [`transformer_graph_to_saved_model.py`](transformer_graph_to_saved_model.py) script attaches a signature definition to the model in order to make it compatible with TensorFlow Serving. + You can take a look at the script, its flags/options, and these resources for more information: + * [SavedModel](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/saved_model) + * [SignatureDefs](https://www.tensorflow.org/serving/signature_defs) + +6. **Discover the number of physical cores**: Compute *num_physical_cores* by executing the `lscpu` command and multiplying `Core(s) per socket` by `Socket(s)`. + For example, for a machine with `Core(s) per socket: 28` and `Socket(s): 2`, `num_physical_cores = 28 * 2 = 56`. + To compute *num_physical_cores* with bash commands: + ``` + cores_per_socket=`lscpu | grep "Core(s) per socket" | cut -d':' -f2 | xargs` + num_sockets=`lscpu | grep "Socket(s)" | cut -d':' -f2 | xargs` + num_physical_cores=$((cores_per_socket * num_sockets)) + echo $num_physical_cores + ``` + +7. **Recommended Settings**: To optimize overall performance, start with the following settings from the [General Best Practices](/docs/general/tensorflow_serving/GeneralBestPractices.md). + Playing around with these settings can improve performance even further, so you should experiment with your own hardware and model if you have strict performance requirements. + + | Options | Recommendations| + | ------------- | ------------- | + |TENSORFLOW_INTER_OP_PARALLELISM | 2 | + |TENSORFLOW_INTRA_OP_PARALLELISM| Number of physical cores | + |OMP_NUM_THREADS |Number of physical cores| + | Batch Size | 64 | + +8. **Start the server**: We can now start up the TensorFlow model server. Using `-d` (for "detached") runs the container as a background process. 
+ + ``` + cd ~ + docker run \ + --name=tfserving \ + -d \ + -p 8500:8500 \ + -v "/tmp:/models/transformer" \ + -e MODEL_NAME=transformer \ + -e OMP_NUM_THREADS=$num_physical_cores \ + -e TENSORFLOW_INTER_OP_PARALLELISM=2 \ + -e TENSORFLOW_INTRA_OP_PARALLELISM=$num_physical_cores \ + tensorflow/serving:mkl + ``` + + You can make sure the container is running using the `docker ps` command. + +9. **Online and batch performance**: Run `transformer_benchmark.py` [python script](/docs/language_translation/tensorflow_serving/transformer_benchmark.py), which can measure both online and batch performance. + + If you are not already there, go to the tutorial directory: + ``` + cd ~/models/docs/language_translation/tensorflow_serving + ``` + + **Online Inference** (batch_size=1): + ``` + python transformer_benchmark.py \ + -d ~/transformer_lt_official_fp32_pretrained_model/data/newstest2014.en \ + -v ~/transformer_lt_official_fp32_pretrained_model/data/vocab.txt \ + -b 1 + ``` + + **Batch Inference** (batch_size=64): + ``` + python transformer_benchmark.py \ + -d ~/transformer_lt_official_fp32_pretrained_model/data/newstest2014.en \ + -v ~/transformer_lt_official_fp32_pretrained_model/data/vocab.txt \ + -b 64 + ``` + + Note: If you want an output file of translated sentences, set the `-o` flag to an output file name of your choice. + If this option is set, the script will take a significantly longer time to finish. + +10. **Clean up**: + * After you are finished sending requests to the server, you can stop the container running in the background. To restart the container with the same name, you need to stop and remove the container from the registry. To view your running containers run `docker ps`. + + ``` + docker rm -f tfserving + ``` + + * Deactivate your virtual environment with `deactivate`. + + +## Conclusion +You have now seen an end-to-end example of serving a language translation model for inference using TensorFlow Serving, and learned: +1. 
How to create a SavedModel from a Transformer-LT TensorFlow model graph +2. How to choose good values for the performance-related runtime parameters exposed by the `docker run` command +3. How to test online and batch inference metrics using a GRPC client + +With this knowledge and the example code provided, you should be able to get started serving your own custom language translation model with good performance. +If desired, you should also be able to investigate a variety of different settings combinations to see if further performance improvements are possible. diff --git a/docs/language_translation/tensorflow_serving/transformer_benchmark.py b/docs/language_translation/tensorflow_serving/transformer_benchmark.py new file mode 100644 index 000000000..a5cf43654 --- /dev/null +++ b/docs/language_translation/tensorflow_serving/transformer_benchmark.py @@ -0,0 +1,181 @@ +# Copyright (c) 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +from __future__ import print_function + +import os +import sys +import time +import argparse +import grpc +import numpy as np +import pandas as pd +import tensorflow as tf + +from tensorflow_serving.apis import predict_pb2 +from tensorflow_serving.apis import prediction_service_pb2_grpc + +from utils import tokenizer +from utils.tokenizer import Subtokenizer + +def check_for_link(value): + """ + Throws an error if the specified path is a link. os.islink returns + True for sym links. 
For files, we also look at the number of links in + os.stat() to determine if it's a hard link. + """ + if os.path.islink(value) or \ + (os.path.isfile(value) and os.stat(value).st_nlink > 1): + raise argparse.ArgumentTypeError("{} cannot be a link.".format(value)) + +def check_valid_file_or_folder(value): + """verifies filename exists and isn't a link""" + if value is not None: + if not os.path.isfile(value) and not os.path.isdir(value): + raise argparse.ArgumentTypeError("{} does not exist or is not a file/folder.". + format(value)) + check_for_link(value) + return value + +def input_generator_ts(file_path, vocab_file): + """Read and sort lines based on token count from the file + sorted by decreasing length based on token sorting. + + Args: + file_path: String path of file to read + vocab_file: String path of vocab file + Returns: + Sorted list of inputs, and dictionary mapping original index->sorted index + of each element. + """ + with tf.gfile.Open(file_path) as f: + records = f.read().split("\n") + inputs = [record.strip() for record in records] + if not inputs[-1]: + inputs.pop() + + subtokenizer = Subtokenizer(vocab_file) + + batch = [] + token_lens = [] + for i, line in enumerate(inputs): + enc = subtokenizer.encode(line, add_eos=True) + token_lens.append((i, len(enc))) + + sorted_by_token_input_lens = sorted(token_lens, key=lambda x: x[1], reverse=True) + sorted_inputs = [None] * len(sorted_by_token_input_lens) + sorted_keys = [0] * len(sorted_by_token_input_lens) + + for i, (index, _) in enumerate(sorted_by_token_input_lens): + sorted_inputs[i] = inputs[index] + sorted_keys[index] = i + enc = subtokenizer.encode(sorted_inputs[i], add_eos=True) + batch.append(enc) + + return batch, sorted_keys + +def _trim_and_decode(ids, vocab_file): + """Trim EOS and PAD tokens from ids, and decode to return a string.""" + subtokenizer = Subtokenizer(vocab_file) + try: + index = list(ids).index(tokenizer.EOS_ID) + return subtokenizer.decode(ids[:index]) + except 
ValueError: # No EOS found in sequence + return subtokenizer.decode(ids) + +def benchmark(batch_size=1, num_iteration=20, warm_up_iteration=10): + channel = grpc.insecure_channel(SERVER_URL) + stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) + request = predict_pb2.PredictRequest() + request.model_spec.name = 'transformer' + request.model_spec.signature_name = 'serving_default' + + batches, sorted_keys = input_generator_ts(DATA_FILE, VOCAB_FILE) + + translations = [] + batch = [] + inference_time = 0.0 + sentences_to_translate = min(batch_size * num_iteration, len(batches)) + sentences_after_warmup = 0 + + for i, line in enumerate(batches[0:sentences_to_translate]): + batch.append(line) + if (i + 1) % batch_size == 0 or i == sentences_to_translate - 1: + batch_num = (i // batch_size) + 1 + request.inputs['input'].CopyFrom( + tf.contrib.util.make_tensor_proto(pd.DataFrame(batch).fillna(0).values.astype(np.int64))) + start_time = time.time() + result = stub.Predict(request) + duration = time.time() - start_time + shape = [int(dim.size) for dim in result.outputs['output'].tensor_shape.dim] + translations += np.reshape(result.outputs['output'].int_val, shape).tolist() + print('Iteration %d: %.3f sec' % (batch_num, duration)) + if batch_num > warm_up_iteration: + inference_time += duration + sentences_after_warmup += len(batch) + batch = [] + + average_time = inference_time / sentences_after_warmup + print('Inferencing time: %s' % (inference_time)) + print('Batch size = %d' % batch_size) + if batch_size == 1: + print('Latency: %.3f ms' % (average_time * 1000)) + print('Throughput: %.3f sentences/sec' % (sentences_after_warmup / inference_time)) + + if OUT_FILE: + print('Decoding and saving translations to {}...'.format(OUT_FILE)) + decoded_translations = [] + for i, tr in enumerate(translations): + decoded_translations.append(_trim_and_decode(tr, VOCAB_FILE)) + + with tf.gfile.Open(OUT_FILE, "w") as f: + for i in sorted_keys: + if i < 
len(decoded_translations): + f.write("%s\n" % decoded_translations[i]) + print('Done!') + +if __name__ == '__main__': + ap = argparse.ArgumentParser() + ap.add_argument("-d", "--data_file", type=check_valid_file_or_folder, required=True, + help="Path to English language input file") + ap.add_argument("-v", "--vocab_file", type=check_valid_file_or_folder, required=True, + help="Path to vocabulary file") + ap.add_argument("-o", "--out_file", type=str, required=False, default='', + help="Path to output file (optional") + ap.add_argument("-b", "--batch_size", required=False, type=int, default=1, + help="Batch size to use") + ap.add_argument("-n", "--num_iteration", required=False, type=int, default=20, + help="Number of times to repeat") + ap.add_argument("-w", "--warm_up_iteration", required=False, type=int, default=10, + help="Number of initial iterations to ignore in benchmarking") + + args = vars(ap.parse_args()) + + SERVER_URL = 'localhost:8500' + DATA_FILE = args['data_file'] + VOCAB_FILE = args['vocab_file'] + OUT_FILE = args['out_file'] + BATCH_SIZE = args['batch_size'] + NUM_ITERATION = args['num_iteration'] + WARM_UP_ITERATION = args['warm_up_iteration'] + + tf.logging.set_verbosity(tf.logging.WARN) + + print('\n SERVER_URL: {} \n DATA_FILE: {}'.format(SERVER_URL, DATA_FILE)) + + print('\nStarting Transformer-LT (Official) model benchmarking for Latency with batch_size={}, num_iteration={}, warm_up_iteration={}'.format(BATCH_SIZE, NUM_ITERATION, WARM_UP_ITERATION)) + benchmark(batch_size=BATCH_SIZE, num_iteration=NUM_ITERATION, warm_up_iteration=WARM_UP_ITERATION) + diff --git a/docs/language_translation/tensorflow_serving/transformer_graph_to_saved_model.py b/docs/language_translation/tensorflow_serving/transformer_graph_to_saved_model.py new file mode 100644 index 000000000..c5cc250ce --- /dev/null +++ b/docs/language_translation/tensorflow_serving/transformer_graph_to_saved_model.py @@ -0,0 +1,87 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 
Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: EPL-2.0 +# + +"""Import a Transformer-LT model graph and export a SavedModel. + +Usage: transformer_graph_to_saved_model.py [--model_version=y] import_path export_dir +""" + +from __future__ import print_function + +import sys +import tensorflow as tf + +tf.app.flags.DEFINE_integer('model_version', 1, 'Version number of the model.') +tf.app.flags.DEFINE_string('import_path', '', 'Model import path.') +tf.app.flags.DEFINE_string('export_dir', '/tmp', 'Export directory.') +FLAGS = tf.app.flags.FLAGS + + +def main(_): + if len(sys.argv) < 2 or sys.argv[-1].startswith('-'): + print('Usage: transformer_graph_to_saved_model.py [--model_version=y] import_path export_dir') + sys.exit(-1) + if FLAGS.import_path == '': + print('Please specify the path to the model graph you want to convert to SavedModel format.') + sys.exit(-1) + if FLAGS.model_version <= 0: + print('Please specify a positive value for version number.') + sys.exit(-1) + + # Import model graph + with tf.Session() as sess: + graph_def = tf.GraphDef() + with tf.gfile.GFile(FLAGS.import_path, 'rb') as input_file: + input_graph_content = input_file.read() + graph_def.ParseFromString(input_graph_content) + + sess.graph.as_default() + tf.import_graph_def(graph_def, name='') + sess.run(tf.global_variables_initializer()) + + # Build the signature_def_map. 
+ in_data = sess.graph.get_tensor_by_name('input_tensor:0') + inputs = {'input': tf.saved_model.utils.build_tensor_info(in_data)} + + out_data = sess.graph.get_tensor_by_name('model/Transformer/strided_slice_19:0') + outputs = {'output': tf.saved_model.utils.build_tensor_info(out_data)} + + signature = tf.saved_model.signature_def_utils.build_signature_def( + inputs=inputs, + outputs=outputs, + method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME + ) + + # Save out the SavedModel + print('Exporting trained model to', FLAGS.export_dir + '/' + str(FLAGS.model_version)) + builder = tf.saved_model.builder.SavedModelBuilder(FLAGS.export_dir + '/' + str(FLAGS.model_version)) + builder.add_meta_graph_and_variables( + sess, [tf.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature + } + ) + builder.save() + + print('Done!') + + +if __name__ == '__main__': + tf.app.run() From e4a7f4ff31e1d8aebe1e9f92c74246db0bad26d8 Mon Sep 17 00:00:00 2001 From: Melanie Buehler Date: Mon, 3 Jun 2019 09:21:51 -0700 Subject: [PATCH 43/62] Update verbiage in new READMEs, precisions, tutorials, etc. 
(#324) --- Contribute.md | 12 +++---- README.md | 3 +- benchmarks/README.md | 4 +-- .../tensorflow/densenet169/README.md | 13 ++++--- .../tensorflow/mobilenet_v1/README.md | 19 +++++------ .../tensorflow/resnet50v1_5/README.md | 16 ++++----- .../tensorflow_serving/inceptionv3/README.md | 13 ++++--- .../tensorflow/lm-1b/README.md | 17 +++++----- .../tensorflow/faster_rcnn/README.md | 11 +++--- .../tensorflow/ssd-resnet34/README.md | 9 +++-- .../tensorflow/ssd_vgg16/README.md | 34 +++++++++---------- .../tensorflow/wavenet/README.md | 2 +- .../tensorflow/Tutorial.md | 6 ++-- .../tensorflow_serving/Tutorial.md | 2 +- 14 files changed, 77 insertions(+), 84 deletions(-) diff --git a/Contribute.md b/Contribute.md index 73c58e8af..4a869c931 100644 --- a/Contribute.md +++ b/Contribute.md @@ -1,6 +1,6 @@ # Contributing to the Model Zoo for Intel® Architecture -## Adding benchmarking scripts for a new TensorFlow model +## Adding scripts for a new TensorFlow model ### Code updates @@ -14,7 +14,7 @@ required: Note that you will need to add `__init__.py` files in each new directory that you add, in order for python to find the code. - ![Benchmarks Directory Structure](benchmarks_directory_structure.png) + ![Directory Structure](benchmarks_directory_structure.png) 2. Next, in the leaf folder that was created in the previous step, you will need to create `config.json` and `model_init.py` files: @@ -149,16 +149,16 @@ developing new scripts: This README file should describe all of the steps necessary to run the model, including downloading and preprocessing the dataset, downloading the pretrained model, cloning repositories, and running - the benchmarking script with the appropriate arguments. Most models - have best known settings for throughput and latency performance + the model script with the appropriate arguments. Most models + have best known settings for batch and online inference performance testing as well as testing accuracy. 
The README file should specify how to set these configs using the `launch_benchmark.py` script. -2. Update the table in the [benchmarks README](/benchmarks/README.md) +2. Update the table in the [main `benchmarks` README](/benchmarks/README.md) with a link to the model that you are adding. Note that the models in this table are ordered alphabetically by use case, framework, and model name. The model name should link to the original paper for the - model. The benchmarking instructions column should link to the README + model. The instructions column should link to the README file that you created in the previous step. ### Testing diff --git a/README.md b/README.md index 5e3c45394..eb326584b 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,8 @@ This repository contains **links to pre-trained models, sample scripts, best pra - Show how to efficiently execute, train, and deploy Intel-optimized models - Make it easy to get started running Intel-optimized models on Intel hardware in the cloud or on bare metal -***DISCLAIMER: These scripts are not intended for benchmarking Intel platforms. For any performance and/or benchmarking information on specific Intel platforms, visit [https://www.intel.ai/blog](https://www.intel.ai/blog).*** +***DISCLAIMER: These scripts are not intended for benchmarking Intel platforms. 
+For any performance and/or benchmarking information on specific Intel platforms, visit [https://www.intel.ai/blog](https://www.intel.ai/blog).*** ## How to Use the Model Zoo diff --git a/benchmarks/README.md b/benchmarks/README.md index 3c5675fbd..a1bac907b 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -34,7 +34,7 @@ dependencies to be installed: | Language Translation | TensorFlow | [GNMT](https://arxiv.org/pdf/1609.08144.pdf) | Inference | [FP32](language_translation/tensorflow/gnmt/README.md#fp32-inference-instructions) | | Language Translation | TensorFlow | [Transformer Language](https://arxiv.org/pdf/1706.03762.pdf)| Inference | [FP32](language_translation/tensorflow/transformer_language/README.md#fp32-inference-instructions) | | Language Translation | TensorFlow | [Transformer_LT_Official ](https://arxiv.org/pdf/1706.03762.pdf)| Inference | [FP32](language_translation/tensorflow/transformer_lt_official/README.md#fp32-inference-instructions) | -| Object Detection | TensorFlow | [R-FCN](https://arxiv.org/pdf/1605.06409.pdf) | Inference | [FP32](object_detection/tensorflow/rfcn/README.md#fp32-inference-instructions) | +| Object Detection | TensorFlow | [R-FCN](https://arxiv.org/pdf/1605.06409.pdf) | Inference | [Int8](object_detection/tensorflow/rfcn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/rfcn/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [Faster R-CNN](https://arxiv.org/pdf/1506.01497.pdf) | Inference | [Int8](object_detection/tensorflow/faster_rcnn/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/faster_rcnn/README.md#fp32-inference-instructions) | | Object Detection | TensorFlow | [SSD-MobileNet](https://arxiv.org/pdf/1704.04861.pdf) | Inference | [Int8](object_detection/tensorflow/ssd-mobilenet/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-mobilenet/README.md#fp32-inference-instructions) | | Object Detection | 
TensorFlow | [SSD-ResNet34](https://arxiv.org/pdf/1512.02325.pdf) | Inference | [Int8](object_detection/tensorflow/ssd-resnet34/README.md#int8-inference-instructions) [FP32](object_detection/tensorflow/ssd-resnet34/README.md#fp32-inference-instructions) | @@ -48,7 +48,7 @@ dependencies to be installed: ## TensorFlow Serving Use Cases -| Use Case | Framework | Model | Mode | Benchmarking Instructions | +| Use Case | Framework | Model | Mode | Instructions | | -----------------------| --------------| ------------------- | --------- |------------------------------| | Image Recognition | TensorFlow Serving | [Inception V3](https://arxiv.org/pdf/1512.00567.pdf) | Inference | [FP32](image_recognition/tensorflow_serving/inceptionv3/README.md#fp32-inference-instructions) | diff --git a/benchmarks/image_recognition/tensorflow/densenet169/README.md b/benchmarks/image_recognition/tensorflow/densenet169/README.md index f38be702a..b1ecd8832 100644 --- a/benchmarks/image_recognition/tensorflow/densenet169/README.md +++ b/benchmarks/image_recognition/tensorflow/densenet169/README.md @@ -8,7 +8,7 @@ following modes/precisions: 1. Download ImageNet dataset. - This step is required only for running accuracy, for running benchmark we do not need to provide dataset. + This step is required only for running accuracy; when running the model for performance, a dataset does not need to be provided. Register and download the ImageNet dataset. Once you have the raw ImageNet dataset downloaded, we need to convert it to the TFRecord format. The TensorFlow models repo provides @@ -37,10 +37,9 @@ following modes/precisions: ``` 3. Clone the [intelai/models](https://github.com/intelai/models) repo - and then run the benchmarking scripts for either benchmarking throughput, - latency or accuracy. For --dataset-location in accuracy run, please use the ImageNet validation data path from step 1. 
- Each benchmark run has user configurable arguments separated from regular arguments by '--' at the end of the command. - Unless configured, these arguments will run with default values. Below are the example codes for each benchmark case: + and then run the model scripts for either online or batch inference or accuracy. For --dataset-location in accuracy run, please use the ImageNet validation data path from step 1. + Each model run has user configurable arguments separated from regular arguments by '--' at the end of the command. + Unless configured, these arguments will run with default values. Below are the example codes for each use case: ``` $ git clone https://github.com/IntelAI/models.git @@ -105,7 +104,7 @@ following modes/precisions: or the directory specified by the `--output-dir` arg. Below are examples of what the tail of your log file should look like for the different configs. - Example log tail when benchmarking for throughput: + Example log tail when running for batch inference: ``` steps = 80, 159.83471377 images/sec Latency: 625.646317005 ms @@ -117,7 +116,7 @@ following modes/precisions: Log location outside container: {--output-dir value}/benchmark_densenet169_inference_fp32_20190412_023940.log ``` - Example log tail when benchmarking for latency: + Example log tail when running for online inference: ``` steps = 80, 34.9948442873 images/sec Latency: 28.5756379366 ms diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index e0af6b190..694a3f575 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -46,10 +46,9 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene ``` 3. Clone the [intelai/models](https://github.com/intelai/models) repo - and then run the benchmarking scripts for either benchmarking throughput, - latency or accuracy. 
For --dataset-location in accuracy run, please use the ImageNet validation data path from step 1. - Each benchmark run has user configurable arguments separated from regular arguments by '--' at the end of the command. - Unless configured, these arguments will run with default values. Below are the example codes for each benchmark case: + and then run the model scripts for either online or batch inference or accuracy. For --dataset-location in accuracy run, please use the ImageNet validation data path from step 1. + Each model run has user configurable arguments separated from regular arguments by '--' at the end of the command. + Unless configured, these arguments will run with default values. Below are the example codes for each use case: ``` $ git clone https://github.com/IntelAI/models.git @@ -57,7 +56,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene $ cd benchmarks ``` - For throughput (using `--benchmark-only`, `--socket-id 0` and `--batch-size 240`): + For batch inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 240`): ``` python launch_benchmark.py \ --model-name mobilenet_v1 \ @@ -73,7 +72,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" ``` - For latency (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`) + For online inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 1`) ``` python launch_benchmark.py \ --model-name mobilenet_v1 \ @@ -114,7 +113,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene or the directory specified by the `--output-dir` arg. Below are examples of what the tail of your log file should look like for the different configs. - Example log tail when benchmarking for throughput: + Example log tail when running for batch inference: ``` [Running warmup steps...] 
steps = 10, 1865.30956528 images/sec @@ -128,7 +127,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene Log location outside container: {--output-dir value}/benchmark_mobilenet_v1_inference_int8_20190523_164626.log ``` - Example log tail when benchmarking for latency: + Example log tail when running for online inference: ``` [Running warmup steps...] steps = 10, 197.082229114 images/sec @@ -157,8 +156,8 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene ## FP32 Inference Instructions 1. The ImageNet dataset is required for testing accuracy and can also be - used when running benchmarking. If no datset is provided when running - benchmarking, synthetic data will be used. + used when running online or batch inference. If no dataset is provided when running + online or batch inference, synthetic data will be used. Download the ImageNet dataset and convert it to the TF records format using the instructions diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md b/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md index 610eb7cc0..2a13913d9 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md @@ -6,7 +6,7 @@ following precisions: * [FP32 inference](#fp32-inference-instructions) Original ResNet model has multiple versions which have shown better accuracy -and/or throughput performance. As mentioned in TensorFlow's [official ResNet +and/or batch inference performance. As mentioned in TensorFlow's [official ResNet model page](https://github.com/tensorflow/models/tree/master/official/resnet), 3 different versions of the original ResNet model exists - ResNet50v1, ResNet50v1.5, and ResNet50v2. 
As a side note, ResNet50v1.5 is also in MLPerf's [cloud inference benchmark for @@ -82,7 +82,7 @@ Log location outside container: {--output-dir value}/benchmark_resnet50_inferenc * Evaluate the model performance: If just evaluate performance for dummy data, the `--data-location` is not needed. Otherwise `--data-location` argument needs to be specified: -Calculate the model throughput `images/sec`, the required parameters to run the inference script would include: +Calculate the batch inference performance `images/sec`, the required parameters to run the inference script would include: the pre-trained `resnet50v1_5_int8_pretrained_model.pb` input graph file (from step 2), and the `--benchmark-only` flag. It is optional to specify the number of `warmup_steps` and `steps` as extra @@ -134,7 +134,7 @@ $ git clone https://github.com/IntelAI/models.git ``` 3. If running resnet50 for accuracy, the ImageNet dataset will be -required (if running benchmarking for throughput/latency, then dummy +required (if running the model for batch or online inference, then dummy data will be used). The TensorFlow models repo provides @@ -147,7 +147,7 @@ located at `models/models/image_recognition/tensorflow/resnet50v1_5/`. If benchmarking uses dummy data for inference, `--data-location` flag is not required. Otherwise, `--data-location` needs to point to point to ImageNet dataset location. -* To measure the model latency, set `--batch-size=1` and run the benchmark script as shown: +* To measure online inference, set `--batch-size=1` and run the model script as shown: ``` $ cd /home//models/benchmarks @@ -164,7 +164,7 @@ $ python launch_benchmark.py \ The log file is saved to the value of `--output-dir`. -The tail of the log output when the benchmarking completes should look +The tail of the log output when the script completes should look something like this: ``` Inference with dummy data. 
@@ -185,7 +185,7 @@ Ran inference with batch size 1 Log location outside container: {--output-dir value}/benchmark_resnet50_inference_fp32_{timestamp}.log ``` -* To measure the model Throughput, set `--batch-size=128` and run the benchmark script as shown: +* To measure batch inference, set `--batch-size=128` and run the model script as shown: ``` $ cd /home//models/benchmarks @@ -202,7 +202,7 @@ $ python launch_benchmark.py \ The log file is saved to the value of `--output-dir`. -The tail of the log output when the benchmarking completes should look +The tail of the log output when the script completes should look something like this: ``` Inference with dummy data. @@ -254,7 +254,7 @@ Ran inference with batch size 100 Log location outside container: {--output-dir value}/benchmark_resnet50_inference_fp32_{timestamp}.log ``` -* The `--output-results` flag can be used along with above benchmarking +* The `--output-results` flag can be used along with above performance or accuracy test, in order to also output a file with the inference results (file name, actual label, and the predicted label). The results output can only be used with real data. diff --git a/benchmarks/image_recognition/tensorflow_serving/inceptionv3/README.md b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/README.md index 1ddb7bb14..bef280f1d 100644 --- a/benchmarks/image_recognition/tensorflow_serving/inceptionv3/README.md +++ b/benchmarks/image_recognition/tensorflow_serving/inceptionv3/README.md @@ -21,17 +21,16 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/inceptio 3. Navigate to the `benchmarks` directory in your local clone of the [intelai/models](https://github.com/IntelAI/models) repo from step 1. The `launch_benchmark.py` script in the `benchmarks` directory is -used for starting a tensorflow serving benchmarking using optimized TensorFlow Serving docker +used for starting a tensorflow serving run using optimized TensorFlow Serving docker container. 
It has arguments to specify which model, framework, mode, precision, and input graph. Substitute in your own `--in-graph` pretrained model file path (from step 2). -4. Inception V3 can be run for `latency` benchmarking and `throughput` -benchmarking. Use one of the following examples below, +4. Inception V3 can be run for measuring batch or online inference performance. Use one of the following examples below, depending on your use case. -* For latency with dummy data (using `--batch-size 1`): +* For online inference with dummy data (using `--batch-size 1`): ``` python launch_benchmark.py \ @@ -43,7 +42,7 @@ python launch_benchmark.py \ --batch-size=1 \ --benchmark-only ``` -Example log tail when benchmarking for latency: +Example log tail when running for online inference: ``` Iteration 35: 0.019 sec Iteration 36: 0.020 sec @@ -59,7 +58,7 @@ tfserving_3784 Log output location: {--output-dir value}/benchmark_inceptionv3_inference_fp32_20190516_103531.log ``` -* For throughput with dummy data (using `--batch-size 128`): +* For batch inference with dummy data (using `--batch-size 128`): ``` python launch_benchmark.py \ @@ -71,7 +70,7 @@ python launch_benchmark.py \ --batch-size=128 \ --benchmark-only ``` -Example log tail when benchmarking for throughput: +Example log tail when running for batch inference: ``` Iteration 34: 0.779 sec Iteration 35: 0.916 sec diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/README.md b/benchmarks/language_modeling/tensorflow/lm-1b/README.md index 82b42cac9..ec4bdcf47 100644 --- a/benchmarks/language_modeling/tensorflow/lm-1b/README.md +++ b/benchmarks/language_modeling/tensorflow/lm-1b/README.md @@ -1,10 +1,10 @@ # LM-1B -This document has instructions for how to run LM-1B benchmark for the +This document has instructions for how to run LM-1B for the following modes/platforms: * [FP32 inference](#fp32-inference-instructions) -Benchmarking instructions and scripts for model training and inference for +Instructions and scripts 
for model training and inference for other platforms are coming later. ## FP32 Inference Instructions @@ -32,19 +32,18 @@ git clone https://github.com/IntelAI/models.git 3. Next, navigate to the `benchmarks` directory in your local clone of the [intelai/models](https://github.com/IntelAI/models) repo (from step 2). The `launch_benchmark.py` script in the `benchmarks` directory is -used for starting a benchmarking run in a optimized TensorFlow docker +used for starting a model run in an optimized TensorFlow docker container. It has arguments to specify which model, framework, mode, precision, and docker image to use, and the checkpoint directory. Substitute the `--model-source-dir` to `/inference/cloud/language_modeling`. -Before benchmarking, ensure that you have run the script to prepare checkpoint files and the dataset +Before running, ensure that you have run the script to prepare checkpoint files and the dataset from Step 1. -LM-1B can run for latency or throughput -benchmarking. Use one of the following examples below, depending on +LM-1B can run for online or batch inference. Use one of the following examples below, depending on your use case. -For latency (using `--socket-id 0` and `--batch-size 1`): +For online inference (using `--socket-id 0` and `--batch-size 1`): ``` python launch_benchmark.py \ @@ -59,7 +58,7 @@ python launch_benchmark.py \ ``` -For throughput (using `--socket-id 0` and `--batch-size 1024`): +For batch inference (using `--socket-id 0` and `--batch-size 1024`): ``` python launch_benchmark.py \ @@ -81,7 +80,7 @@ to get additional debug output. `models/benchmarks/common/tensorflow/logs` directory. The user can specify a different directory using `--output-dir`. -Example log tail when benchmarking for latency or throughput: +Example log tail when running for online or batch inference: ``` Running warmup... Running benchmark...
diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md index fe553f411..9528f4808 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md @@ -254,13 +254,10 @@ with the appropriate parameters. To run on single socket use `--socket_id` switc by default it will be using all available sockets. Optional parameter `number_of_steps` (default value = 5000) can be added at the end of command after `--` as shown below: -<<<<<<< HEAD -Run benchmarking for throughput and latency using the following command. -The `--data-location` is the path to the directory that contains the -raw coco dataset validation images which you downloaded and unzipped: -======= -Run for batch and online inference: ->>>>>>> 869ed7aa20949bb5346e10887d92933dff7bc894 +Run batch and online inference using the following command. +The `--data-location` is the path to the directory that contains the raw coco dataset +validation images which you downloaded and unzipped: + ``` $ cd /home//models/benchmarks diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md index 97791788f..3f2623389 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md @@ -281,7 +281,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/ssd_resn ``` 6. Clone the [intelai/models](https://github.com/intelai/models) repo. -This repo has the launch script for running benchmarking, which we will +This repo has the launch script for running the model, which we will use in the next step. ``` @@ -290,11 +290,10 @@ $ git clone https://github.com/IntelAI/models.git 7. 
Next, navigate to the `benchmarks` directory of the [intelai/models](https://github.com/intelai/models) repo that was just -cloned in the previous step. SSD-ResNet34 can be run for benchmarking -throughput and latency, or testing accuracy. Note that we are running +cloned in the previous step. SSD-ResNet34 can be run for testing batch or online inference, or testing accuracy. Note that we are running SSD-ResNet34 with a TensorFlow 1.14 docker image. -To benchmarking for throughput and latency, use the following command, +To run for batch and online inference, use the following command, the path to the frozen graph that you downloaded in step 5 as the `--in-graph`, and use the `--benchmark-only` flag: @@ -337,7 +336,7 @@ $ python launch_benchmark.py \ 8. The log file is saved to the value of `--output-dir`. -Below is a sample log file tail when running benchmarking: +Below is a sample log file tail when testing performance: ``` Batchsize: 1 diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md index 9d2cb7b3c..5f101b835 100644 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md @@ -5,7 +5,7 @@ following modes/precisions: * [Int8 inference](#int8-inference-instructions) * [FP32 inference](#fp32-inference-instructions) -Benchmarking instructions and scripts for model training and inference +Instructions and scripts for model training and inference other precisions are coming later. ## Int8 Inference Instructions @@ -23,12 +23,12 @@ $ git checkout 2d8b0cb9b2e70281bf9dce438ff17ffa5e59075c ``` 2. Clone the [intelai/models](https://github.com/intelai/models) repository. -It will be used to run the SSD-VGG16 model accuracy and benchmark tests. +It will be used to run the SSD-VGG16 model accuracy and inference performance tests. 3. 
Download the 2017 validation images file: [COCO dataset](http://cocodataset.org/#home) and annotations: This is required if you would like to run the accuracy test, -or the throughput and latency benchmark with real data. +or batch/online inference with real data. ``` $ wget http://images.cocodataset.org/zips/val2017.zip @@ -87,16 +87,16 @@ total 792084 $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/ssdvgg16_int8_pretrained_model.pb ``` -6. Navigate to the `benchmarks` directory (step 2), and run the benchmarking scripts for either benchmarking throughput -and latency or accuracy. +6. Navigate to the `benchmarks` directory (step 2), and run the model scripts for either batch or online +inference or accuracy. ``` $ cd models/benchmarks ``` -* Run benchmarking for throughput and latency where the `--model-source-dir` is the model source directory from step 1, -and the `--in-graph` is the pretrained model graph from step 5, -if you specify the `--data-location` which is the path to the tf record file that you generated in step 4, -the benchmark will run with real data, otherwise dummy data will be used: +* Run the model for batch or online inference where the `--model-source-dir` is the model source directory from step 1, +and the `--in-graph` is the pretrained model graph from step 5. 
+If you specify the `--data-location` which is the path to the tf record file that you generated in step 4, +the model will run with real data, otherwise dummy data will be used: ``` python launch_benchmark.py \ --model-name ssd_vgg16 \ @@ -142,7 +142,7 @@ python launch_benchmark.py \ ``` >Notes: ->* For the throughput and latency benchmark, we recommend the provided values for the arguments: `--num-inter-threads=11`, `--num-intra-threads=21`, `--data-num-inter-threads=21`, +>* For batch and online inference, we recommend the provided values for the arguments: `--num-inter-threads=11`, `--num-intra-threads=21`, `--data-num-inter-threads=21`, `--data-num-intra-threads=28` for optimized performance on `28-cores Cascade Lake (CLX)` machine. >* SSD-VGG16 model accuracy test works only with the `Python3` based docker images. @@ -152,8 +152,8 @@ to get additional debug output or change the default output location. 6. The log file is saved to the value of `--output-dir`. -Below is a sample log file tail when running benchmarking for throughput -and latency, the following results are based on CLX 28-cores with hyper-threading enabled: +Below is a sample log file tail when running the model for batch +and online inference, the following results are based on CLX 28-cores with hyper-threading enabled: ``` Batch size = 1 @@ -189,13 +189,13 @@ Use the steps 1, 2,3 and 4 as above. $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/ssdvgg16_fp32_pretrained_model.pb ``` -6. Navigate to the `benchmarks` directory (step 2), and run the benchmarking scripts for either benchmarking throughput -and latency or accuracy. +6. Navigate to the `benchmarks` directory (step 2), and run the model scripts for either batch +and online inference or accuracy. 
``` $ cd models/benchmarks ``` -* Run benchmarking for throughput and latency where the `--model-source-dir` is the model source directory from step 1, +* Run the model for batch and online inference where the `--model-source-dir` is the model source directory from step 1, and the `--in-graph` is the pretrained model graph from step 5, if you specify the `--data-location` which is the path to the tf record file that you generated in step 4, the benchmark will run with real data, otherwise dummy data will be used: @@ -246,7 +246,7 @@ python launch_benchmark.py \ ``` >Notes: ->* For the throughput and latency benchmark, we recommend the provided values for the arguments: `--num-inter-threads=11`, `--num-intra-threads=21`, `--data-num-inter-threads=21`, +>* For batch and online inference, we recommend the provided values for the arguments: `--num-inter-threads=11`, `--num-intra-threads=21`, `--data-num-inter-threads=21`, `--data-num-intra-threads=28` for optimized performance on `28-cores Cascade Lake (CLX)` machine. >* SSD-VGG16 model accuracy test works only with the `Python3` based docker images. @@ -256,7 +256,7 @@ to get additional debug output or change the default output location. 6. The log file is saved to the value of `--output-dir`. -Below is a sample log file tail when running throughput and latency benchmarking, +Below is a sample log file tail when running batch and online inference, the following results are based on CLX 28-cores with hyper-threading enabled: ``` diff --git a/benchmarks/text_to_speech/tensorflow/wavenet/README.md b/benchmarks/text_to_speech/tensorflow/wavenet/README.md index 1c88cbae2..512cabd95 100644 --- a/benchmarks/text_to_speech/tensorflow/wavenet/README.md +++ b/benchmarks/text_to_speech/tensorflow/wavenet/README.md @@ -41,7 +41,7 @@ $ pwd 2. Clone this [intelai/models](https://github.com/intelai/models) repo. This repo has the launch script for running the model, as well as checkpoint files for a pre-trained model. 
After cloning the repo, -navigate to the benchmarks directory, which is where the launch script +navigate to the `benchmarks` directory, which is where the launch script is located. ``` diff --git a/docs/language_translation/tensorflow/Tutorial.md b/docs/language_translation/tensorflow/Tutorial.md index 13f827a50..aee385c63 100644 --- a/docs/language_translation/tensorflow/Tutorial.md +++ b/docs/language_translation/tensorflow/Tutorial.md @@ -158,7 +158,7 @@ Substitute the `--model-source-dir` for the location where you cloned the ``` ~/tensorflow-models/models ``` -3.1. *Real Time inference* (using `--socket-id 0` and `--batch-size 1` for latency) +3.1. *Online inference* (using `--socket-id 0` and `--batch-size 1`) If you wish to calculate the [BLEU](https://en.wikipedia.org/wiki/BLEU) metric to find out the machine-translation quality, pass the file as `reference` flag. `newstest2014.en` file must have only one sentence per line @@ -185,7 +185,7 @@ python launch_benchmark.py \ The translated German text will be in the file `translation.txt` located at `~/models/benchmarks/common/tensorflow/logs` -3.2. *Max Throughput inference* (using `--socket-id 0` and `--batch-size 64` for throughput) +3.2. *Batch inference* (using `--socket-id 0` and `--batch-size 64`) ```bash python launch_benchmark.py \ @@ -222,7 +222,7 @@ Log location outside container: /~/models/benchmarks/common/tensorflow/logs/benc The logs are captured in a directory outside of the container.
4. If you want to run the ```launch_benchmark.py``` interactively from within the docker container, add flag ```--debug```. This will launch a docker container based on the ```--docker_image```, -performs necessary installs, runs the ```launch_benchmark.py``` script and does not terminate the container process. As an example, this step will demonstrate real-time inference (--batch-size 1), but you can implement the same strategy for max throughput (--batch-size 64)." +performs necessary installs, runs the ```launch_benchmark.py``` script and does not terminate the container process. As an example, this step will demonstrate online inference (--batch-size 1), but you can implement the same strategy for batch inference (--batch-size 64)." console in: ```bash diff --git a/docs/language_translation/tensorflow_serving/Tutorial.md b/docs/language_translation/tensorflow_serving/Tutorial.md index c0a690e3e..1d8ebff71 100644 --- a/docs/language_translation/tensorflow_serving/Tutorial.md +++ b/docs/language_translation/tensorflow_serving/Tutorial.md @@ -37,7 +37,7 @@ Thus, the attention mechanism was introduced to differentiate contributions of e This matrix contains weights of each element in the source sequence when producing elements in the destination sequence. [Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN)](https://github.com/intel/mkl-dnn) offers significant performance improvements for many neural network operations. -Tuning TensorFlow Serving to take full advantage of your hardware for recommendation systems inference involves: +Tuning TensorFlow Serving to take full advantage of your hardware for language translation inference involves: 1. Running a TensorFlow Serving docker container configured for performance given your hardware resources 2. Running a GRPC client to verify prediction accuracy and measure online and batch inference performance 3.
Experimenting with the TensorFlow Serving settings on your own to further optimize for your model and use case From 634d8dffa5e037bd3c58889083399ff7bd5a5a5c Mon Sep 17 00:00:00 2001 From: Wafaa Taie Date: Thu, 6 Jun 2019 17:09:27 -0700 Subject: [PATCH 44/62] fix one of the data location references in readme (#325) --- .../object_detection/tensorflow/ssd_vgg16/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md index 5f101b835..8036419ba 100644 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md @@ -125,7 +125,7 @@ the model directory `SSD.TensorFlow` from step 1. ``` * The `--data-location` is required, which is the path to the tf record file that you generated in step 4. - * Copy the annotation file `instances_val2017.json` (from step 3) to the dataset directory `/home//coco/output`. + * Copy the annotation file `instances_val2017.json` (from step 3) to the dataset directory `/home//tf_records/`. * Use the `--accuracy-only` flag: ``` python launch_benchmark.py \ @@ -150,7 +150,7 @@ python launch_benchmark.py \ >* The `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location. -6. The log file is saved to the value of `--output-dir`. +7. The log file is saved to the value of `--output-dir`. Below is a sample log file tail when running the model for batch and online inference, the following results are based on CLX 28-cores with hyper-threading enabled: @@ -229,7 +229,7 @@ the model directory `SSD.TensorFlow` from step 1. ``` * The `--data-location` is required, which is the path to the tf record file that you generated in step 3. - * Copy the annotation file `instances_val2017.json` (from step 3) to the dataset directory `/home//coco/output`. 
+ * Copy the annotation file `instances_val2017.json` (from step 3) to the dataset directory `/home//tf_records/`. * Use the `--accuracy-only` flag: ``` python launch_benchmark.py \ @@ -254,7 +254,7 @@ python launch_benchmark.py \ >* The `--verbose` or `--output-dir` flag can be added to any of the above commands to get additional debug output or change the default output location. -6. The log file is saved to the value of `--output-dir`. +7. The log file is saved to the value of `--output-dir`. Below is a sample log file tail when running batch and online inference, the following results are based on CLX 28-cores with hyper-threading enabled: From 058f0bf711a2b052ad024d283da27f34c80c83c8 Mon Sep 17 00:00:00 2001 From: Wafaa Taie Date: Tue, 11 Jun 2019 09:22:52 -0700 Subject: [PATCH 45/62] Add ResNet50 int8 TF Serving Tutorial (#314) * add tf serving resnet50 int8 tutorial * fix typo. * combine both int8 and fp32 tutorials. * formatting changes based on code review comments. * remove inceptionv3 int8. * update the tutorial to remove inceptionv3 int8. --- .../tensorflow_serving/Tutorial.md | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/docs/image_recognition/tensorflow_serving/Tutorial.md b/docs/image_recognition/tensorflow_serving/Tutorial.md index a5f832193..71f94f76c 100644 --- a/docs/image_recognition/tensorflow_serving/Tutorial.md +++ b/docs/image_recognition/tensorflow_serving/Tutorial.md @@ -1,10 +1,12 @@ # Image Recognition with TensorFlow Serving on CPU + ### Online and Batch Inference -Model: InceptionV3 and ResNet50 +Model and Precision: InceptionV3 FP32, ResNet50 FP32, and ResNet50 Int8 ## Goal -This tutorial will introduce you to the CPU performance considerations for image recognition deep learning models and how to use IntelĀ® Optimizations for [TensorFlow Serving](https://www.tensorflow.org/serving/) to improve inference time on CPUs. 
+This tutorial will introduce you to the CPU performance considerations for image recognition deep learning models with different precisions and +how to use Intel® Optimizations for [TensorFlow Serving](https://www.tensorflow.org/serving/) to improve inference time on CPUs. It also provides sample code that you can use to get your optimized TensorFlow model server and GRPC client up and running quickly. ## Prerequisites @@ -22,20 +24,28 @@ This tutorial assumes you have already: Convolutional neural networks (CNNs) for image recognition are computationally expensive. The Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN) offers significant performance improvements for convolution, pooling, normalization, activation, and other operations via efficient vectorization and multi-threading. Tuning TensorFlow Serving to take full advantage of your hardware for image recognition deep learning inference involves: -1. Working through this tutorial to set up servable versions of the well-known [ResNet50](https://arxiv.org/pdf/1512.03385.pdf) and [InceptionV3](https://arxiv.org/pdf/1512.00567v1.pdf) CNN models +1. Working through this tutorial to set up servable versions of the well-known [ResNet50](https://arxiv.org/pdf/1512.03385.pdf) and [InceptionV3](https://arxiv.org/pdf/1512.00567v1.pdf) CNN models with different precisions. 2. Running a TensorFlow Serving docker container configured for performance given your hardware resources 3. Running a client script to measure online and batch inference performance 4.
Experimenting with the TensorFlow Serving settings on your own to further optimize for your model and use case ## Hands-on Tutorial - InceptionV3 and Resnet50 -For steps 1 and 2, refer to the Intel Model Zoo FP32 READMEs: -* [InceptionV3 README](/benchmarks/image_recognition/tensorflow/inceptionv3#fp32-inference-instructions) -* [ResNet50 README](/benchmarks/image_recognition/tensorflow/resnet50#fp32-inference-instructions) +This section shows a step-by-step example for how to serve one of the following Image Recognition models +`(ResNet50 FP32, ResNet50 Int8, and InceptionV3 FP32)` using TensorFlow Serving. +It also explains the possible ways to manage the available CPU resources and tune it for the optimal performance. + +For steps 1 and 2, refer to the Intel Model Zoo READMEs: +* **FP32 precision:** use the Intel Model Zoo `FP32` README sections, + * [InceptionV3 FP32 README](/benchmarks/image_recognition/tensorflow/inceptionv3#fp32-inference-instructions), and + * [ResNet50 FP32 README](/benchmarks/image_recognition/tensorflow/resnet50#fp32-inference-instructions) + +* **Int8 precision:** use the Intel Model Zoo `Int8` README sections, + * [ResNet50 Int8 README](/benchmarks/image_recognition/tensorflow/resnet50#int8-inference-instructions) -NOTE: The below example shows InceptionV3. The same code snippets will work for ResNet50 by replacing the model name to `resnet50`. +>NOTE: The below example shows InceptionV3 (FP32). The same code snippets will work for ResNet50 (FP32 and Int8) by replacing the model name to `resnet50`. -1. **Download the Model**: Download and extract the InceptionV3 pre-trained model (FP32), using the instructions in above README. +1. **Download the Model**: Download and extract the InceptionV3 pre-trained model, using the instructions in above README. 2. **(Optional) Download Data**: If you are interested only in testing performance, not accuracy, you can skip this step and use synthetic data. 
If you want to verify prediction accuracy by testing on real data, follow the instructions in one of the READMEs above to download the ImageNet dataset. @@ -62,7 +72,7 @@ NOTE: The below example shows InceptionV3. The same code snippets will work for (venv)$ pip install tensorflow-serving-api ``` 5. **Create a SavedModel**: Using the conversion script `model_graph_to_saved_model.py`, convert the pre-trained model graph to a SavedModel. - (For ResNet50, substitute the name of the ResNet50 FP32 pre-trained model.) + (For ResNet50, substitute the name of the ResNet50 FP32 or the ResNet50 Int8 pre-trained model.) Example: ``` @@ -174,7 +184,7 @@ NOTE: The below example shows InceptionV3. The same code snippets will work for ## Conclusion -You have now seen two end-to-end examples of serving an image recognition model for inference using TensorFlow Serving, and learned: +You have now seen three end-to-end examples of serving an image recognition model for inference using TensorFlow Serving, and learned: 1. How to create a SavedModel from a TensorFlow model graph 2. How to choose good values for the performance-related runtime parameters exposed by the `docker run` command 3. 
How to verify that the served model can correctly classify an image using a GRPC client From d01c39b19fdb63bf859b0a5bf400dab9e2715dcb Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Tue, 11 Jun 2019 09:56:41 -0700 Subject: [PATCH 46/62] Make the launch script executable (#326) --- benchmarks/launch_benchmark.py | 0 .../tensorflow/faster_rcnn/inference/int8/coco_int8.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 benchmarks/launch_benchmark.py mode change 100644 => 100755 models/object_detection/tensorflow/faster_rcnn/inference/int8/coco_int8.sh diff --git a/benchmarks/launch_benchmark.py b/benchmarks/launch_benchmark.py old mode 100644 new mode 100755 diff --git a/models/object_detection/tensorflow/faster_rcnn/inference/int8/coco_int8.sh b/models/object_detection/tensorflow/faster_rcnn/inference/int8/coco_int8.sh old mode 100644 new mode 100755 From a37f48f0a74dc8ee9f8230a4ac6333d8b567e9ff Mon Sep 17 00:00:00 2001 From: Clayne Robison Date: Wed, 12 Jun 2019 11:17:51 -0700 Subject: [PATCH 47/62] Ubuntu 18 tzdata fix (#310) * Fix tzdata installation hang with Ubuntu 18.04 * Update to use 'DEBIAN_FRONTEND=noninteractive' based on feedback from Ebi --- benchmarks/common/tensorflow/start.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index 5884cfac9..7c37309d3 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -60,6 +60,8 @@ if [[ ${NOINSTALL} != "True" ]]; then ## install common dependencies apt update apt full-upgrade -y + # Set env var before installs so that user interaction is not required + export DEBIAN_FRONTEND=noninteractive apt-get install python-tk numactl -y apt install -y libsm6 libxext6 pip install --upgrade pip From 17a5cccccc8980a572a038c84c41c985a7a21fb8 Mon Sep 17 00:00:00 2001 From: "Xiaoming (Jason) Cui" Date: Thu, 13 Jun 2019 09:32:29 -0700 Subject: [PATCH 48/62] Update 
Transformer LT Official to support num_inter and num_intra threads (#308) * Added the support to change num_inter_threads and num_intra_threads to the model * Update unit test args to include num_inter and num_intra --- .../transformer_lt_official/inference/fp32/model_init.py | 4 +++- .../tf_model_args/tf_transformer_lt_official_args.json | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py index 85dae1e68..a8b0b9432 100644 --- a/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py +++ b/benchmarks/language_translation/tensorflow/transformer_lt_official/inference/fp32/model_init.py @@ -92,7 +92,9 @@ def __init__(self, args, custom_args, platform_util=None): if self.args.batch_size != -1 else "1") + \ " --file=" + self.args.decode_from_file + \ " --file_out=" + translate_file + \ - " --vocab_file=" + self.args.vocab_file + " --vocab_file=" + self.args.vocab_file +\ + " --num_inter=" + str(self.args.num_inter_threads) +\ + " --num_intra=" + str(self.args.num_intra_threads) self.bleu_params += " --translation=" + translate_file + \ " --reference=" + self.args.reference diff --git a/tests/unit/common/tensorflow/tf_model_args/tf_transformer_lt_official_args.json b/tests/unit/common/tensorflow/tf_model_args/tf_transformer_lt_official_args.json index 1ccbf4bc4..079f99abd 100644 --- a/tests/unit/common/tensorflow/tf_model_args/tf_transformer_lt_official_args.json +++ b/tests/unit/common/tensorflow/tf_model_args/tf_transformer_lt_official_args.json @@ -1,9 +1,9 @@ [ { "_comment": "FP32 latency benchmark", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models 
--benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=1 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=1 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt"}, + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=1 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt --num_inter=1 --num_intra=28"}, { "_comment": "FP32 throughput benchmark", "input": "run_tf_benchmark.py --framework=tensorflow --use-case=language_translation --model-name=transformer_lt_official --precision=fp32 --mode=inference --model-source-dir=/workspace/models --benchmark-dir=/workspace/benchmarks --intelai-models=/workspace/intelai_models --num-cores=-1 --batch-size=64 --socket-id=0 --benchmark-only --verbose --file=/dataset/newstest2014.en --reference=/dataset/newstest2014.de --vocab_file=/dataset/vocab.txt --in_graph=/in_graph/fp32_graphdef.pb", - "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=64 --file=/dataset/newstest2014.en --file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt"} + "output": "numactl --cpunodebind=0 --membind=0 python /workspace/models/official/transformer/infer_ab.py --param_set=big --in_graph=/in_graph/fp32_graphdef.pb --batch_size=64 --file=/dataset/newstest2014.en 
--file_out=/models/benchmarks/common/tensorflow/logs/translate.txt --vocab_file=/dataset/vocab.txt --num_inter=1 --num_intra=28"} ] From 194e0119a20a4539dabbffdfe6eb9dd62a159f30 Mon Sep 17 00:00:00 2001 From: Melanie Buehler Date: Thu, 13 Jun 2019 09:40:49 -0700 Subject: [PATCH 49/62] TFServing SSD-MobileNet Tutorial (#311) * Added SSD-MobileNet to existing tutorial * Added tutorial script and notebook * Improvements and fixes from testing * More small fixes * Adds SSD-MobileNet to main docs README * Updated benchmarking verbiage * Simplify package installs, docker ports, and detached mode * Created requirements.txt --- docs/README.md | 2 +- .../tensorflow_serving/ObjectDetection.ipynb | 322 ++++++++++++++++++ .../tensorflow_serving/RFCN.ipynb | 207 ----------- .../tensorflow_serving/Tutorial.md | 304 ++++++++++------- ...hmark.py => object_detection_benchmark.py} | 68 +++- .../tensorflow_serving/requirements.txt | 16 + 6 files changed, 571 insertions(+), 348 deletions(-) create mode 100644 docs/object_detection/tensorflow_serving/ObjectDetection.ipynb delete mode 100644 docs/object_detection/tensorflow_serving/RFCN.ipynb rename docs/object_detection/tensorflow_serving/{rfcn-benchmark.py => object_detection_benchmark.py} (54%) create mode 100644 docs/object_detection/tensorflow_serving/requirements.txt diff --git a/docs/README.md b/docs/README.md index 3fd8677db..c5933030c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -17,7 +17,7 @@ * [Recommendation Systems](/docs/recommendation/tensorflow/Tutorial.md) (Wide and Deep) * Inference with IntelĀ® Optimization of Tensorflow Serving: * [Image Recognition](/docs/image_recognition/tensorflow_serving/Tutorial.md) (ResNet50 and InceptionV3) - * [Object Detection](/docs/object_detection/tensorflow_serving/Tutorial.md) (R-FCN) + * [Object Detection](/docs/object_detection/tensorflow_serving/Tutorial.md) (R-FCN and SSD-MobileNet) * [Language Translation](/docs/language_translation/tensorflow_serving/Tutorial.md) 
(Transformer-LT) * Model Quantization and Optimization * [Image Recognition](/docs/image_recognition/quantization/Tutorial.md) (ResNet50) diff --git a/docs/object_detection/tensorflow_serving/ObjectDetection.ipynb b/docs/object_detection/tensorflow_serving/ObjectDetection.ipynb new file mode 100644 index 000000000..5e975ae0c --- /dev/null +++ b/docs/object_detection/tensorflow_serving/ObjectDetection.ipynb @@ -0,0 +1,322 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Object Detection: R-FCN and SSD-MobileNet" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import print_function\n", + "\n", + "import os\n", + "import time\n", + "import random\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "from PIL import Image\n", + "\n", + "from object_detection.utils.visualization_utils import visualize_boxes_and_labels_on_image_array\n", + "\n", + "%matplotlib inline\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "MODEL = 'rfcn' # Use 'rfcn' for R-FCN or 'ssdmobilenet' for SSD-MobileNet\n", + "PROTOCOL = 'grpc' # Use 'grpc' for GRPC or 'rest' for REST\n", + "IMAGES_PATH = '/home//coco/val/val2017' # Edit this to your COCO validation directory" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "if PROTOCOL == 'grpc':\n", + " import grpc\n", + " import tensorflow as tf\n", + " from tensorflow_serving.apis import predict_pb2\n", + " from tensorflow_serving.apis import prediction_service_pb2_grpc\n", + " SERVER_URL = 'localhost:8500'\n", + "elif PROTOCOL == 'rest':\n", + " import requests\n", + " SERVER_URL = 'http://localhost:8501/v1/models/{}:predict'.format(MODEL)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": 
[], + "source": [ + "def get_random_image(image_dir):\n", + " image_path = os.path.join(image_dir, random.choice(os.listdir(image_dir)))\n", + " image = Image.open(image_path)\n", + " (im_width, im_height) = image.size\n", + " return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)\n", + "\n", + "def visualize(output_dict, image_np):\n", + " new_dict = {}\n", + " if PROTOCOL == 'grpc':\n", + " new_dict['num_detections'] = int(output_dict['num_detections'].float_val[0])\n", + " new_dict['detection_classes'] = np.array(output_dict['detection_classes'].float_val).astype(np.uint8)\n", + " new_dict['detection_boxes'] = np.array(output_dict['detection_boxes'].float_val).reshape((-1,4))\n", + " new_dict['detection_scores'] = np.array(output_dict['detection_scores'].float_val)\n", + " new_dict['instance_masks'] = np.array(output_dict['instance_masks'].float_val)\n", + " elif PROTOCOL == 'rest':\n", + " new_dict['num_detections'] = int(output_dict['num_detections'])\n", + " new_dict['detection_classes'] = np.array(output_dict['detection_classes']).astype(np.uint8)\n", + " new_dict['detection_boxes'] = np.array(output_dict['detection_boxes'])\n", + " new_dict['detection_scores'] = np.array(output_dict['detection_scores'])\n", + "\n", + " # Visualize the results of a detection\n", + " visualize_boxes_and_labels_on_image_array(\n", + " image_np,\n", + " new_dict['detection_boxes'],\n", + " new_dict['detection_classes'],\n", + " new_dict['detection_scores'],\n", + " {1: {'id': 1, 'name': 'object'}}, # Empty category index\n", + " instance_masks=None,\n", + " use_normalized_coordinates=True,\n", + " line_thickness=8)\n", + " plt.figure()\n", + " plt.imshow(image_np)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Object Detection" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size = 1\n", + "np_image = get_random_image(IMAGES_PATH)" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.\n", + "For more information, please see:\n", + " * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n", + " * https://github.com/tensorflow/addons\n", + "If you depend on functionality not listed there, please file an issue.\n", + "\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "

" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "if PROTOCOL == 'grpc':\n", + " np_image = np.repeat(np.expand_dims(np_image, 0), batch_size, axis=0)\n", + " channel = grpc.insecure_channel(SERVER_URL)\n", + " stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)\n", + " request = predict_pb2.PredictRequest()\n", + " request.model_spec.name = 'ssdmobilenet'\n", + " request.model_spec.signature_name = 'serving_default'\n", + " request.inputs['inputs'].CopyFrom(tf.contrib.util.make_tensor_proto(np_image))\n", + " result = stub.Predict(request)\n", + " visualize(result.outputs, np_image[0])\n", + "elif PROTOCOL == 'rest':\n", + " predict_request = '{\"instances\" : %s}' % np.expand_dims(np_image, 0).tolist()\n", + " result = requests.post(SERVER_URL, data=predict_request)\n", + " visualize(result.json()['predictions'][0], np_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Measure Performance" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def make_request(batch_size):\n", + " if PROTOCOL == 'rest':\n", + " np_images = np.repeat(np.expand_dims(get_random_image(IMAGES_PATH), 0).tolist(), batch_size, axis=0).tolist()\n", + " return '{\"instances\" : %s}' % np_images\n", + " elif PROTOCOL == 'grpc':\n", + " np_images = np.repeat(np.expand_dims(get_random_image(IMAGES_PATH), 0), batch_size, axis=0)\n", + " channel = grpc.insecure_channel(SERVER_URL)\n", + " stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)\n", + " request = predict_pb2.PredictRequest()\n", + " request.model_spec.name = MODEL\n", + " request.model_spec.signature_name = 'serving_default'\n", + " request.inputs['inputs'].CopyFrom(tf.contrib.util.make_tensor_proto(np_images))\n", + " return (stub, request)\n", + "\n", + "def send_request(predict_request):\n", + " if PROTOCOL == 'rest':\n", + " 
requests.post(SERVER_URL, data=predict_request)\n", + " elif PROTOCOL == 'grpc':\n", + " predict_request[0].Predict(predict_request[1])\n", + "\n", + "def benchmark(batch_size=1, num_iteration=10, warm_up_iteration=2):\n", + " i = 0\n", + " total_time = 0\n", + " for _ in range(num_iteration):\n", + " i += 1\n", + " np_images = np.repeat(np.expand_dims(get_random_image(IMAGES_PATH), 0), batch_size, axis=0)\n", + " predict_request = make_request(batch_size)\n", + " start_time = time.time()\n", + " send_request(predict_request)\n", + " time_consume = time.time() - start_time\n", + " print('Iteration %d: %.3f sec' % (i, time_consume))\n", + " if i > warm_up_iteration:\n", + " total_time += time_consume\n", + "\n", + " time_average = total_time / (num_iteration - warm_up_iteration)\n", + " print('Average time: %.3f sec' % (time_average))\n", + " print('Batch size = %d' % batch_size)\n", + " if batch_size == 1:\n", + " print('Latency: %.3f ms' % (time_average * 1000))\n", + " print('Throughput: %.3f images/sec' % (batch_size / time_average))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Real-time Inference (latency, batch_size=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iteration 1: 0.059 sec\n", + "Iteration 2: 0.098 sec\n", + "Iteration 3: 0.055 sec\n", + "Iteration 4: 0.052 sec\n", + "Iteration 5: 0.056 sec\n", + "Iteration 6: 0.051 sec\n", + "Iteration 7: 0.056 sec\n", + "Iteration 8: 0.052 sec\n", + "Iteration 9: 0.050 sec\n", + "Iteration 10: 0.048 sec\n", + "Average time: 0.052 sec\n", + "Batch size = 1\n", + "Latency: 52.392 ms\n", + "Throughput: 19.087 images/sec\n" + ] + } + ], + "source": [ + "benchmark()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Throughput (batch_size=128)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "Iteration 1: 4.414 sec\n", + "Iteration 2: 2.699 sec\n", + "Iteration 3: 2.654 sec\n", + "Iteration 4: 2.409 sec\n", + "Iteration 5: 2.485 sec\n", + "Iteration 6: 2.476 sec\n", + "Iteration 7: 2.457 sec\n", + "Iteration 8: 2.497 sec\n", + "Iteration 9: 2.575 sec\n", + "Iteration 10: 2.539 sec\n", + "Average time: 2.511 sec\n", + "Batch size = 128\n", + "Throughput: 50.967 images/sec\n" + ] + } + ], + "source": [ + "benchmark(batch_size=128)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/object_detection/tensorflow_serving/RFCN.ipynb b/docs/object_detection/tensorflow_serving/RFCN.ipynb deleted file mode 100644 index 2f96cf5e7..000000000 --- a/docs/object_detection/tensorflow_serving/RFCN.ipynb +++ /dev/null @@ -1,207 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Object Detection: R-FCN" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from __future__ import print_function\n", - "\n", - "import os\n", - "import time\n", - "import random\n", - "import requests\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "from PIL import Image\n", - "\n", - "from object_detection.utils.visualization_utils import visualize_boxes_and_labels_on_image_array\n", - "\n", - "%matplotlib inline\n", - "import matplotlib\n", - "from matplotlib import pyplot as plt" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "SERVER_URL = 'http://localhost:8501/v1/models/rfcn:predict'\n", - "IMAGES_PATH = '/home//coco/val/val2017' # Edit this to your COCO validation directory" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_random_image(image_dir):\n", - " image_path = os.path.join(image_dir, random.choice(os.listdir(image_dir)))\n", - " image = Image.open(image_path)\n", - " (im_width, im_height) = image.size\n", - " \n", - " return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)\n", - "\n", - "def visualize(output_dict, image_np):\n", - " output_dict['num_detections'] = int(output_dict['num_detections'])\n", - " output_dict['detection_classes'] = np.array(output_dict['detection_classes']).astype(np.uint8)\n", - " output_dict['detection_boxes'] = np.array(output_dict['detection_boxes'])\n", - " output_dict['detection_scores'] = np.array(output_dict['detection_scores'])\n", - "\n", - " # Visualize the results of a detection\n", - " visualize_boxes_and_labels_on_image_array(\n", - " image_np,\n", - " output_dict['detection_boxes'],\n", - " output_dict['detection_classes'],\n", - " output_dict['detection_scores'],\n", - " {1: {'id': 1, 'name': 'object'}}, # Empty category index\n", - " instance_masks=output_dict.get('detection_masks'),\n", - " use_normalized_coordinates=True,\n", - " line_thickness=8)\n", - " plt.figure()\n", - " plt.imshow(image_np)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test Object Detection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np_image = get_random_image(IMAGES_PATH)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "predict_request = '{\"instances\" : %s}' % np.expand_dims(np_image, 0).tolist()\n", - "result = 
requests.post(SERVER_URL, data=predict_request)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "visualize(result.json()['predictions'][0], np_image)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Measure Performance" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def benchmark(batch_size=1, num_iteration=40, warm_up_iteration=10):\n", - " i = 0\n", - " total_time = 0\n", - " for _ in range(num_iteration):\n", - " i += 1\n", - " np_images = np.repeat(np.expand_dims(get_random_image(IMAGES_PATH), 0).tolist(), batch_size, axis=0).tolist()\n", - " predict_request = '{\"instances\" : %s}' % np_images\n", - " start_time = time.time()\n", - " requests.post(SERVER_URL, data=predict_request)\n", - " time_consume = time.time() - start_time\n", - " print('Iteration %d: %.3f sec' % (i, time_consume))\n", - " if i > warm_up_iteration:\n", - " total_time += time_consume\n", - "\n", - " time_average = total_time / (num_iteration - warm_up_iteration)\n", - " print('Average time: %.3f sec' % (time_average))\n", - " print('Batch size = %d' % batch_size)\n", - " if batch_size == 1:\n", - " print('Latency: %.3f ms' % (time_average * 1000))\n", - " print('Throughput: %.3f images/sec' % (batch_size / time_average))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Real-time Inference (latency, batch_size=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "benchmark()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Throughput (batch_size=128)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "benchmark(batch_size=128)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - 
], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.10" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/object_detection/tensorflow_serving/Tutorial.md b/docs/object_detection/tensorflow_serving/Tutorial.md index d3256e7c1..224943f5c 100644 --- a/docs/object_detection/tensorflow_serving/Tutorial.md +++ b/docs/object_detection/tensorflow_serving/Tutorial.md @@ -1,15 +1,17 @@ -# Object Detection with TensorFlow Serving on CPU using R-FCN model +# Object Detection with TensorFlow Serving on CPU +Models: R-FCN and SSD-MobileNet ## Goal This tutorial will introduce you to the CPU performance considerations for object detection in deep learning models and how to use [IntelĀ® Optimizations for TensorFlow Serving](https://www.tensorflow.org/serving/) to improve inference time on CPUs. -This tutorial uses a pre-trained Region-based Fully Convolutional Network (R-FCN) model for object detection and provides sample code that you can use to get your optimized TensorFlow model server and REST client up and running quickly. In this tutorial using R-FCN, you will measure inference performance in two situations: -* **Online inference**, where batch_size=1. In this case, lower time to result means better runtime performance. +This tutorial uses two pre-trained models - a [Region-based Fully Convolutional Network (R-FCN)](https://arxiv.org/pdf/1605.06409.pdf) and a [Single-Shot MultiBox Detector MobileNet (SSD-MobileNet)](https://arxiv.org/pdf/1704.04861.pdf) - for object detection and provides sample code that you can use to get your optimized TensorFlow model server and client up and running quickly. 
+In this tutorial you will choose between R-FCN and SSD-MobileNet, and between the REST client and GRPC client, and then measure inference performance in two situations: +* **Online inference**, where batch_size=1. In this case, a lower number means better runtime performance. * **Batch inference**, where batch_size>1. In this case, a higher number means better runtime performance. **NOTE about REST vs. GRPC**: This tutorial is focused on optimizing the model server, not the client that sends requests. For optimal client-side serialization and de-serialization, you may want to use TensorFlow Serving's GRPC option instead of the REST API, especially if you are optimizing for batch inference (here is one [article](https://medium.com/@avidaneran/tensorflow-serving-rest-vs-grpc-e8cef9d4ff62) with a relevant analysis). -We use REST in this tutorial for illustration, not as a best practice, and offer another [tutorial](/docs/image_recognition/tensorflow_serving/Tutorial.md) that illustrates the use of GRPC with TensorFlow Serving. +We show both GRPC and REST in this tutorial for illustration, not as a best practice. Feel free to compare and choose the protocol that works best for you. 
## Prerequisites @@ -19,140 +21,178 @@ This tutorial assumes you have already: especially these sections: * [Performance Metrics](/docs/general/tensorflow_serving/GeneralBestPractices.md#performance-metrics) * [TensorFlow Serving Configuration Settings](/docs/general/tensorflow_serving/GeneralBestPractices.md#tensorflow-serving-configuration-settings) -* Ran an example end-to-end using a REST client, such as the one in the [Installation Guide](/docs/general/tensorflow_serving/InstallationGuide.md) +* Ran an example end-to-end using a REST or GRPC client, such as the one in the [Installation Guide](/docs/general/tensorflow_serving/InstallationGuide.md) ## Background -[Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN)](https://github.com/intel/mkl-dnn) offers significant performance improvements for convolution, pooling, normalization, activation, and other operations for object detection, using efficient vectorization and multi-threading. Tuning TensorFlow Serving to take full advantage of your hardware for object detection deep learning inference involves: +[Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN)](https://github.com/intel/mkl-dnn) offers significant performance improvements for convolution, pooling, normalization, activation, and other operations for object detection, using efficient vectorization and multi-threading. +Tuning TensorFlow Serving to take full advantage of your hardware for object detection deep learning inference involves: 1. Running a TensorFlow Serving docker container configured for performance given your hardware resources -2. Running a REST client notebook to verify object detection and measure online and batch inference performance +2. Running a REST or GRPC client to verify object detection and measure online and batch inference 3. 
Experimenting with the TensorFlow Serving settings on your own to further optimize for your model and use case -## Hands-on Tutorial with pre-trained R-FCN model +## Hands-on Tutorial -1. **Set up your environment**: We need to setup two things for this tutorial - #### 1.1 Install the [requests](http://docs.python-requests.org) package for making REST HTTP requests. - We will use a virtual environment to install the required packages. If you do not have pip or virtualenv, you will need to get them first: - ``` - $ sudo apt-get install -y python python-pip - $ pip install virtualenv - ``` - - Create and activate the python virtual envirnoment in your home directory and install the [`requests`](http://docs.python-requests.org) package. +1. **Download the data and clone the Model Zoo**: + + 1.1 Download the 2017 validation COCO dataset (~780MB) (**note**: do not convert the COCO dataset to TF records format): + ``` - $ cd ~ - $ virtualenv rfcn_venv - $ source rfcn_venv/bin/activate - (rfcn_venv)$ pip install requests + cd ~ + mkdir -p coco/val + wget http://images.cocodataset.org/zips/val2017.zip + unzip val2017.zip -d coco/val + export COCO_VAL_DATA=$(pwd)/coco/val/val2017 + echo "export COCO_VAL_DATA=$(pwd)/coco/val/val2017" >> ~/.bashrc ``` - #### 1.2 Install [Tensorflow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection) - For detailed instructions, [click here](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md). Following are the instructions for Ubuntu 16.04. - - - 1.2.1 Install Tensorflow Object Detection API dependencies - ``` - (rfcn_venv)$ sudo apt-get install -y protobuf-compiler python-pil python-lxml python-tk - (rfcn_venv)$ pip install tensorflow Cython contextlib2 jupyter matplotlib pillow lxml - ``` - - 1.2.2 Clone the tensorflow models repo into your home directory. 
- ``` - (rfcn_venv)$ cd ~ - (rfcn_venv)$ git clone https://github.com/tensorflow/models - (rfcn_venv)$ export TF_MODELS_ROOT=$(pwd)/models - (rfcn_venv)$ echo "export TF_MODELS_ROOT=$(pwd)/models" >> ~/.bashrc - ``` + 1.2 Clone the Intel Model Zoo into your home directory: + + ``` + cd ~ + git clone https://github.com/IntelAI/models.git + ``` + +2. **Choose your model and download the pre-trained SavedModel**: Select either R-FCN or SSD-MobileNet. + Then download and extract the pre-trained model and copy the `saved_model.pb` to `~/obj_detection/1` (the `1` subdirectory is important - don't skip it!). + This is the file we will serve from TensorFlow Serving. Finally, define a variable for your chosen model to use in later steps. + Refer to the [TensorFlow documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/saved_model) for more information about SavedModels, and refer to the FP32 model READMEs for [R-FCN](/benchmarks/object_detection/tensorflow/rfcn/README.md#download_fp32_pretrained_model) and [SSD-MobileNet](/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md#fp32-inference-instructions) to get the latest location of the pre-trained models. + + Highlight and copy one of the following download links: + * R-FCN: `https://storage.googleapis.com/intel-optimized-tensorflow/models/rfcn_resnet101_fp32_coco_pretrained_model.tar.gz` + * SSD-MobileNet: `http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz` + + Then execute the following bash commands after customizing them for the model you have chosen: + + ``` + cd ~ + wget <download_link> + tar -xzvf <downloaded_tar_file> + mkdir -p obj_detection/1 + cp <extracted_model_dir>/saved_model/saved_model.pb obj_detection/1 + model_name=<rfcn or ssdmobilenet> + ``` - 1.2.3 Install COCO API - ``` - (rfcn_venv)$ cd ~ - (rfcn_venv)$ git clone https://github.com/cocodataset/cocoapi.git - (rfcn_venv)$ cd cocoapi/PythonAPI - (rfcn_venv)$ make - (rfcn_venv)$ cp -r pycocotools $TF_MODELS_ROOT/research/ - ``` +3. 
**Set up your virtual environment**: We will use a virtual environment to install the required packages. - 1.2.4 Manually install the protobuf-compiler v3.0.0, run the compilation process, add Libraries to PYTHONPATH and to your `.bashrc` and test the installation of Tensorflow Object Detection API - ``` - (rfcn_venv)$ cd $TF_MODELS_ROOT/research/ - (rfcn_venv)$ wget -O protobuf.zip https://github.com/google/protobuf/releases/download/v3.0.0/protoc-3.0.0-linux-x86_64.zip - (rfcn_venv)$ unzip protobuf.zip - (rfcn_venv)$ ./bin/protoc object_detection/protos/*.proto --python_out=. - (rfcn_venv)$ export PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd)/slim - (rfcn_venv)$ echo "export PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd)/slim" >> ~/.bashrc - (rfcn_venv)$ python object_detection/builders/model_builder_test.py - ``` - -2. **Download the Data**: Download the 2017 validation COCO dataset (~780MB) (**note**: do not convert the COCO dataset to TF records format): - + 3.1 If you do not have pip or virtualenv, you will need to get them first: + ``` + sudo apt-get install -y python python-pip virtualenv ``` - (rfcn_venv)$ cd ~ - (rfcn_venv)$ mkdir -p coco/val - (rfcn_venv)$ wget http://images.cocodataset.org/zips/val2017.zip - (rfcn_venv)$ unzip val2017.zip -d coco/val - (rfcn_venv)$ export COCO_VAL_DATA=$(pwd)/coco/val/val2017 - (rfcn_venv)$ echo "export COCO_VAL_DATA=$(pwd)/coco/val/val2017" >> ~/.bashrc + + 3.2 Create and activate the python virtual environment in your home directory: + ``` + cd ~ + virtualenv od_venv + source od_venv/bin/activate ``` -3. **Download and Prepare the pre-trained SavedModel**: Download and extract the pre-trained model and copy the `rfcn_resnet101_fp32_coco/saved_model/saved_model.pb` to `rfcn/1` (the `1` subdirectory is important - don't skip it!). This is the file we will serve from TensorFlow Serving. 
- Refer to the [TensorFlow documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/saved_model) for more information about SavedModels, and refer to this [README file](/benchmarks/object_detection/tensorflow/rfcn/README.md#download_fp32_pretrained_model) to get the latest location of the pre-trained model. + 3.3 Install the required packages using `requirements.txt`: ``` - (rfcn_venv)$ cd ~/ - (rfcn_venv)$ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/rfcn_resnet101_fp32_coco_pretrained_model.tar.gz - (rfcn_venv)$ tar -xzvf rfcn_resnet101_fp32_coco_pretrained_model.tar.gz - (rfcn_venv)$ mkdir -p rfcn/1 - (rfcn_venv)$ cp rfcn_resnet101_fp32_coco/saved_model/saved_model.pb rfcn/1 + pip install -r models/docs/object_detection/tensorflow_serving/requirements.txt ``` -4. **Discover the number of physical cores**: Compute *num_physical_cores* by executing the `lscpu` command and multiplying `Core(s) per socket` by `Socket(s)`. For example, for a machine with `Core(s) per socket: 28` and `Socket(s): 2`, `num_physical_cores = 28 * 2 = 56`. To compute *num_physical_cores* with bash commands: + 3.4 Choose between the REST example or the GRPC example (the environment dependencies are different depending on the protocol you use, + and GRPC is usually faster, especially when using larger batch sizes). Define a variable for your desired protocol. + + **REST**: + ``` + protocol_name=rest + ``` + + **GRPC**: + ``` + protocol_name=grpc + ``` + +4. **Install [TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection)**: + For detailed instructions, [click here](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md). + We have already installed the required python packages for the API. Following are the rest of the instructions for Ubuntu 16.04. + + 4.1 Clone the tensorflow models repo into a new folder in your home directory. 
``` - (rfcn_venv)$ cores_per_socket=`lscpu | grep "Core(s) per socket" | cut -d':' -f2 | xargs` - (rfcn_venv)$ num_sockets=`lscpu | grep "Socket(s)" | cut -d':' -f2 | xargs` - (rfcn_venv)$ num_physical_cores=$((cores_per_socket * num_sockets)) - (rfcn_venv)$ echo $num_physical_cores + cd ~ + git clone https://github.com/tensorflow/models tensorflow-models + export TF_MODELS_ROOT=$(pwd)/tensorflow-models + echo "export TF_MODELS_ROOT=$(pwd)/tensorflow-models" >> ~/.bashrc ``` -5. **Start the server**: Now let's start up the TensorFlow model server. With `&` at the end of the cmd, runs the container as a background process. Press enter after executing the following cmd. -To optimize overall performance, use the following recommended settings from the [General Best Practices](/docs/general/tensorflow_serving/GeneralBestPractices.md): - * OMP_NUM_THREADS=*num_physical_cores* - * TENSORFLOW_INTER_OP_PARALLELISM=2 - * TENSORFLOW_INTRA_OP_PARALLELISM=*num_physical_cores* + 4.2 Manually install the protobuf-compiler v3.0.0, run the compilation process, add libraries to PYTHONPATH and to your `.bashrc` and test the installation of Tensorflow Object Detection API. + ``` + cd $TF_MODELS_ROOT/research/ + wget -O protobuf.zip https://github.com/protocolbuffers/protobuf/releases/download/v3.0.0/protoc-3.0.0-linux-x86_64.zip + unzip protobuf.zip + ./bin/protoc object_detection/protos/*.proto --python_out=. + export PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd)/slim + echo "export PYTHONPATH=$PYTHONPATH:$(pwd):$(pwd)/slim" >> ~/.bashrc + python object_detection/builders/model_builder_test.py + ``` +5. **Discover the number of physical cores**: Compute *num_physical_cores* by executing the `lscpu` command and multiplying `Core(s) per socket` by `Socket(s)`. + For example, for a machine with `Core(s) per socket: 28` and `Socket(s): 2`, `num_physical_cores = 28 * 2 = 56`. 
+ To compute *num_physical_cores* with bash commands: ``` - (rfcn_venv)$ cd ~ - (rfcn_venv)$ docker run \ - --name=tfserving_rfcn \ - -p 8501:8501 \ - -v "$(pwd)/rfcn:/models/rfcn" \ - -e MODEL_NAME=rfcn \ - -e OMP_NUM_THREADS=$num_physical_cores \ - -e TENSORFLOW_INTER_OP_PARALLELISM=2 \ - -e TENSORFLOW_INTRA_OP_PARALLELISM=$num_physical_cores \ - tensorflow/serving:mkl & - ``` - **Note**: For some models, playing around with these settings values can improve performance even further. - We recommend that you experiment with your own hardware and model if you have strict performance requirements. - -6. *Measure Online and Batch inference performance**: Clone the Intel Model Zoo into a directory called `intel-models` and run `rfcn-benchmark.py` [python script](/docs/object_detection/tensorflow_serving/rfcn-benchmark.py), which will test both Online and Batch performance. - ``` - (rfcn_venv)$ git clone https://github.com/IntelAI/models.git intel-models - (rfcn_venv)$ python intel-models/docs/object_detection/tensorflow_serving/rfcn-benchmark.py \ - -i $COCO_VAL_DATA + cores_per_socket=`lscpu | grep "Core(s) per socket" | cut -d':' -f2 | xargs` + num_sockets=`lscpu | grep "Socket(s)" | cut -d':' -f2 | xargs` + num_physical_cores=$((cores_per_socket * num_sockets)) + echo $num_physical_cores ``` +6. **Start the server**: Now start up the TensorFlow model server. Using `-d` (for "detached") runs the container as a background process. + We will publish the ports for both REST (`-p 8501:8501`) and GRPC (`-p 8500:8500`). 
+ To optimize overall performance, use the following recommended settings from the [General Best Practices](/docs/general/tensorflow_serving/GeneralBestPractices.md): + * OMP_NUM_THREADS=*num_physical_cores* + * TENSORFLOW_INTER_OP_PARALLELISM=2 + * TENSORFLOW_INTRA_OP_PARALLELISM=*num_physical_cores* + + ``` + cd ~ + docker run \ + --name=tfserving \ + -d \ + -p 8500:8500 \ + -p 8501:8501 \ + -v "$(pwd)/obj_detection:/models/$model_name" \ + -e MODEL_NAME=$model_name \ + -e OMP_NUM_THREADS=$num_physical_cores \ + -e TENSORFLOW_INTER_OP_PARALLELISM=2 \ + -e TENSORFLOW_INTRA_OP_PARALLELISM=$num_physical_cores \ + tensorflow/serving:mkl + ``` + + **Note**: For some models, playing around with the parallelism settings can improve performance even further. + We recommend that you experiment with your own hardware and model if you have strict performance requirements. -7. **Visualize Object Detection Output**: To visually see the output of object detection results, we will use Jupyter notebook via web browser. If you are using a system that does not have a browser, such as a VM on GCP or AWS, a workaround is to use local port forwarding of port 8888 to relay the jupyter service to your localhost. You will need to quit your SSH session and log back in with port forwarding configured. -For example, with a GCP VM, add `--ssh-flag="-L 8888:localhost:8888"` to your ssh command. Once you are connected again with port forwarding, reactivate the virtual environment, navigate to the tutorial directory, and start jupyter notebook. Continue with the next instruction. - ``` - $ cd ~ - $ source rfcn_venv/bin/activate - (rfcn_venv)$ cd intel-models/docs/object_detection/tensorflow_serving - (rfcn_venv)$ jupyter notebook +7. **Measure online and batch inference performance**: Run the `object_detection_benchmark.py` [python script](/docs/object_detection/tensorflow_serving/object_detection_benchmark.py), which will test both online and batch inference performance. 
+ + ``` + cd ~ + python models/docs/object_detection/tensorflow_serving/object_detection_benchmark.py \ + -i $COCO_VAL_DATA \ + -m $model_name \ + -p $protocol_name ``` - After running `jupyter notebook` , paste the generated link into your browser and open the `RFCN.ipynb` file. You will need to edit the code in one place - in the second cell, insert the path to your downloaded COCO validation data set. Then, execute the cells in order. The output of the "Test Object Detection" section should be an image with objects correctly detected by the R-FCN model. -8. (Optional) **Using a single core**: In some cases, it is desirable to constrain the inference server to a single core or socket. Docker has many runtime flags that allow you to control the container's access to the host system's CPUs, memory, and other resources. See the [Docker document on this topic](https://docs.docker.com/config/containers/resource_constraints/#cpu) for all the options and their definitions. For example, to run the container so that a single CPU is used, you can use these settings: +8. **Visualize object detection output**: To visually see the results of object detection, we will use a Jupyter notebook via web browser. + If you are using a system that does not have a browser, such as a VM on GCP or AWS, a workaround is to use local port forwarding of port 8888 to relay the jupyter service to your localhost. + You will need to quit your SSH session and log back in with port forwarding configured. For example, with a GCP VM, add `--ssh-flag="-L 8888:localhost:8888"` to your ssh command. + Once you are connected again with port forwarding, reactivate the virtual environment, navigate to the tutorial directory, and start the jupyter notebook service. + + ``` + cd ~ + source od_venv/bin/activate + cd models/docs/object_detection/tensorflow_serving + jupyter notebook + ``` + + After running `jupyter notebook`, paste the generated link into your browser and open the `ObjectDetection.ipynb` file. 
+ You will need to edit the code in one cell - in the second cell, insert the path to your downloaded COCO validation data set and the names of your chosen model and protocol. + Then, execute the cells in order. The output of the "Test Object Detection" section should be an image with objects detected by the served model. + +9. (Optional) **Using a single core**: In some cases, it is desirable to constrain the inference server to a single core or socket. + Docker has many runtime flags that allow you to control the container's access to the host system's CPUs, memory, and other resources. + See the [Docker document on this topic](https://docs.docker.com/config/containers/resource_constraints/#cpu) for all the options and their definitions. + For example, to run the container so that a single CPU is used, you can use these settings: * `--cpuset-cpus="0"` * `--cpus="1"` * `OMP_NUM_THREADS=1` @@ -160,33 +200,39 @@ For example, with a GCP VM, add `--ssh-flag="-L 8888:localhost:8888"` to your ss * `TENSORFLOW_INTRA_OP_PARALLELISM=1` ``` - (rfcn_venv)$ docker run \ - --name=tfserving_rfcn_1 \ - -p 8500:8500 \ - --cpuset-cpus="0" \ - --cpus="1" \ - -v "$(pwd)/rfcn:/models/rfcn" \ - -e MODEL_NAME=rfcn \ - -e OMP_NUM_THREADS=1 \ - -e TENSORFLOW_INTER_OP_PARALLELISM=1 \ - -e TENSORFLOW_INTRA_OP_PARALLELISM=1 \ - tensorflow/serving:mkl & + cd ~ + docker run \ + --name=tfserving_1core \ + -d \ + -p 8500:8500 \ + -p 8501:8501 \ + --cpuset-cpus="0" \ + --cpus="1" \ + -v "$(pwd)/obj_detection:/models/$model_name" \ + -e MODEL_NAME=$model_name \ + -e OMP_NUM_THREADS=1 \ + -e TENSORFLOW_INTER_OP_PARALLELISM=1 \ + -e TENSORFLOW_INTRA_OP_PARALLELISM=1 \ + tensorflow/serving:mkl ``` - + 10. **Clean up**: * After saving any changes you made to the Jupyter notebook, close the file and stop the Jupyter server by clicking `Quit` from the main file browser. - * After you are fininshed with querying, you can stop the container which is running in the background.
To restart the container with the same name, you need to stop and remove the container from the registry. To view your running containers run `docker ps`. - ``` - (rfcn_venv)$ docker rm -f tfserving_rfcn - ``` + * After you are finished with querying, you can stop the container which is running in the background. + To restart the container with the same name, you need to stop and remove the container from the registry. + To view your running containers run `docker ps`. + + ``` + docker rm -f tfserving + ``` + * Deactivate your virtual environment with `deactivate`. - ## Conclusion You have now seen an end-to-end example of serving an object detection model for inference using TensorFlow Serving, and learned: 1. How to choose good values for the performance-related runtime parameters exposed by the `docker run` command -2. How to verify that the served model can correctly detect objects in an image using a sample Jupyter notebook -3. How to measure online and batch inference metrics using a REST client +2. How to test online and batch inference metrics using a REST or GRPC client +3. How to verify that the served model can correctly detect objects in an image using a sample Jupyter notebook With this knowledge and the example code provided, you should be able to get started serving your own custom object detection model with good performance. If desired, you should also be able to investigate a variety of different settings combinations to see if further performance improvement are possible. 
diff --git a/docs/object_detection/tensorflow_serving/rfcn-benchmark.py b/docs/object_detection/tensorflow_serving/object_detection_benchmark.py similarity index 54% rename from docs/object_detection/tensorflow_serving/rfcn-benchmark.py rename to docs/object_detection/tensorflow_serving/object_detection_benchmark.py index 6948df969..c30c1aeae 100644 --- a/docs/object_detection/tensorflow_serving/rfcn-benchmark.py +++ b/docs/object_detection/tensorflow_serving/object_detection_benchmark.py @@ -14,7 +14,7 @@ # ####### USAGE ######### -# python rfcn-benchmark.py -i +# python object_detection_benchmark.py -i -m -p from __future__ import print_function @@ -25,8 +25,6 @@ import requests import numpy as np from PIL import Image -import tensorflow as tf -from object_detection.utils.visualization_utils import visualize_boxes_and_labels_on_image_array def check_for_link(value): @@ -40,7 +38,7 @@ def check_for_link(value): raise argparse.ArgumentTypeError("{} cannot be a link.".format(value)) def check_valid_folder(value): - """verifies filename exists and isn't a link""" + """Verifies filename exists and isn't a link""" if value is not None: if not os.path.isdir(value): raise argparse.ArgumentTypeError("{} does not exist or is not a directory.". @@ -48,6 +46,20 @@ def check_valid_folder(value): check_for_link(value) return value +def check_valid_model(value): + """Verifies model name is supported""" + if value not in ('rfcn', 'ssdmobilenet'): + raise argparse.ArgumentError("Model name {} does not match 'rfcn' or 'ssdmobilenet'.". + format(value)) + return value + +def check_valid_protocol(value): + """Verifies protocol is supported""" + if value not in ('rest', 'grpc'): + raise argparse.ArgumentError("Protocol name {} does not match 'rest' or 'grpc'.". 
+ format(value)) + return value + def get_random_image(image_dir): image_path = os.path.join(image_dir, random.choice(os.listdir(image_dir))) image = Image.open(image_path) @@ -55,15 +67,38 @@ def get_random_image(image_dir): return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8) +def make_request(batch_size): + if PROTOCOL == 'rest': + np_images = np.repeat(np.expand_dims(get_random_image(IMAGES_PATH), 0).tolist(), batch_size, axis=0).tolist() + return '{"instances" : %s}' % np_images + elif PROTOCOL == 'grpc': + import grpc + import tensorflow as tf + from tensorflow_serving.apis import predict_pb2 + from tensorflow_serving.apis import prediction_service_pb2_grpc + np_images = np.repeat(np.expand_dims(get_random_image(IMAGES_PATH), 0), batch_size, axis=0) + channel = grpc.insecure_channel(SERVER_URL) + stub = prediction_service_pb2_grpc.PredictionServiceStub(channel) + request = predict_pb2.PredictRequest() + request.model_spec.name = MODEL + request.model_spec.signature_name = 'serving_default' + request.inputs['inputs'].CopyFrom(tf.contrib.util.make_tensor_proto(np_images)) + return (stub, request) + +def send_request(predict_request): + if PROTOCOL == 'rest': + requests.post(SERVER_URL, data=predict_request) + elif PROTOCOL == 'grpc': + predict_request[0].Predict(predict_request[1]) + def benchmark(batch_size=1, num_iteration=20, warm_up_iteration=10): i = 0 total_time = 0 for _ in range(num_iteration): i += 1 - np_images = np.repeat(np.expand_dims(get_random_image(IMAGES_PATH), 0).tolist(), batch_size, axis=0).tolist() - predict_request = '{"instances" : %s}' % np_images + predict_request = make_request(batch_size) start_time = time.time() - requests.post(SERVER_URL, data=predict_request) + send_request(predict_request) time_consume = time.time() - start_time print('Iteration %d: %.3f sec' % (i, time_consume)) if i > warm_up_iteration: @@ -81,15 +116,26 @@ def benchmark(batch_size=1, num_iteration=20, warm_up_iteration=10): ap = 
argparse.ArgumentParser() ap.add_argument("-i", "--images_path", type=check_valid_folder, required=True, help="Path to COCO validation directory") + ap.add_argument("-m", "--model", type=check_valid_model, required=True, + help="Name of model (rfcn or ssdmobilenet)") + ap.add_argument("-p", "--protocol", type=check_valid_protocol, required=True, + help="Name of protocol (rest or grpc)") args = vars(ap.parse_args()) - - SERVER_URL = 'http://localhost:8501/v1/models/rfcn:predict' + IMAGES_PATH = args['images_path'] + MODEL = args['model'] + PROTOCOL = args['protocol'] + if PROTOCOL == 'rest': + SERVER_URL = 'http://localhost:8501/v1/models/{}:predict'.format(MODEL) + elif PROTOCOL == 'grpc': + SERVER_URL = 'localhost:8500' print('\n SERVER_URL: {} \n IMAGES_PATH: {}'.format(SERVER_URL, IMAGES_PATH)) - print('\nStarting R-FCN model benchmarking for Latency with batch_size=1, num_iteration=20, warm_up_iteration=10') + print('\nStarting {} model benchmarking for latency on {}:'.format(MODEL.upper(), PROTOCOL.upper())) + print('batch_size=1, num_iteration=20, warm_up_iteration=10\n') benchmark(batch_size=1, num_iteration=20, warm_up_iteration=10) - print('\nStarting R-FCN model benchmarking for Throughput with batch_size=128, num_iteration=10, warm_up_iteration=2') + print('\nStarting {} model benchmarking for throughput on {}:'.format(MODEL.upper(), PROTOCOL.upper())) + print('batch_size=128, num_iteration=10, warm_up_iteration=2\n') benchmark(batch_size=128, num_iteration=10, warm_up_iteration=2) diff --git a/docs/object_detection/tensorflow_serving/requirements.txt b/docs/object_detection/tensorflow_serving/requirements.txt new file mode 100644 index 000000000..c4c13fc19 --- /dev/null +++ b/docs/object_detection/tensorflow_serving/requirements.txt @@ -0,0 +1,16 @@ +# rest +requests + +# grpc +grpc +intel-tensorflow +tensorflow-serving-api + +# object detection api +Cython +contextlib2 +jupyter +matplotlib +pillow +lxml +absl-py \ No newline at end of file From 
ab5c13d9c9ae15e2f7492a80aa5b94053020b07e Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Thu, 13 Jun 2019 16:24:02 -0700 Subject: [PATCH 50/62] Add arg validation for paths in generate_coco_records.py (#328) --- .../inference/generate_coco_records.py | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py b/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py index 6badc74a9..5cc72cf7a 100755 --- a/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py +++ b/models/object_detection/tensorflow/ssd_vgg16/inference/generate_coco_records.py @@ -135,16 +135,36 @@ def get_record(filename, buffer, width, height, bboxes, labels, label_names, dif return tf.train.Example(features = tf.train.Features(feature = features)) +def check_for_link(value): + """ + Throws an error if the specified path is a link. os.islink returns + True for sym links. For files, we also look at the number of links in + os.stat() to determine if it's a hard link. + """ + if os.path.islink(value) or \ + (os.path.isfile(value) and os.stat(value).st_nlink > 1): + raise argparse.ArgumentTypeError("{} cannot be a link.".format(value)) + +def check_valid_file_or_folder(value): + """verifies filename exists and isn't a link""" + if value is not None: + if not os.path.isfile(value) and not os.path.isdir(value): + raise argparse.ArgumentTypeError("{} does not exist or is not a file/folder.". 
+ format(value)) + check_for_link(value) + return value + + def main(): RECORDS_PER_FILE = 1024 RECORD_FILENAME_FORMAT = '%s-%.5d-of-%.5d' parser = argparse.ArgumentParser() - parser.add_argument('--image_path', type = str, required = True, help = 'path to the input validation image files') - parser.add_argument('--annotations_file', type = str, required = True, help = 'name of the input validation annotations file') - parser.add_argument('--output_prefix', type = str, required = True, help = 'prefix of the output TensorFlow record files') - parser.add_argument('--output_path', type = str, required = True, help = 'path to the output TensorFlow record files') + parser.add_argument('--image_path', type=check_valid_file_or_folder, required=True, help='path to the input validation image files') + parser.add_argument('--annotations_file', type=check_valid_file_or_folder, required=True, help='name of the input validation annotations file') + parser.add_argument('--output_prefix', type=str, required=True, help='prefix of the output TensorFlow record files') + parser.add_argument('--output_path', type=check_valid_file_or_folder, required=True, help='path to the output TensorFlow record files') args = parser.parse_args() From f0aa7abab24ba07c21499f2f13d4fb4e1f2eb80b Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Fri, 14 Jun 2019 14:10:44 -0700 Subject: [PATCH 51/62] Specify scipy==1.2.1 for MaskRCNN (#329) --- benchmarks/common/tensorflow/start.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index 7c37309d3..923cdecc8 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -443,6 +443,7 @@ function maskrcnn() { if [ ${NOINSTALL} != "True" ]; then # install dependencies pip3 install -r ${MOUNT_EXTERNAL_MODELS_SOURCE}/requirements.txt + pip3 install --force-reinstall scipy==1.2.1 # install cocoapi get_cocoapi 
${MOUNT_EXTERNAL_MODELS_SOURCE}/coco ${MOUNT_EXTERNAL_MODELS_SOURCE}/samples/coco From c895e4752ac7cf37247cd54b834cdd143f3ea370 Mon Sep 17 00:00:00 2001 From: Melanie Buehler Date: Fri, 14 Jun 2019 14:17:14 -0700 Subject: [PATCH 52/62] Remove grpc package from tfserving dependencies (#330) --- benchmarks/common/tensorflow_serving/start.sh | 3 +-- docs/general/tensorflow_serving/InstallationGuide.md | 2 +- docs/image_recognition/tensorflow_serving/Tutorial.md | 7 ++----- docs/language_translation/tensorflow_serving/Tutorial.md | 4 ++-- docs/object_detection/tensorflow_serving/requirements.txt | 1 - 5 files changed, 6 insertions(+), 11 deletions(-) diff --git a/benchmarks/common/tensorflow_serving/start.sh b/benchmarks/common/tensorflow_serving/start.sh index e611cc931..5da3f31c5 100644 --- a/benchmarks/common/tensorflow_serving/start.sh +++ b/benchmarks/common/tensorflow_serving/start.sh @@ -103,8 +103,7 @@ function resnet50_or_inceptionv3(){ virtualenv venv source venv/bin/activate - pip install grpc \ - requests \ + pip install requests \ intel-tensorflow \ tensorflow-serving-api diff --git a/docs/general/tensorflow_serving/InstallationGuide.md b/docs/general/tensorflow_serving/InstallationGuide.md index 60ffcede1..0aa6a03b7 100644 --- a/docs/general/tensorflow_serving/InstallationGuide.md +++ b/docs/general/tensorflow_serving/InstallationGuide.md @@ -260,7 +260,7 @@ $ curl -s http://download.tensorflow.org/models/official/20181001_resnet/savedmo $ cd ~ $ virtualenv tfserving_venv $ source tfserving_venv/bin/activate - (tfserving_venv)$ pip install grpc requests tensorflow tensorflow-serving-api + (tfserving_venv)$ pip install requests tensorflow tensorflow-serving-api ``` * Run the example `resnet_client_grpc.py` script from the TensorFlow Serving repository, which you cloned earlier. 
``` diff --git a/docs/image_recognition/tensorflow_serving/Tutorial.md b/docs/image_recognition/tensorflow_serving/Tutorial.md index 71f94f76c..e5e9b0153 100644 --- a/docs/image_recognition/tensorflow_serving/Tutorial.md +++ b/docs/image_recognition/tensorflow_serving/Tutorial.md @@ -63,13 +63,10 @@ For steps 1 and 2, refer to the Intel Model Zoo READMEs: $ pip install virtualenv $ virtualenv venv ``` - Then activate the virtual environment and install `grpc`, `requests`, `tensorflow`, and `tensorflow-serving-api` (at the time of this writing, the order of installation matters): + Then activate the virtual environment and install `requests`, `tensorflow`, and `tensorflow-serving-api`: ``` $ source venv/bin/activate - (venv)$ pip install grpc - (venv)$ pip install requests - (venv)$ pip install intel-tensorflow - (venv)$ pip install tensorflow-serving-api + (venv)$ pip install requests intel-tensorflow tensorflow-serving-api ``` 5. **Create a SavedModel**: Using the conversion script `model_graph_to_saved_model.py`, convert the pre-trained model graph to a SavedModel. (For ResNet50, substitute the name of the ResNet50 FP32 or the ResNet50 Int8 pre-trained model.) diff --git a/docs/language_translation/tensorflow_serving/Tutorial.md b/docs/language_translation/tensorflow_serving/Tutorial.md index 1d8ebff71..c584495c1 100644 --- a/docs/language_translation/tensorflow_serving/Tutorial.md +++ b/docs/language_translation/tensorflow_serving/Tutorial.md @@ -76,13 +76,13 @@ Tuning TensorFlow Serving to take full advantage of your hardware for language t sudo apt-get install -y python python-pip virtualenv ``` - - Create and activate the python virtual environment in your home directory and install the `grpc`, `tensorflow`, `pandas`, and `tensorflow-serving-api` packages. + - Create and activate the python virtual environment in your home directory and install the `tensorflow`, `pandas`, and `tensorflow-serving-api` packages. 
``` cd ~ virtualenv lt_venv source lt_venv/bin/activate - pip install grpc intel-tensorflow pandas tensorflow-serving-api + pip install intel-tensorflow pandas tensorflow-serving-api ``` 4. **Download the pre-trained model and test data**: Download and extract the packaged pre-trained model and dataset ```transformer_lt_official_fp32_pretrained_model.tar.gz``` diff --git a/docs/object_detection/tensorflow_serving/requirements.txt b/docs/object_detection/tensorflow_serving/requirements.txt index c4c13fc19..1e77692c2 100644 --- a/docs/object_detection/tensorflow_serving/requirements.txt +++ b/docs/object_detection/tensorflow_serving/requirements.txt @@ -2,7 +2,6 @@ requests # grpc -grpc intel-tensorflow tensorflow-serving-api From fc16c086dd303cdda99fafdd0cc0a65de29601e1 Mon Sep 17 00:00:00 2001 From: Wafaa Taie Date: Tue, 18 Jun 2019 10:05:16 -0700 Subject: [PATCH 53/62] fix the path to the calibration script for resnet101 int8. (#332) --- .../tensorflow/resnet101/inference/int8/model_init.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py b/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py index a53cf6884..36a9f479a 100644 --- a/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py +++ b/benchmarks/image_recognition/tensorflow/resnet101/inference/int8/model_init.py @@ -101,7 +101,7 @@ def run_benchmark_or_accuracy(self): self.run_command(cmd) def run_calibration(self): - calibration_script = os.path.join(self.args.intelai_models, self.args.mode, + calibration_script = os.path.join(self.args.intelai_models, self.args.precision, "calibration.py") script_args_list = [ "input_graph", "data_location", From d6c0cb89e0302edd38f686f545fec4843192f4bc Mon Sep 17 00:00:00 2001 From: Melanie Buehler Date: Tue, 18 Jun 2019 15:58:44 -0700 Subject: [PATCH 54/62] NCF doc hotfix (#334) * Update NCF doc for TF models code modification 
* Creates a backup and adds *.bak to gitignore --- .gitignore | 1 + benchmarks/recommendation/tensorflow/ncf/README.md | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 82da18448..b3e91143c 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ .coverage .tox test_data/ +*.bak diff --git a/benchmarks/recommendation/tensorflow/ncf/README.md b/benchmarks/recommendation/tensorflow/ncf/README.md index ccde269ff..53a21a3b3 100644 --- a/benchmarks/recommendation/tensorflow/ncf/README.md +++ b/benchmarks/recommendation/tensorflow/ncf/README.md @@ -14,13 +14,13 @@ This model uses official tensorflow models repo, where [ncf](https://github.com/ model automatically downloads movielens ml-1m dataset as default if the `--data-location` flag is not set. If you want to download movielens 1M dataset and provide that path to `--data-location`, check this [reference](https://grouplens.org/datasets/movielens/1m/) -2. Clone the official `tensorflow/models` repository with tag `v1.11` +2. Clone the official `tensorflow/models` repository with tag `v1.11` and make a small change to `data_async_generation.py`, commenting out a line that causes a crash in the model script. ``` $ git clone https://github.com/tensorflow/models.git $ cd models $ git checkout v1.11 -$ pwd +$ sed -i.bak 's/atexit.register/# atexit.register/g' official/recommendation/data_async_generation.py ``` 3. Now clone `IntelAI/models` repository and then navigate to the `benchmarks` folder: From 2f46653463edab0935f713ea451d98c8444bb525 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Sat, 22 Jun 2019 02:35:00 +0800 Subject: [PATCH 55/62] BKC for mobilenet-v1 int8 inference (#333) * Add affinity and config setting to get better performance on mobilenet-v1. * Remove redundant config settings for mobilenet_v1 inference. 
--- .../mobilenet_v1/inference/int8/benchmark.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py index 7cccb9f23..0e7a41f31 100644 --- a/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py +++ b/models/image_recognition/tensorflow/mobilenet_v1/inference/int8/benchmark.py @@ -113,18 +113,19 @@ def load_graph(model_file): name='synthetic_images') image_data = None + + config = tf.ConfigProto() + config.inter_op_parallelism_threads = num_inter_threads + config.intra_op_parallelism_threads = num_intra_threads + config.use_per_session_threads = True + with tf.Session() as sess: image_data = sess.run(images) - graph = load_graph(model_file) input_tensor = graph.get_tensor_by_name(input_layer + ":0"); output_tensor = graph.get_tensor_by_name(output_layer + ":0"); - config = tf.ConfigProto() - config.inter_op_parallelism_threads = num_inter_threads - config.intra_op_parallelism_threads = num_intra_threads - with tf.Session(graph=graph, config=config) as sess: sys.stdout.flush() print("[Running warmup steps...]") @@ -134,7 +135,7 @@ def load_graph(model_file): elapsed_time = time.time() - start_time if((t+1) % 10 == 0): print("steps = {0}, {1} images/sec" - "".format(t+1, batch_size/elapsed_time)) + "".format(t+1, batch_size/elapsed_time), flush=True) print("[Running benchmark steps...]") total_time = 0; @@ -145,4 +146,4 @@ def load_graph(model_file): elapsed_time = time.time() - start_time if((t+1) % 10 == 0): print("steps = {0}, {1} images/sec" - "".format(t+1, batch_size/elapsed_time)); + "".format(t+1, batch_size/elapsed_time), flush=True); From 41977d78058318a69fe71395e0574861f44371ff Mon Sep 17 00:00:00 2001 From: Jitendra Patil Date: Fri, 21 Jun 2019 12:42:11 -0700 Subject: [PATCH 56/62] TF Serving: tf version fix (#337) * updated order of intel-tensorflow 
& tensorflow-serving-api installation. --- benchmarks/common/tensorflow_serving/start.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/benchmarks/common/tensorflow_serving/start.sh b/benchmarks/common/tensorflow_serving/start.sh index 5da3f31c5..b27e40ed5 100644 --- a/benchmarks/common/tensorflow_serving/start.sh +++ b/benchmarks/common/tensorflow_serving/start.sh @@ -103,10 +103,12 @@ function resnet50_or_inceptionv3(){ virtualenv venv source venv/bin/activate - pip install requests \ - intel-tensorflow \ - tensorflow-serving-api - + # Make sure intel-tensorflow is after tensorflow-serving-api, so that + # tensorflow from intel-tensorflow gets installed effectively. + pip install grpc \ + requests \ + tensorflow-serving-api \ + intel-tensorflow # cd to image recognition tfserving scripts cd ${WORKSPACE}/../../${USE_CASE}/${FRAMEWORK}/${MODEL_NAME}/${MODE}/${PRECISION} From 6a13ce8cf368f7a63aed661f63ed68b8dab2789c Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com> Date: Mon, 24 Jun 2019 14:57:19 -0700 Subject: [PATCH 57/62] Install the development package for google-perftools (#338) * Install the development package for google-perftools * Creating symlinks for /usr/lib/libtcmalloc.so if doesn't exist --- benchmarks/common/tensorflow/start.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index 923cdecc8..d81504d83 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -67,9 +67,15 @@ if [[ ${NOINSTALL} != "True" ]]; then pip install --upgrade pip pip install requests - # install google-perftools for tcmalloc + # install libgoogle-perftools-dev for tcmalloc if [[ ${DISABLE_TCMALLOC} != "True" ]]; then - apt-get install google-perftools -y + apt-get install --no-install-recommends --fix-missing google-perftools -y + if [ !
-f /usr/lib/libtcmalloc.so ]; then + apt-get install --no-install-recommends --fix-missing libgoogle-perftools-dev -y + if [ ! -f /usr/lib/libtcmalloc.so ]; then + ln -sf /usr/lib/x86_64-linux-gnu/libtcmalloc.so /usr/lib/libtcmalloc.so + fi + fi fi fi @@ -827,7 +833,13 @@ function wide_deep_large_ds() { if [[ -z "${LIBTCMALLOC}" ]]; then echo "libtcmalloc.so.4 not found, trying to install" apt-get update - apt-get install google-perftools --fix-missing -y + apt-get install --no-install-recommends --fix-missing google-perftools -y + if [ ! -f /usr/lib/libtcmalloc.so ]; then + apt-get install --no-install-recommends --fix-missing libgoogle-perftools-dev -y + if [ ! -f /usr/lib/libtcmalloc.so ]; then + ln -sf /usr/lib/x86_64-linux-gnu/libtcmalloc.so /usr/lib/libtcmalloc.so + fi + fi fi LIBTCMALLOC="$(ldconfig -p | grep $TCMALLOC_LIB | tr ' ' '\n' | grep /)" From fba107ab1ac1e81b654cd82943dccdd556a09138 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Tue, 25 Jun 2019 15:51:01 -0700 Subject: [PATCH 58/62] Update TF image tag and updates due to using a non-dev container (#339) --- .../tensorflow/dcgan/README.md | 2 +- benchmarks/common/tensorflow/start.sh | 7 ++++++- .../content_creation/tensorflow/draw/README.md | 4 ++-- .../tensorflow/facenet/README.md | 6 +++--- .../tensorflow/mtcc/README.md | 2 +- .../tensorflow/densenet169/README.md | 6 +++--- .../tensorflow/inception_resnet_v2/README.md | 12 ++++++------ .../tensorflow/inceptionv3/README.md | 16 ++++++++-------- .../tensorflow/inceptionv4/README.md | 12 ++++++------ .../tensorflow/mobilenet_v1/README.md | 12 ++++++------ .../tensorflow/resnet101/README.md | 14 +++++++------- .../tensorflow/resnet50/README.md | 12 ++++++------ .../tensorflow/resnet50v1_5/README.md | 14 +++++++------- .../tensorflow/squeezenet/README.md | 4 ++-- .../tensorflow/maskrcnn/README.md | 2 +- .../image_segmentation/tensorflow/unet/README.md | 2 +- .../language_modeling/tensorflow/lm-1b/README.md | 4 ++-- 
.../tensorflow/gnmt/README.md | 4 ++-- .../tensorflow/transformer_language/README.md | 4 ++-- .../tensorflow/transformer_lt_official/README.md | 4 ++-- .../tensorflow/faster_rcnn/README.md | 8 ++++---- .../object_detection/tensorflow/rfcn/README.md | 8 ++++---- .../tensorflow/ssd-mobilenet/README.md | 8 ++++---- .../tensorflow/ssd-resnet34/README.md | 8 ++++---- .../tensorflow/ssd_vgg16/README.md | 8 ++++---- .../recommendation/tensorflow/ncf/README.md | 6 +++--- .../tensorflow/wide_deep/README.md | 4 ++-- .../tensorflow/wide_deep_large_ds/README.md | 12 ++++++------ .../text_to_speech/tensorflow/wavenet/README.md | 2 +- docs/general/tensorflow/LaunchBenchmark.md | 4 ++-- 30 files changed, 108 insertions(+), 103 deletions(-) diff --git a/benchmarks/adversarial_networks/tensorflow/dcgan/README.md b/benchmarks/adversarial_networks/tensorflow/dcgan/README.md index 688844ae0..e6f572916 100644 --- a/benchmarks/adversarial_networks/tensorflow/dcgan/README.md +++ b/benchmarks/adversarial_networks/tensorflow/dcgan/README.md @@ -60,7 +60,7 @@ $ python launch_benchmark.py \ --socket-id 0 \ --checkpoint /home//dcgan_fp32_unconditional_cifar10_pretrained_model \ --data-location /home//cifar10 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` 5. Log files are located at the value of `--output-dir`. diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index d81504d83..4eeda5648 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -196,7 +196,7 @@ function install_protoc() { if [ ! 
-f "bin/protoc" ]; then install_location=$1 echo "protoc not found, installing protoc from ${install_location}" - apt-get -y install wget + apt-get -y install wget unzip wget -O protobuf.zip ${install_location} unzip -o protobuf.zip rm protobuf.zip @@ -642,6 +642,7 @@ function ssd-resnet34() { do pip install $line done + apt install -y git-all old_dir=${PWD} cd /tmp git clone --single-branch https://github.com/tensorflow/benchmarks.git @@ -762,6 +763,10 @@ function transformer_lt_official() { exit 1 fi + if [ ${NOINSTALL} != "True" ]; then + pip install pandas + fi + cp ${MOUNT_INTELAI_MODELS_SOURCE}/${MODE}/${PRECISION}/infer_ab.py \ ${MOUNT_EXTERNAL_MODELS_SOURCE}/official/transformer/infer_ab.py diff --git a/benchmarks/content_creation/tensorflow/draw/README.md b/benchmarks/content_creation/tensorflow/draw/README.md index 310b789ac..069dfd7a8 100644 --- a/benchmarks/content_creation/tensorflow/draw/README.md +++ b/benchmarks/content_creation/tensorflow/draw/README.md @@ -48,7 +48,7 @@ modes/precisions: --model-name draw \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ --checkpoint /home//draw_fp32_pretrained_model \ --data-location /home//mnist \ --batch-size 1 \ @@ -61,7 +61,7 @@ modes/precisions: --model-name draw \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ --checkpoint /home//draw_fp32_pretrained_model \ --data-location /home//mnist \ --batch-size 100 \ diff --git a/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md b/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md index 5d8bef1dc..4a5322bad 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md +++ b/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md @@ -59,7 +59,7 @@ 
python launch_benchmark.py \ --checkpoint /home//checkpoints \ --data-location /home//dataset \ --model-source-dir /home//facenet/ \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` Example log tail for online inference: ``` @@ -94,7 +94,7 @@ python launch_benchmark.py \ --checkpoint /home//checkpoints \ --data-location /home//dataset \ --model-source-dir /home//facenet/ \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` Example log tail for batch inference: ``` @@ -126,7 +126,7 @@ python launch_benchmark.py \ --checkpoint /home//checkpoints \ --data-location /home//dataset \ --model-source-dir /home//facenet/ \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` Example log tail for accuracy: ``` diff --git a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md index 84017e33b..a79584c36 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md +++ b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md @@ -55,7 +55,7 @@ Run: --mode inference \ --socket-id 0 \ --checkpoint /home//MTCNN_model \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` 6. The log file is saved to the value of `--output-dir`. 
diff --git a/benchmarks/image_recognition/tensorflow/densenet169/README.md b/benchmarks/image_recognition/tensorflow/densenet169/README.md index b1ecd8832..8f4a0b3f2 100644 --- a/benchmarks/image_recognition/tensorflow/densenet169/README.md +++ b/benchmarks/image_recognition/tensorflow/densenet169/README.md @@ -58,7 +58,7 @@ following modes/precisions: --batch-size 100 \ --socket-id 0 \ --in-graph /home//densenet169_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ -- input_height=224 input_width=224 warmup_steps=20 steps=100 \ input_layer="input" output_layer="densenet169/predictions/Reshape_1" ``` @@ -74,7 +74,7 @@ following modes/precisions: --batch-size 1 \ --socket-id 0 \ --in-graph /home//densenet169_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ -- input_height=224 input_width=224 warmup_steps=20 steps=100 \ input_layer="input" output_layer="densenet169/predictions/Reshape_1" ``` @@ -91,7 +91,7 @@ following modes/precisions: --batch-size 100 \ --socket-id 0 \ --in-graph /home//densenet169_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --data-location /home//imagenet_validation_dataset \ -- input_height=224 input_width=224 \ input_layer="input" output_layer="densenet169/predictions/Reshape_1" diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md index 7d62cdf10..034651a7f 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md @@ -84,7 +84,7 @@ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image 
intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords ``` @@ -100,7 +100,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb ``` @@ -115,7 +115,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb ``` @@ -242,7 +242,7 @@ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords ``` @@ -259,7 +259,7 @@ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` For batch inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 128`): @@ -274,7 +274,7 @@ python launch_benchmark.py \ --batch-size 128 \ --socket-id 0 \ --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md 
b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md index 64f0209ef..adaedef36 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md @@ -97,7 +97,7 @@ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords ``` @@ -118,7 +118,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords \ -- warmup_steps=50 steps=500 @@ -135,7 +135,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -151,7 +151,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords \ -- warmup_steps=50 steps=500 @@ -168,7 +168,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -258,7 +258,7 @@ python launch_benchmark.py \ 
--framework tensorflow \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv3_fp32_pretrained_model.pb ``` Example log tail when running for online inference: @@ -289,7 +289,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv3_fp32_pretrained_model.pb ``` Example log tail when running for batch inference: @@ -321,7 +321,7 @@ python launch_benchmark.py \ --accuracy-only \ --batch-size 100 \ --data-location /dataset/Imagenet_Validation \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv3_fp32_pretrained_model.pb ``` Example log tail when running for accuracy: diff --git a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md index 7084202c0..31b3ba91b 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md @@ -56,7 +56,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --accuracy-only \ --batch-size 100 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv4_int8_pretrained_model.pb \ --data-location /home//ImageNet_TFRecords ``` @@ -71,7 +71,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
--benchmark-only \ --batch-size 240 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv4_int8_pretrained_model.pb ``` @@ -85,7 +85,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv4_int8_pretrained_model.pb ``` @@ -185,7 +185,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --accuracy-only \ --batch-size 100 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv4_fp32_pretrained_model.pb \ --data-location /home//ImageNet_TFRecords ``` @@ -200,7 +200,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --benchmark-only \ --batch-size 240 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv4_fp32_pretrained_model.pb ``` @@ -214,7 +214,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
--benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//inceptionv4_fp32_pretrained_model.pb ``` diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index 694a3f575..0c7295244 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -67,7 +67,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene --batch-size 240 \ --socket-id 0 \ --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ -- input_height=224 input_width=224 warmup_steps=10 steps=50 \ input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" ``` @@ -83,7 +83,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene --batch-size 1 \ --socket-id 0 \ --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ -- input_height=224 input_width=224 warmup_steps=10 steps=50 \ input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" ``` @@ -100,7 +100,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene --batch-size 100 \ --socket-id 0 \ --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --data-location /home//imagenet_validation_dataset \ -- input_height=224 input_width=224 \ input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" @@ -216,7 +216,7 @@ $ wget 
https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene --model-name mobilenet_v1 \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//tensorflow/models \ --batch-size 1 \ --socket-id 0 \ @@ -234,7 +234,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene --model-name mobilenet_v1 \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//tensorflow/models \ --batch-size 100 \ --socket-id 0 \ @@ -248,7 +248,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene --model-name mobilenet_v1 \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//tensorflow/models \ --batch-size 100 \ --accuracy-only \ diff --git a/benchmarks/image_recognition/tensorflow/resnet101/README.md b/benchmarks/image_recognition/tensorflow/resnet101/README.md index 9ab36ebc7..a39daaf70 100644 --- a/benchmarks/image_recognition/tensorflow/resnet101/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet101/README.md @@ -85,7 +85,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --data-location /home//dataset/FullImageNetData_directory \ --in-graph=/home//resnet101_int8_pretrained_model.pb ``` @@ -106,7 +106,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ 
--in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -123,7 +123,7 @@ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --data-location /home//dataset/FullImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -139,7 +139,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -156,7 +156,7 @@ python launch_benchmark.py \ --batch-size 128 \ --data-location /home//dataset/FullImageNetData_directory \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -250,7 +250,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/resnet10 --mode inference \ --model-name resnet101 \ --batch-size 128 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//trained_models/resnet101_fp32_pretrained_model.pb \ --socket-id 0 ``` @@ -277,7 +277,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/resnet10 --mode inference \ --model-name resnet101 \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /home//trained_models/resnet101_fp32_pretrained_model.pb \ --data-location /home//imagenet_validation_dataset \ --accuracy-only \ diff --git a/benchmarks/image_recognition/tensorflow/resnet50/README.md 
b/benchmarks/image_recognition/tensorflow/resnet50/README.md index db5ca2f58..751f15e98 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50/README.md @@ -58,7 +58,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=100 \ --accuracy-only \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The log file is saved to the value of `--output-dir`. @@ -99,7 +99,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=128 \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ -- warmup_steps=50 steps=500 ``` The tail of the log output when the script completes should look @@ -160,7 +160,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The log file is saved to the value of `--output-dir`. @@ -195,7 +195,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The log file is saved to the value of `--output-dir`. @@ -233,7 +233,7 @@ $ python launch_benchmark.py \ --batch-size 100 \ --socket-id 0 \ --data-location /home//dataset/ImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The log file is saved to the value of `--output-dir`.
@@ -267,7 +267,7 @@ $ python launch_benchmark.py \ --batch-size 100 \ --socket-id 0 \ --data-location /home//dataset/ImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The results file will be written to the `models/benchmarks/common/tensorflow/logs` directory, unless another diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md b/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md index 2a13913d9..314a65dcc 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md @@ -42,7 +42,7 @@ $ git clone https://github.com/IntelAI/models.git The optimized ResNet50v1.5 model files are attached to the [intelai/models](https://github.com/intelai/models) repo and located at `models/models/image_recognition/tensorflow/resnet50v1_5/`. - The docker image (`intelaipg/intel-optimized-tensorflow:1.14`) + The docker image (`intelaipg/intel-optimized-tensorflow:1.14.0`) used in the commands above were built using [TensorFlow](git@github.com:tensorflow/tensorflow.git) master for TensorFlow version 1.14. @@ -61,7 +61,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=100 \ --accuracy-only \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The log file is saved to the value of `--output-dir`. 
@@ -100,7 +100,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=128 \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ -- warmup_steps=50 steps=500 ``` The tail of the log output when the benchmarking completes should look @@ -159,7 +159,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The log file is saved to the value of `--output-dir`. @@ -197,7 +197,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The log file is saved to the value of `--output-dir`. @@ -238,7 +238,7 @@ $ python launch_benchmark.py \ --batch-size 100 \ --socket-id 0 \ --data-location /home//dataset/ImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The log file is saved to the value of `--output-dir`.
@@ -275,7 +275,7 @@ $ python launch_benchmark.py \ --batch-size 100 \ --socket-id 0 \ --data-location /home//dataset/ImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The results file will be written to the `models/benchmarks/common/tensorflow/logs` directory, unless another diff --git a/benchmarks/image_recognition/tensorflow/squeezenet/README.md b/benchmarks/image_recognition/tensorflow/squeezenet/README.md index c6118e89d..2c3a245f9 100644 --- a/benchmarks/image_recognition/tensorflow/squeezenet/README.md +++ b/benchmarks/image_recognition/tensorflow/squeezenet/README.md @@ -79,7 +79,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --socket-id 0 \ --batch-size 64 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --checkpoint /home//squeezenet_checkpoints \ --data-location /home//datasets/ImageNet_TFRecords ``` @@ -94,7 +94,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --socket-id 0 \ --batch-size 1 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --checkpoint /home//squeezenet_checkpoints \ --data-location /home//datasets/ImageNet_TFRecords ``` diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md b/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md index f7058bc90..938dcb634 100644 --- a/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md +++ b/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md @@ -61,7 +61,7 @@ $ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --data-location /home//COCO2014 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 ``` 5. Log files are located at the value of `--output-dir`. 
diff --git a/benchmarks/image_segmentation/tensorflow/unet/README.md b/benchmarks/image_segmentation/tensorflow/unet/README.md index 9d7f9dbdb..e91d2af2a 100644 --- a/benchmarks/image_segmentation/tensorflow/unet/README.md +++ b/benchmarks/image_segmentation/tensorflow/unet/README.md @@ -57,7 +57,7 @@ modes/precisions: --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --checkpoint /home//unet_trained \ --model-source-dir /home//tf_unet \ -- checkpoint_name=model.cpkt diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/README.md b/benchmarks/language_modeling/tensorflow/lm-1b/README.md index ec4bdcf47..871660d67 100644 --- a/benchmarks/language_modeling/tensorflow/lm-1b/README.md +++ b/benchmarks/language_modeling/tensorflow/lm-1b/README.md @@ -53,7 +53,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /inference/cloud/language_modeling ``` @@ -68,7 +68,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1024 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /inference/cloud/language_modeling \ -- steps=4 \ ``` diff --git a/benchmarks/language_translation/tensorflow/gnmt/README.md b/benchmarks/language_translation/tensorflow/gnmt/README.md index 143daf45d..523965917 100644 --- a/benchmarks/language_translation/tensorflow/gnmt/README.md +++ b/benchmarks/language_translation/tensorflow/gnmt/README.md @@ -82,7 +82,7 @@ python launch_benchmark.py \ --socket-id 0 \ --checkpoint /home//gnmt_checkpoints \ --data-location /home//wmt16 \ ---docker-image intelaipg/intel-optimized-tensorflow:1.14 \ +--docker-image 
intelaipg/intel-optimized-tensorflow:1.14.0 \ -- infer_mode=beam_search ``` @@ -99,7 +99,7 @@ python launch_benchmark.py \ --socket-id 0 \ --checkpoint /home//gnmt_checkpoints \ --data-location /home//wmt16 \ ---docker-image intelaipg/intel-optimized-tensorflow:1.14 \ +--docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ -- infer_mode=beam_search ``` diff --git a/benchmarks/language_translation/tensorflow/transformer_language/README.md b/benchmarks/language_translation/tensorflow/transformer_language/README.md index 93bf84700..f4997711f 100644 --- a/benchmarks/language_translation/tensorflow/transformer_language/README.md +++ b/benchmarks/language_translation/tensorflow/transformer_language/README.md @@ -82,7 +82,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --checkpoint /home//transformer_lt_fp32_pretrained_model \ --data-location /home//t2t_data \ --model-source-dir /home//tensor2tensor/ \ @@ -99,7 +99,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 32 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --checkpoint /home//transformer_lt_fp32_pretrained_model \ --data-location /home//t2t_data \ --model-source-dir /home//tensor2tensor/ \ diff --git a/benchmarks/language_translation/tensorflow/transformer_lt_official/README.md b/benchmarks/language_translation/tensorflow/transformer_lt_official/README.md index f0d79e4e3..f592cf832 100644 --- a/benchmarks/language_translation/tensorflow/transformer_lt_official/README.md +++ b/benchmarks/language_translation/tensorflow/transformer_lt_official/README.md @@ -65,7 +65,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest \ + 
--docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//tensorflow-models/models \ --in-graph /home//transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \ --data-location /home//transformer_lt_official_fp32_pretrained_model/data \ @@ -85,7 +85,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 64 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//tensorflow-models/models \ --in-graph /home//transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \ --data-location /home//transformer_lt_official_fp32_pretrained_model/data \ diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md index 9528f4808..cd57419f1 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md @@ -156,7 +156,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --checkpoint /home//faster_rcnn_resnet50_fp32_coco \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ -- config_file=pipeline.config ``` @@ -169,7 +169,7 @@ python launch_benchmark.py \ --mode inference \ --precision fp32 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//tensorflow/models \ --data-location /home//coco/output \ --in-graph /home//faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb \ @@ -270,7 +270,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --in-graph /home//faster_rcnn_int8_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ 
--benchmark-only \ -- number_of_steps=5000 ``` @@ -285,7 +285,7 @@ python launch_benchmark.py \ --precision int8 \ --framework tensorflow \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//tensorflow/models \ --data-location /home//output/coco_val.record \ --in-graph /home//faster_rcnn_int8_pretrained_model.pb \ diff --git a/benchmarks/object_detection/tensorflow/rfcn/README.md b/benchmarks/object_detection/tensorflow/rfcn/README.md index 85f2e2fbc..42c488ca4 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/README.md +++ b/benchmarks/object_detection/tensorflow/rfcn/README.md @@ -139,7 +139,7 @@ python launch_benchmark.py \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//tensorflow/models \ --data-location /home//val/val2017 \ --in-graph /home//rfcn_resnet101_int8_coco_pretrained_model.pb \ @@ -156,7 +156,7 @@ python launch_benchmark.py \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//tensorflow/models \ --data-location /home//coco/output/coco_val.record-00000-of-00001 \ --in-graph /home//rfcn_resnet101_int8_coco_pretrained_model.pb \ @@ -331,7 +331,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --checkpoint /home//rfcn_resnet101_fp32_coco \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ -- config_file=rfcn_pipeline.config ``` @@ -344,7 +344,7 @@ python launch_benchmark.py \ --mode inference \ --precision fp32 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image 
intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//tensorflow/models \ --data-location /home//coco/output/coco_val.record \ --in-graph /home//rfcn_resnet101_fp32_coco/frozen_inference_graph.pb \ diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md index fa395d5f9..35db5d14d 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md @@ -121,7 +121,7 @@ python launch_benchmark.py \ --precision int8 \ --framework tensorflow \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//tensorflow/models \ --data-location /home//val/val2017 \ --in-graph /home//ssdmobilenet_int8_pretrained_model.pb \ @@ -138,7 +138,7 @@ python launch_benchmark.py \ --precision int8 \ --framework tensorflow \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//tensorflow/models \ --data-location /home//coco/output/coco_val.record \ --in-graph /home//ssdmobilenet_int8_pretrained_model.pb \ @@ -349,7 +349,7 @@ $ python launch_benchmark.py \ --precision fp32 \ --mode inference \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --benchmark-only ``` @@ -368,7 +368,7 @@ $ python launch_benchmark.py \ --precision fp32 \ --mode inference \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --accuracy-only ``` diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md index 3f2623389..2b5364ff9 100644 --- 
a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md @@ -134,7 +134,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ --benchmark-only ``` @@ -154,7 +154,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ --accuracy-only ``` @@ -310,7 +310,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-devel-mkl-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ --benchmark-only ``` @@ -330,7 +330,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-prs-b5d67b7-devel-mkl-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ --accuracy-only ``` diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md index 8036419ba..6ecd4b646 100644 --- a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md @@ -103,7 +103,7 @@ python launch_benchmark.py \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ --model-source-dir /home//SSD.TensorFlow \ --data-location /home//tf_records \ --in-graph /home//ssdvgg16_int8_pretrained_model.pb \ @@ -133,7 +133,7 @@ python launch_benchmark.py \ --mode inference \ --precision int8 \ --framework tensorflow \ - 
--docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ --model-source-dir /home//SSD.TensorFlow \ --data-location /home//tf_records \ --in-graph /home//ssdvgg16_int8_pretrained_model.pb \ @@ -210,7 +210,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ --batch-size 1 \ --socket-id 0 \ --num-inter-threads 11 \ @@ -237,7 +237,7 @@ python launch_benchmark.py \ --mode inference \ --precision fp32 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14-py3 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ --model-source-dir /home//SSD.TensorFlow \ --data-location /home//tf_records \ --in-graph /home//ssdvgg16_fp32_pretrained_model.pb \ diff --git a/benchmarks/recommendation/tensorflow/ncf/README.md b/benchmarks/recommendation/tensorflow/ncf/README.md index 53a21a3b3..2eccb84e7 100644 --- a/benchmarks/recommendation/tensorflow/ncf/README.md +++ b/benchmarks/recommendation/tensorflow/ncf/README.md @@ -53,7 +53,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The tail of batch inference log, looks as below. @@ -83,7 +83,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The tail of online inference log, looks as below. 
@@ -115,7 +115,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 ``` The tail of accuracy log, looks as below. diff --git a/benchmarks/recommendation/tensorflow/wide_deep/README.md b/benchmarks/recommendation/tensorflow/wide_deep/README.md index d092b8f22..8f28c0607 100644 --- a/benchmarks/recommendation/tensorflow/wide_deep/README.md +++ b/benchmarks/recommendation/tensorflow/wide_deep/README.md @@ -56,7 +56,7 @@ use in the next step. --batch-size 1 \ --data-location /home//widedeep_dataset \ --checkpoint /home//path/to/wide_deep_fp32_pretrained_model \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --verbose ``` * Running the model in batch inference mode, set `--batch-size` = `1024` @@ -72,7 +72,7 @@ use in the next step. --batch-size 1024 \ --data-location /home//path/to/dataset \ --checkpoint /home//path/to/wide_deep_fp32_pretrained_model \ - --docker-image intelaipg/intel-optimized-tensorflow:latest-devel-mkl \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --verbose ``` 6. The log file is saved to the value of `--output-dir`. diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md index 61ff4cd48..19880cd2d 100755 --- a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md @@ -163,7 +163,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
``` cd /home//models/benchmarks - python launch_benchmark.py + python launch_benchmark.py \ --model-name wide_deep_large_ds \ --precision fp32 \ --mode inference \ @@ -171,7 +171,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --batch-size 1000 \ --socket-id 0 \ --accuracy-only \ - --docker-image docker.io/intelaipg/intel-optimized-tensorflow:nightly-latestprs-bdw \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /root/user/wide_deep_files/wide_deep_fp32_pretrained_model.pb \ --data-location /root/user/wide_deep_files/dataset_preprocessed_eval.tfrecords ``` @@ -183,7 +183,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. ``` cd /home//models/benchmarks - python launch_benchmark.py + python launch_benchmark.py \ --model-name wide_deep_large_ds \ --precision fp32 \ --mode inference \ @@ -191,7 +191,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image docker.io/intelaipg/intel-optimized-tensorflow:nightly-latestprs-bdw \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /root/user/wide_deep_files/wide_deep_fp32_pretrained_model.pb \ --data-location /root/user/wide_deep_files/dataset_preprocessed_test.tfrecords \ -- num_parallel_batches=1 @@ -200,7 +200,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. ``` cd /home//models/benchmarks - python launch_benchmark.py + python launch_benchmark.py \ --model-name wide_deep_large_ds \ --precision fp32 \ --mode inference \ @@ -208,7 +208,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
--benchmark-only \ --batch-size 512 \ --socket-id 0 \ - --docker-image docker.io/intelaipg/intel-optimized-tensorflow:nightly-latestprs-bdw \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --in-graph /root/user/wide_deep_files/wide_deep_fp32_pretrained_model.pb \ --data-location /root/user/wide_deep_files/dataset_preprocessed_test.tfrecords ``` diff --git a/benchmarks/text_to_speech/tensorflow/wavenet/README.md b/benchmarks/text_to_speech/tensorflow/wavenet/README.md index 512cabd95..d51cdfa72 100644 --- a/benchmarks/text_to_speech/tensorflow/wavenet/README.md +++ b/benchmarks/text_to_speech/tensorflow/wavenet/README.md @@ -71,7 +71,7 @@ python launch_benchmark.py \ --framework tensorflow \ --socket-id 0 \ --num-cores 1 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --model-source-dir /home//wavenet/tensorflow-wavenet \ --checkpoint /home//wavenet_checkpoints \ -- checkpoint_name=model.ckpt-99 sample=8510 diff --git a/docs/general/tensorflow/LaunchBenchmark.md b/docs/general/tensorflow/LaunchBenchmark.md index 4ee5d6a0a..08c6c999a 100644 --- a/docs/general/tensorflow/LaunchBenchmark.md +++ b/docs/general/tensorflow/LaunchBenchmark.md @@ -164,7 +164,7 @@ $ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --data-location /home//Imagenet_Validation \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --volume /home//custom_folder_1:/custom_folder_1 \ --volume /home//custom_folder_2:/custom_folder_2 ``` @@ -201,7 +201,7 @@ Below is an example showing how to use the `--debug` flag: --batch-size=1 \ --socket-id 0 \ --data-location /home//Imagenet_Validation \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14 \ + --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ --debug # ls From 51baf076d2c98fafeb8fbe9088ad89275782c636 Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: 
Thu, 27 Jun 2019 09:41:53 -0700 Subject: [PATCH 59/62] Update lm-1b README due to branch and path changes (#343) * Update lm-1b README due to branch and path changes * Added specific SHA (just the current one from master as of 6/26) --- .../tensorflow/lm-1b/README.md | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/README.md b/benchmarks/language_modeling/tensorflow/lm-1b/README.md index 871660d67..85b4f1fc7 100644 --- a/benchmarks/language_modeling/tensorflow/lm-1b/README.md +++ b/benchmarks/language_modeling/tensorflow/lm-1b/README.md @@ -9,17 +9,26 @@ other platforms are coming later. ## FP32 Inference Instructions -1. Clone [mlperf/inference](https://github.com/mlperf/inference.git) and -checkout `setInter` branch. +1. Clone [mlperf/inference](https://github.com/mlperf/inference.git) +with the current SHA from master of the repo on 6/26/2019: ``` git clone https://github.com/mlperf/inference.git -cd mlperf -git checkout setInter +cd inference +git checkout 41eb3e489233e83e544cd25148aca177b95d7bea ``` -To prepare the checkpoint and dataset, run: +To prepare the checkpoint and dataset, run the `benchmark.py` script +from the mlperf inference repo. Since this requires python3 and +TensorFlow to be installed, the following instructions show how to run +a docker container with your cloned mlperf inference repo mounted as a +volume: ``` -python inference/cloud/language_modeling/benchmark.py +docker run --volume /home//inference:/inference -it intelaipg/intel-optimized-tensorflow:1.14.0-py3 /bin/bash +``` +In the docker container, run: +``` +cd /inference/others/cloud/language_modeling/ +python3 benchmark.py ``` 2. 
Clone this [intelai/models](https://github.com/IntelAI/models) @@ -54,7 +63,7 @@ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ - --model-source-dir /inference/cloud/language_modeling + --model-source-dir /inference/others/cloud/language_modeling ``` @@ -69,7 +78,7 @@ python launch_benchmark.py \ --batch-size 1024 \ --socket-id 0 \ --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ - --model-source-dir /inference/cloud/language_modeling \ + --model-source-dir /inference/others/cloud/language_modeling \ -- steps=4 \ ``` From 2aa62041cd2ea2c2eda0d464dad5eb61b4e13f0e Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Tue, 2 Jul 2019 13:22:01 -0700 Subject: [PATCH 60/62] Update README files to use tf-cpu.1-14 docker image (#346) * Update README files to use tf-cpu docker image for 1.14 * Peg older version of pytest --- .../content_creation/tensorflow/draw/README.md | 4 ++-- .../tensorflow/densenet169/README.md | 6 +++--- .../tensorflow/inception_resnet_v2/README.md | 8 ++++---- .../tensorflow/inceptionv3/README.md | 16 ++++++++-------- .../tensorflow/inceptionv4/README.md | 12 ++++++------ .../tensorflow/mobilenet_v1/README.md | 12 ++++++------ .../tensorflow/resnet101/README.md | 14 +++++++------- .../tensorflow/squeezenet/README.md | 4 ++-- .../tensorflow/maskrcnn/README.md | 2 +- .../image_segmentation/tensorflow/unet/README.md | 2 +- .../language_modeling/tensorflow/lm-1b/README.md | 6 +++--- .../tensorflow/gnmt/README.md | 4 ++-- .../tensorflow/transformer_language/README.md | 4 ++-- .../tensorflow/transformer_lt_official/README.md | 4 ++-- .../tensorflow/faster_rcnn/README.md | 8 ++++---- .../object_detection/tensorflow/rfcn/README.md | 8 ++++---- .../tensorflow/ssd-mobilenet/README.md | 8 ++++---- .../tensorflow/ssd-resnet34/README.md | 8 ++++---- .../tensorflow/ssd_vgg16/README.md | 8 ++++---- .../tensorflow/wide_deep/README.md | 4 ++-- 
.../tensorflow/wide_deep_large_ds/README.md | 6 +++--- .../text_to_speech/tensorflow/wavenet/README.md | 2 +- requirements-test.txt | 2 +- 23 files changed, 76 insertions(+), 76 deletions(-) diff --git a/benchmarks/content_creation/tensorflow/draw/README.md b/benchmarks/content_creation/tensorflow/draw/README.md index 069dfd7a8..a918d1a5a 100644 --- a/benchmarks/content_creation/tensorflow/draw/README.md +++ b/benchmarks/content_creation/tensorflow/draw/README.md @@ -48,7 +48,7 @@ modes/precisions: --model-name draw \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --checkpoint /home//draw_fp32_pretrained_model \ --data-location /home//mnist \ --batch-size 1 \ @@ -61,7 +61,7 @@ modes/precisions: --model-name draw \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --checkpoint /home//draw_fp32_pretrained_model \ --data-location /home//mnist \ --batch-size 100 \ diff --git a/benchmarks/image_recognition/tensorflow/densenet169/README.md b/benchmarks/image_recognition/tensorflow/densenet169/README.md index 8f4a0b3f2..aaf2fd9e2 100644 --- a/benchmarks/image_recognition/tensorflow/densenet169/README.md +++ b/benchmarks/image_recognition/tensorflow/densenet169/README.md @@ -58,7 +58,7 @@ following modes/precisions: --batch-size 100 \ --socket-id 0 \ --in-graph /home//densenet169_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ -- input_height=224 input_width=224 warmup_steps=20 steps=100 \ input_layer="input" output_layer="densenet169/predictions/Reshape_1" ``` @@ -74,7 +74,7 @@ following modes/precisions: --batch-size 1 \ --socket-id 0 \ --in-graph /home//densenet169_fp32_pretrained_model.pb \ - 
--docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ -- input_height=224 input_width=224 warmup_steps=20 steps=100 \ input_layer="input" output_layer="densenet169/predictions/Reshape_1" ``` @@ -91,7 +91,7 @@ following modes/precisions: --batch-size 100 \ --socket-id 0 \ --in-graph /home//densenet169_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --data-location /home//imagenet_validation_dataset \ -- input_height=224 input_width=224 \ input_layer="input" output_layer="densenet169/predictions/Reshape_1" diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md index 034651a7f..cd38e364f 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md @@ -84,7 +84,7 @@ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords ``` @@ -100,7 +100,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb ``` @@ -115,7 +115,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inception_resnet_v2_int8_pretrained_model.pb ``` @@ 
-242,7 +242,7 @@ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords ``` diff --git a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md index adaedef36..0a9223914 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv3/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv3/README.md @@ -97,7 +97,7 @@ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords ``` @@ -118,7 +118,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords \ -- warmup_steps=50 steps=500 @@ -135,7 +135,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -151,7 +151,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph 
/home//inceptionv3_int8_pretrained_model.pb \ --data-location /home//datasets/ImageNet_TFRecords \ -- warmup_steps=50 steps=500 @@ -168,7 +168,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv3_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -258,7 +258,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv3_fp32_pretrained_model.pb ``` Example log tail when running for online inference: @@ -289,7 +289,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv3_fp32_pretrained_model.pb ``` Example log tail when running for batch inference: @@ -321,7 +321,7 @@ python launch_benchmark.py \ --accuracy-only \ --batch-size 100 \ --data-location /dataset/Imagenet_Validation \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv3_fp32_pretrained_model.pb ``` Example log tail when running for accuracy: diff --git a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md index 31b3ba91b..560de9ef5 100644 --- a/benchmarks/image_recognition/tensorflow/inceptionv4/README.md +++ b/benchmarks/image_recognition/tensorflow/inceptionv4/README.md @@ -56,7 +56,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
--accuracy-only \ --batch-size 100 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv4_int8_pretrained_model.pb \ --data-location /home//ImageNet_TFRecords ``` @@ -71,7 +71,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --benchmark-only \ --batch-size 240 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv4_int8_pretrained_model.pb ``` @@ -85,7 +85,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv4_int8_pretrained_model.pb ``` @@ -185,7 +185,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --accuracy-only \ --batch-size 100 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv4_fp32_pretrained_model.pb \ --data-location /home//ImageNet_TFRecords ``` @@ -200,7 +200,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --benchmark-only \ --batch-size 240 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv4_fp32_pretrained_model.pb ``` @@ -214,7 +214,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
--benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//inceptionv4_fp32_pretrained_model.pb ``` diff --git a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md index 0c7295244..e7d0d6f5d 100644 --- a/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md +++ b/benchmarks/image_recognition/tensorflow/mobilenet_v1/README.md @@ -67,7 +67,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene --batch-size 240 \ --socket-id 0 \ --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ -- input_height=224 input_width=224 warmup_steps=10 steps=50 \ input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" ``` @@ -83,7 +83,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene --batch-size 1 \ --socket-id 0 \ --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ -- input_height=224 input_width=224 warmup_steps=10 steps=50 \ input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" ``` @@ -100,7 +100,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene --batch-size 100 \ --socket-id 0 \ --in-graph /home//mobilenetv1_int8_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --data-location /home//imagenet_validation_dataset \ -- input_height=224 input_width=224 \ input_layer="input" output_layer="MobilenetV1/Predictions/Reshape_1" @@ -216,7 +216,7 @@ $ wget 
https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene --model-name mobilenet_v1 \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//tensorflow/models \ --batch-size 1 \ --socket-id 0 \ @@ -234,7 +234,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene --model-name mobilenet_v1 \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//tensorflow/models \ --batch-size 100 \ --socket-id 0 \ @@ -248,7 +248,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/mobilene --model-name mobilenet_v1 \ --mode inference \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//tensorflow/models \ --batch-size 100 \ --accuracy-only \ diff --git a/benchmarks/image_recognition/tensorflow/resnet101/README.md b/benchmarks/image_recognition/tensorflow/resnet101/README.md index a39daaf70..7fb3566eb 100644 --- a/benchmarks/image_recognition/tensorflow/resnet101/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet101/README.md @@ -85,7 +85,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --accuracy-only \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --data-location /home//dataset/FullImageNetData_directory \ --in-graph=/home//resnet101_int8_pretrained_model.pb ``` @@ -106,7 +106,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image 
gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -123,7 +123,7 @@ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --data-location /home//dataset/FullImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -139,7 +139,7 @@ python launch_benchmark.py \ --benchmark-only \ --batch-size 128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -156,7 +156,7 @@ python launch_benchmark.py \ --batch-size 128 \ --data-location /home//dataset/FullImageNetData_directory \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph=/home//resnet101_int8_pretrained_model.pb \ -- warmup_steps=50 steps=500 ``` @@ -250,7 +250,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/resnet10 --mode inference \ --model-name resnet101 \ --batch-size 128 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//trained_models/resnet101_fp32_pretrained_model.pb \ --socket-id 0 ``` @@ -277,7 +277,7 @@ $ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/resnet10 --mode inference \ --model-name resnet101 \ --batch-size 100 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /home//trained_models/resnet101_fp32_pretrained_model.pb \ --data-location /home//imagenet_validation_dataset \ 
--accuracy-only \ diff --git a/benchmarks/image_recognition/tensorflow/squeezenet/README.md b/benchmarks/image_recognition/tensorflow/squeezenet/README.md index 2c3a245f9..feaba492a 100644 --- a/benchmarks/image_recognition/tensorflow/squeezenet/README.md +++ b/benchmarks/image_recognition/tensorflow/squeezenet/README.md @@ -79,7 +79,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --socket-id 0 \ --batch-size 64 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --checkpoint /home//squeezenet_checkpoints \ --data-location /home//datasets/ImageNet_TFRecords ``` @@ -94,7 +94,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --socket-id 0 \ --batch-size 1 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --checkpoint /home//squeezenet_checkpoints \ --data-location /home//datasets/ImageNet_TFRecords ``` diff --git a/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md b/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md index 938dcb634..218fd7e2f 100644 --- a/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md +++ b/benchmarks/image_segmentation/tensorflow/maskrcnn/README.md @@ -61,7 +61,7 @@ $ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --data-location /home//COCO2014 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` 5. Log files are located at the value of `--output-dir`. 
diff --git a/benchmarks/image_segmentation/tensorflow/unet/README.md b/benchmarks/image_segmentation/tensorflow/unet/README.md index e91d2af2a..d86505a69 100644 --- a/benchmarks/image_segmentation/tensorflow/unet/README.md +++ b/benchmarks/image_segmentation/tensorflow/unet/README.md @@ -57,7 +57,7 @@ modes/precisions: --benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --checkpoint /home//unet_trained \ --model-source-dir /home//tf_unet \ -- checkpoint_name=model.cpkt diff --git a/benchmarks/language_modeling/tensorflow/lm-1b/README.md b/benchmarks/language_modeling/tensorflow/lm-1b/README.md index 85b4f1fc7..fa05e8b3b 100644 --- a/benchmarks/language_modeling/tensorflow/lm-1b/README.md +++ b/benchmarks/language_modeling/tensorflow/lm-1b/README.md @@ -23,7 +23,7 @@ TensorFlow to be installed, the following instructions show how to run a docker container with your cloned mlperf inference repo mounted as a volume: ``` -docker run --volume /home//inference:/inference -it intelaipg/intel-optimized-tensorflow:1.14.0-py3 /bin/bash +docker run --volume /home//inference:/inference -it gcr.io/deeplearning-platform-release/tf-cpu.1-14 /bin/bash ``` In the docker container, run: ``` @@ -62,7 +62,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /inference/others/cloud/language_modeling ``` @@ -77,7 +77,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1024 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /inference/others/cloud/language_modeling \ -- steps=4 \ ``` diff --git 
a/benchmarks/language_translation/tensorflow/gnmt/README.md b/benchmarks/language_translation/tensorflow/gnmt/README.md index 523965917..987be7075 100644 --- a/benchmarks/language_translation/tensorflow/gnmt/README.md +++ b/benchmarks/language_translation/tensorflow/gnmt/README.md @@ -82,7 +82,7 @@ python launch_benchmark.py \ --socket-id 0 \ --checkpoint /home//gnmt_checkpoints \ --data-location /home//wmt16 \ ---docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ +--docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ -- infer_mode=beam_search ``` @@ -99,7 +99,7 @@ python launch_benchmark.py \ --socket-id 0 \ --checkpoint /home//gnmt_checkpoints \ --data-location /home//wmt16 \ ---docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ +--docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ -- infer_mode=beam_search ``` diff --git a/benchmarks/language_translation/tensorflow/transformer_language/README.md b/benchmarks/language_translation/tensorflow/transformer_language/README.md index f4997711f..2c0b700f2 100644 --- a/benchmarks/language_translation/tensorflow/transformer_language/README.md +++ b/benchmarks/language_translation/tensorflow/transformer_language/README.md @@ -82,7 +82,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --checkpoint /home//transformer_lt_fp32_pretrained_model \ --data-location /home//t2t_data \ --model-source-dir /home//tensor2tensor/ \ @@ -99,7 +99,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 32 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --checkpoint /home//transformer_lt_fp32_pretrained_model \ --data-location /home//t2t_data \ --model-source-dir /home//tensor2tensor/ \ diff --git 
a/benchmarks/language_translation/tensorflow/transformer_lt_official/README.md b/benchmarks/language_translation/tensorflow/transformer_lt_official/README.md index f592cf832..87cc6b472 100644 --- a/benchmarks/language_translation/tensorflow/transformer_lt_official/README.md +++ b/benchmarks/language_translation/tensorflow/transformer_lt_official/README.md @@ -65,7 +65,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//tensorflow-models/models \ --in-graph /home//transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \ --data-location /home//transformer_lt_official_fp32_pretrained_model/data \ @@ -85,7 +85,7 @@ python launch_benchmark.py \ --framework tensorflow \ --batch-size 64 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//tensorflow-models/models \ --in-graph /home//transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb \ --data-location /home//transformer_lt_official_fp32_pretrained_model/data \ diff --git a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md index cd57419f1..ff3dfce3f 100644 --- a/benchmarks/object_detection/tensorflow/faster_rcnn/README.md +++ b/benchmarks/object_detection/tensorflow/faster_rcnn/README.md @@ -156,7 +156,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --checkpoint /home//faster_rcnn_resnet50_fp32_coco \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ -- config_file=pipeline.config ``` @@ -169,7 +169,7 @@ python launch_benchmark.py \ --mode inference \ --precision fp32 \ --framework tensorflow \ - 
--docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//coco/output \ --in-graph /home//faster_rcnn_resnet50_fp32_coco/frozen_inference_graph.pb \ @@ -270,7 +270,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --in-graph /home//faster_rcnn_int8_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --benchmark-only \ -- number_of_steps=5000 ``` @@ -285,7 +285,7 @@ python launch_benchmark.py \ --precision int8 \ --framework tensorflow \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//output/coco_val.record \ --in-graph /home//faster_rcnn_int8_pretrained_model.pb \ diff --git a/benchmarks/object_detection/tensorflow/rfcn/README.md b/benchmarks/object_detection/tensorflow/rfcn/README.md index 42c488ca4..6e4a519df 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/README.md +++ b/benchmarks/object_detection/tensorflow/rfcn/README.md @@ -139,7 +139,7 @@ python launch_benchmark.py \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//val/val2017 \ --in-graph /home//rfcn_resnet101_int8_coco_pretrained_model.pb \ @@ -156,7 +156,7 @@ python launch_benchmark.py \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//tensorflow/models \ --data-location 
/home//coco/output/coco_val.record-00000-of-00001 \ --in-graph /home//rfcn_resnet101_int8_coco_pretrained_model.pb \ @@ -331,7 +331,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --checkpoint /home//rfcn_resnet101_fp32_coco \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ -- config_file=rfcn_pipeline.config ``` @@ -344,7 +344,7 @@ python launch_benchmark.py \ --mode inference \ --precision fp32 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//coco/output/coco_val.record \ --in-graph /home//rfcn_resnet101_fp32_coco/frozen_inference_graph.pb \ diff --git a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md index 35db5d14d..c6400197c 100644 --- a/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-mobilenet/README.md @@ -121,7 +121,7 @@ python launch_benchmark.py \ --precision int8 \ --framework tensorflow \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//val/val2017 \ --in-graph /home//ssdmobilenet_int8_pretrained_model.pb \ @@ -138,7 +138,7 @@ python launch_benchmark.py \ --precision int8 \ --framework tensorflow \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//tensorflow/models \ --data-location /home//coco/output/coco_val.record \ --in-graph /home//ssdmobilenet_int8_pretrained_model.pb \ @@ -349,7 +349,7 @@ $ python launch_benchmark.py \ --precision fp32 \ 
--mode inference \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --benchmark-only ``` @@ -368,7 +368,7 @@ $ python launch_benchmark.py \ --precision fp32 \ --mode inference \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --accuracy-only ``` diff --git a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md index 2b5364ff9..e7b3528fb 100644 --- a/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md +++ b/benchmarks/object_detection/tensorflow/ssd-resnet34/README.md @@ -134,7 +134,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --benchmark-only ``` @@ -154,7 +154,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --accuracy-only ``` @@ -310,7 +310,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --benchmark-only ``` @@ -330,7 +330,7 @@ $ python launch_benchmark.py \ --mode inference \ --socket-id 0 \ --batch-size=1 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --accuracy-only ``` diff --git a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md index 6ecd4b646..971311f75 100644 --- 
a/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md +++ b/benchmarks/object_detection/tensorflow/ssd_vgg16/README.md @@ -103,7 +103,7 @@ python launch_benchmark.py \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//SSD.TensorFlow \ --data-location /home//tf_records \ --in-graph /home//ssdvgg16_int8_pretrained_model.pb \ @@ -133,7 +133,7 @@ python launch_benchmark.py \ --mode inference \ --precision int8 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//SSD.TensorFlow \ --data-location /home//tf_records \ --in-graph /home//ssdvgg16_int8_pretrained_model.pb \ @@ -210,7 +210,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --batch-size 1 \ --socket-id 0 \ --num-inter-threads 11 \ @@ -237,7 +237,7 @@ python launch_benchmark.py \ --mode inference \ --precision fp32 \ --framework tensorflow \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0-py3 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//SSD.TensorFlow \ --data-location /home//tf_records \ --in-graph /home//ssdvgg16_fp32_pretrained_model.pb \ diff --git a/benchmarks/recommendation/tensorflow/wide_deep/README.md b/benchmarks/recommendation/tensorflow/wide_deep/README.md index 8f28c0607..8ace58237 100644 --- a/benchmarks/recommendation/tensorflow/wide_deep/README.md +++ b/benchmarks/recommendation/tensorflow/wide_deep/README.md @@ -56,7 +56,7 @@ use in the next step. 
--batch-size 1 \ --data-location /home//widedeep_dataset \ --checkpoint /home//path/to/wide_deep_fp32_pretrained_model \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --verbose ``` * Running the model in batch inference mode, set `--batch-size` = `1024` @@ -72,7 +72,7 @@ use in the next step. --batch-size 1024 \ --data-location /home//path/to/dataset \ --checkpoint /home//path/to/wide_deep_fp32_pretrained_model \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --verbose ``` 6. The log file is saved to the value of `--output-dir`. diff --git a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md index 19880cd2d..e2467d45f 100755 --- a/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md +++ b/benchmarks/recommendation/tensorflow/wide_deep_large_ds/README.md @@ -171,7 +171,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --batch-size 1000 \ --socket-id 0 \ --accuracy-only \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /root/user/wide_deep_files/wide_deep_fp32_pretrained_model.pb \ --data-location /root/user/wide_deep_files/dataset_preprocessed_eval.tfrecords ``` @@ -191,7 +191,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. 
--benchmark-only \ --batch-size 1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /root/user/wide_deep_files/wide_deep_fp32_pretrained_model.pb \ --data-location /root/user/wide_deep_files/dataset_preprocessed_test.tfrecords \ -- num_parallel_batches=1 @@ -208,7 +208,7 @@ when calling `launch_benchmark.py` and the script will run without TCMalloc. --benchmark-only \ --batch-size 512 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --in-graph /root/user/wide_deep_files/wide_deep_fp32_pretrained_model.pb \ --data-location /root/user/wide_deep_files/dataset_preprocessed_test.tfrecords ``` diff --git a/benchmarks/text_to_speech/tensorflow/wavenet/README.md b/benchmarks/text_to_speech/tensorflow/wavenet/README.md index d51cdfa72..963d892d3 100644 --- a/benchmarks/text_to_speech/tensorflow/wavenet/README.md +++ b/benchmarks/text_to_speech/tensorflow/wavenet/README.md @@ -71,7 +71,7 @@ python launch_benchmark.py \ --framework tensorflow \ --socket-id 0 \ --num-cores 1 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --model-source-dir /home//wavenet/tensorflow-wavenet \ --checkpoint /home//wavenet_checkpoints \ -- checkpoint_name=model.ckpt-99 sample=8510 diff --git a/requirements-test.txt b/requirements-test.txt index 5102c19b3..fe0bf31ab 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,6 +1,6 @@ conditional flake8==3.7.5 -pytest +pytest==4.6.3 pytest-cov pytest-xdist mock From f2cc76dd503f2dbdf674c5e8956fbb92f9bceb9d Mon Sep 17 00:00:00 2001 From: Dina Suehiro Jones Date: Tue, 2 Jul 2019 13:54:56 -0700 Subject: [PATCH 61/62] Update Pillow version and py3 fix (#351) --- benchmarks/common/tensorflow/start.sh | 7 ++++++- 
.../object_detection/tensorflow/rfcn/requirements.txt | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/benchmarks/common/tensorflow/start.sh b/benchmarks/common/tensorflow/start.sh index 4eeda5648..9ea5f9f02 100755 --- a/benchmarks/common/tensorflow/start.sh +++ b/benchmarks/common/tensorflow/start.sh @@ -449,7 +449,7 @@ function maskrcnn() { if [ ${NOINSTALL} != "True" ]; then # install dependencies pip3 install -r ${MOUNT_EXTERNAL_MODELS_SOURCE}/requirements.txt - pip3 install --force-reinstall scipy==1.2.1 + pip3 install --force-reinstall scipy==1.2.1 Pillow==5.3.0 # install cocoapi get_cocoapi ${MOUNT_EXTERNAL_MODELS_SOURCE}/coco ${MOUNT_EXTERNAL_MODELS_SOURCE}/samples/coco @@ -551,6 +551,7 @@ function rfcn() { if [ ${NOINSTALL} != "True" ]; then # install dependencies pip install -r "${MOUNT_BENCHMARK}/object_detection/tensorflow/rfcn/requirements.txt" + original_dir=$(pwd) cd "${MOUNT_EXTERNAL_MODELS_SOURCE}/research" @@ -561,6 +562,10 @@ function rfcn() { get_cocoapi ${MOUNT_EXTERNAL_MODELS_SOURCE}/cocoapi ${MOUNT_EXTERNAL_MODELS_SOURCE}/research/ fi + # Fix the object_detection_evaluation.py file to change unicode() to str() so that it works in py3 + chmod -R 777 ${MOUNT_EXTERNAL_MODELS_SOURCE}/research/object_detection/utils/object_detection_evaluation.py + sed -i.bak "s/unicode(/str(/g" ${MOUNT_EXTERNAL_MODELS_SOURCE}/research/object_detection/utils/object_detection_evaluation.py + split_arg="" if [ -n "${split}" ] && [ ${ACCURACY_ONLY} == "True" ]; then split_arg="--split=${split}" diff --git a/benchmarks/object_detection/tensorflow/rfcn/requirements.txt b/benchmarks/object_detection/tensorflow/rfcn/requirements.txt index 92d9e0ba5..3ebb25335 100644 --- a/benchmarks/object_detection/tensorflow/rfcn/requirements.txt +++ b/benchmarks/object_detection/tensorflow/rfcn/requirements.txt @@ -1,6 +1,6 @@ Cython contextlib2 -pillow +pillow==5.3.0 lxml jupyter matplotlib From 53e25d01ca88ad99bfc92e6655f9ab14ffdde463 Mon Sep 17 00:00:00 2001 
From: Dina Suehiro Jones Date: Wed, 3 Jul 2019 12:54:19 -0700 Subject: [PATCH 62/62] Updating docker images that were missed earlier (#352) --- .../tensorflow/dcgan/README.md | 2 +- .../tensorflow/facenet/README.md | 6 +++--- .../tensorflow/mtcc/README.md | 2 +- .../tensorflow/inception_resnet_v2/README.md | 4 ++-- .../tensorflow/resnet50/README.md | 12 ++++++------ .../tensorflow/resnet50v1_5/README.md | 14 +++++++------- benchmarks/recommendation/tensorflow/ncf/README.md | 6 +++--- docs/general/tensorflow/LaunchBenchmark.md | 4 ++-- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/benchmarks/adversarial_networks/tensorflow/dcgan/README.md b/benchmarks/adversarial_networks/tensorflow/dcgan/README.md index e6f572916..4950d0f63 100644 --- a/benchmarks/adversarial_networks/tensorflow/dcgan/README.md +++ b/benchmarks/adversarial_networks/tensorflow/dcgan/README.md @@ -60,7 +60,7 @@ $ python launch_benchmark.py \ --socket-id 0 \ --checkpoint /home//dcgan_fp32_unconditional_cifar10_pretrained_model \ --data-location /home//cifar10 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` 5. Log files are located at the value of `--output-dir`. 
diff --git a/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md b/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md index 4a5322bad..fd27ffa2b 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md +++ b/benchmarks/face_detection_and_alignment/tensorflow/facenet/README.md @@ -59,7 +59,7 @@ python launch_benchmark.py \ --checkpoint /home//checkpoints \ --data-location /home//dataset \ --model-source-dir /home//facenet/ \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` Example log tail for online inference: ``` @@ -94,7 +94,7 @@ python launch_benchmark.py \ --checkpoint /home//checkpoints \ --data-location /home//dataset \ --model-source-dir /home//facenet/ \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` Example log tail for batch inference: ``` @@ -126,7 +126,7 @@ python launch_benchmark.py \ --checkpoint /home//checkpoints \ --data-location /home//dataset \ --model-source-dir /home//facenet/ \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` Example log tail for accuracy: ``` diff --git a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md index a79584c36..36cad0fe3 100644 --- a/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md +++ b/benchmarks/face_detection_and_alignment/tensorflow/mtcc/README.md @@ -55,7 +55,7 @@ Run: --mode inference \ --socket-id 0 \ --checkpoint /home//MTCNN_model \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` 6. The log file is saved to the value of `--output-dir`. 
diff --git a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md index cd38e364f..c3a44d2d2 100644 --- a/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md +++ b/benchmarks/image_recognition/tensorflow/inception_resnet_v2/README.md @@ -259,7 +259,7 @@ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` For batch inference (using `--benchmark-only`, `--socket-id 0` and `--batch-size 128`): @@ -274,7 +274,7 @@ python launch_benchmark.py \ --batch-size 128 \ --socket-id 0 \ --in-graph /home//inception_resnet_v2_fp32_pretrained_model.pb \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` Note that the `--verbose` or `--output-dir` flag can be added to any of the above commands diff --git a/benchmarks/image_recognition/tensorflow/resnet50/README.md b/benchmarks/image_recognition/tensorflow/resnet50/README.md index 751f15e98..71bbdf7cc 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50/README.md @@ -58,7 +58,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=100 \ --accuracy-only \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The log file is saved to the value of `--output-dir`. 
@@ -99,7 +99,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=128 \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ -- warmup_steps=50 steps=500 ``` The tail of the log output when the script completes should look @@ -160,7 +160,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The log file is saved to the value of `--output-dir`. @@ -195,7 +195,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The log file is saved to the value of `--output-dir`. @@ -233,7 +233,7 @@ $ python launch_benchmark.py \ --batch-size 100 \ --socket-id 0 \ --data-location /home//dataset/ImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The log file is saved to the value of `--output-dir`. 
@@ -267,7 +267,7 @@ $ python launch_benchmark.py \ --batch-size 100 \ --socket-id 0 \ --data-location /home//dataset/ImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The results file will be written to the `models/benchmarks/common/tensorflow/logs` directory, unless another diff --git a/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md b/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md index 314a65dcc..18889005a 100644 --- a/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md +++ b/benchmarks/image_recognition/tensorflow/resnet50v1_5/README.md @@ -42,7 +42,7 @@ $ git clone https://github.com/IntelAI/models.git The optimized ResNet50v1.5 model files are attached to the [intelai/models](https://github.com/intelai/models) repo and located at `models/models/image_recognition/tensorflow/resnet50v1_5/`. - The docker image (`intelaipg/intel-optimized-tensorflow:1.14.0`) + The docker image (`gcr.io/deeplearning-platform-release/tf-cpu.1-14`) used in the commands above were built using [TensorFlow](git@github.com:tensorflow/tensorflow.git) master for TensorFlow version 1.14. @@ -61,7 +61,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=100 \ --accuracy-only \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The log file is saved to the value of `--output-dir`. 
@@ -100,7 +100,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=128 \ --benchmark-only \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ -- warmup_steps=50 steps=500 ``` The tail of the log output when the benchmarking completes should look @@ -159,7 +159,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=1 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The log file is saved to the value of `--output-dir`. @@ -197,7 +197,7 @@ $ python launch_benchmark.py \ --mode inference \ --batch-size=128 \ --socket-id 0 \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The log file is saved to the value of `--output-dir`. @@ -238,7 +238,7 @@ $ python launch_benchmark.py \ --batch-size 100 \ --socket-id 0 \ --data-location /home//dataset/ImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The log file is saved to the value of `--output-dir`. 
@@ -275,7 +275,7 @@ $ python launch_benchmark.py \ --batch-size 100 \ --socket-id 0 \ --data-location /home//dataset/ImageNetData_directory \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The results file will be written to the `models/benchmarks/common/tensorflow/logs` directory, unless another diff --git a/benchmarks/recommendation/tensorflow/ncf/README.md b/benchmarks/recommendation/tensorflow/ncf/README.md index 2eccb84e7..a86a56b1f 100644 --- a/benchmarks/recommendation/tensorflow/ncf/README.md +++ b/benchmarks/recommendation/tensorflow/ncf/README.md @@ -53,7 +53,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The tail of batch inference log, looks as below. @@ -83,7 +83,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The tail of online inference log, looks as below. @@ -115,7 +115,7 @@ $ python launch_benchmark.py \ --framework tensorflow \ --precision fp32 \ --mode inference \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 ``` The tail of accuracy log, looks as below. 
diff --git a/docs/general/tensorflow/LaunchBenchmark.md b/docs/general/tensorflow/LaunchBenchmark.md index 08c6c999a..14e38385e 100644 --- a/docs/general/tensorflow/LaunchBenchmark.md +++ b/docs/general/tensorflow/LaunchBenchmark.md @@ -164,7 +164,7 @@ $ python launch_benchmark.py \ --batch-size 1 \ --socket-id 0 \ --data-location /home//Imagenet_Validation \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --volume /home//custom_folder_1:/custom_folder_1 \ --volume /home//custom_folder_2:/custom_folder_2 ``` @@ -201,7 +201,7 @@ Below is an example showing how to use the `--debug` flag: --batch-size=1 \ --socket-id 0 \ --data-location /home//Imagenet_Validation \ - --docker-image intelaipg/intel-optimized-tensorflow:1.14.0 \ + --docker-image gcr.io/deeplearning-platform-release/tf-cpu.1-14 \ --debug # ls