diff --git a/orttraining/tools/ci_test/download_azure_blob.py b/orttraining/tools/ci_test/download_azure_blob_archive.py similarity index 69% rename from orttraining/tools/ci_test/download_azure_blob.py rename to orttraining/tools/ci_test/download_azure_blob_archive.py index 324f97b4a865a..564dcc8007ce4 100755 --- a/orttraining/tools/ci_test/download_azure_blob.py +++ b/orttraining/tools/ci_test/download_azure_blob_archive.py @@ -17,7 +17,7 @@ sys.path.append(os.path.join(REPO_DIR, "tools", "python")) -import get_azcopy # noqa: E402 +from util import get_azcopy # noqa: E402 def _download(azcopy_path, url, local_path): subprocess.run([azcopy_path, "cp", "--log-level", "NONE", url, local_path], check=True) @@ -39,19 +39,23 @@ def _check_file_sha256_digest(path, expected_digest): match = actual_digest.lower() == expected_digest.lower() if not match: raise RuntimeError( - "SHA256 digest mismatch, expected: {}, actual: {}".format(expected_digest.lower(), actual_digest.lower())) + "SHA256 digest mismatch, expected: {}, actual: {}".format( + expected_digest.lower(), actual_digest.lower())) def main(): - parser = argparse.ArgumentParser(description="Downloads training end-to-end test data.") - parser.add_argument("--azure_blob_url", required=True, help="The test data destination directory.") - parser.add_argument("--target_dir", required=True, help="The test data destination directory.") - parser.add_argument("--archive_sha256_digest", help="The test data destination directory.") + parser = argparse.ArgumentParser( + description="Downloads an Azure blob archive.") + parser.add_argument("--azure_blob_url", required=True, + help="The Azure blob URL.") + parser.add_argument("--target_dir", required=True, + help="The destination directory.") + parser.add_argument("--archive_sha256_digest", + help="The SHA256 digest of the archive. 
Verified if provided.") args = parser.parse_args() - with tempfile.TemporaryDirectory() as temp_dir, \ - get_azcopy.get_azcopy() as azcopy_path: + with tempfile.TemporaryDirectory() as temp_dir, get_azcopy() as azcopy_path: archive_path = os.path.join(temp_dir, "archive.zip") - print("Downloading E2E test data from '{}'...".format(args.azure_blob_url)) + print("Downloading archive from '{}'...".format(args.azure_blob_url)) _download(azcopy_path, args.azure_blob_url, archive_path) if args.archive_sha256_digest: _check_file_sha256_digest(archive_path, args.archive_sha256_digest) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 683663db00e38..b89feca88cd14 100755 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -10,10 +10,22 @@ import subprocess import sys import hashlib -from logger import log +from logger import get_logger from amd_hipify import amd_hipify +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..")) + +sys.path.append(os.path.join(REPO_DIR, "tools", "python")) + + +from util import run # noqa: E402 + + +log = get_logger("build") + + class BaseError(Exception): """Base class for errors originating from build.py.""" pass @@ -490,8 +502,9 @@ def get_config_build_dir(build_dir, config): def run_subprocess(args, cwd=None, capture=False, dll_path=None, shell=False, env={}): - log.info("Running subprocess in '{0}'\n{1}".format( - cwd or os.getcwd(), args)) + if isinstance(args, str): + raise ValueError("args should be a sequence of strings, not a string") + my_env = os.environ.copy() if dll_path: if is_windows(): @@ -502,15 +515,9 @@ def run_subprocess(args, cwd=None, capture=False, dll_path=None, else: my_env["LD_LIBRARY_PATH"] = dll_path - stdout, stderr = (subprocess.PIPE, subprocess.STDOUT) if capture else ( - None, None) my_env.update(env) - completed_process = subprocess.run( - args, cwd=cwd, check=True, stdout=stdout, stderr=stderr, - env=my_env, 
shell=shell) - log.debug("Subprocess completed. Return code=" + - str(completed_process.returncode)) - return completed_process + + return run(*args, cwd=cwd, capture=capture, shell=shell, env=my_env) def update_submodules(source_dir): @@ -925,7 +932,7 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home args.cmake_generator == 'Visual Studio 16 2019' and args.use_full_protobuf): raise BuildError( - "Fuzz test has only be tested with build shared libs option using MSVC on windows") + "Fuzz test has only be tested with build shared libs option using MSVC on windows") cmake_args += [ "-Donnxruntime_BUILD_UNIT_TESTS=ON", "-Donnxruntime_FUZZ_TEST=ON", @@ -1163,9 +1170,9 @@ def adb_shell(*args, **kwargs): def run_android_tests(args, source_dir, config, cwd): if args.android_abi == 'x86_64': - run_subprocess(os.path.join( + run_subprocess([os.path.join( source_dir, 'tools', 'ci_build', 'github', 'android', - 'start_android_emulator.sh')) + 'start_android_emulator.sh')]) adb_push('testdata', '/data/local/tmp/', cwd=cwd) adb_push( os.path.join(source_dir, 'cmake', 'external', 'onnx', 'onnx', 'backend', 'test'), diff --git a/tools/ci_build/exclude_unused_ops.py b/tools/ci_build/exclude_unused_ops.py index 521622dced4ff..b61a87035b848 100644 --- a/tools/ci_build/exclude_unused_ops.py +++ b/tools/ci_build/exclude_unused_ops.py @@ -14,7 +14,10 @@ import typing from onnx import AttributeProto -from logger import log +from logger import get_logger + + +log = get_logger("exclude_unused_ops") def _extract_ops_from_config(file_path, required_ops): diff --git a/tools/ci_build/get_docker_image.py b/tools/ci_build/get_docker_image.py new file mode 100755 index 0000000000000..923881dfd77ad --- /dev/null +++ b/tools/ci_build/get_docker_image.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import argparse +import collections +import hashlib +import os +import shlex +import sys +from logger import get_logger + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..")) + +sys.path.append(os.path.join(REPO_DIR, "tools", "python")) + + +from util import run # noqa: E402 + + +log = get_logger("get_docker_image") + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Gets a docker image, either by pulling it from a " + "container registry or building it locally and then pushing it. " + "The uniqueness of the docker image is determined by a hash digest of " + "the Dockerfile, the build context directory, and arguments to " + "'docker build' affecting the image content. " + "This digest value is used in the image tag. " + "This script checks whether an image with that tag is initially " + "present in the container registry to determine whether to pull or " + "build the image. " + "The user must be logged in to the container registry.") + + parser.add_argument( + "--dockerfile", default="Dockerfile", help="Path to the Dockerfile.") + parser.add_argument( + "--context", default=".", help="Path to the build context.") + parser.add_argument( + "--docker-build-args", default="", + help="String of Docker build args which may affect the image content. " + "These will be used in differentiating images from one another. 
" + "For example, '--build-arg'.") + parser.add_argument( + "--docker-build-args-not-affecting-image-content", default="", + help="String of Docker build args which do not affect the image " + "content.") + + parser.add_argument( + "--container-registry", required=True, + help="The Azure container registry name.") + parser.add_argument( + "--repository", required=True, help="The image repository name.") + + parser.add_argument( + "--docker-path", default="docker", help="Path to docker.") + + return parser.parse_args() + + +FileInfo = collections.namedtuple('FileInfo', ['path', 'mode']) + + +def file_info_str(file_info: FileInfo): + return "{} {}".format(file_info.path, file_info.mode) + + +def make_file_info_from_path(file_path: str): + return FileInfo(file_path, os.stat(file_path).st_mode) + + +def update_hash_with_directory(dir_file_info: FileInfo, hash_obj): + hash_obj.update(file_info_str(dir_file_info).encode()) + + files, dirs = [], [] + with os.scandir(dir_file_info.path) as dir_it: + for dir_entry in dir_it: + file_info = FileInfo(dir_entry.path, dir_entry.stat().st_mode) + if dir_entry.is_dir(): + dirs.append(file_info) + elif dir_entry.is_file(): + files.append(file_info) + + def file_info_key(file_info: FileInfo): + return file_info.path + + files.sort(key=file_info_key) + dirs.sort(key=file_info_key) + + for file_info in files: + update_hash_with_file(file_info, hash_obj) + + for file_info in dirs: + update_hash_with_directory(file_info, hash_obj) + + +def update_hash_with_file(file_info: FileInfo, hash_obj): + hash_obj.update(file_info_str(file_info).encode()) + + read_bytes_length = 8192 + with open(file_info.path, mode="rb") as file_data: + while True: + read_bytes = file_data.read(read_bytes_length) + if len(read_bytes) == 0: + break + hash_obj.update(read_bytes) + + +def generate_tag(dockerfile_path, context_path, docker_build_args_str): + hash_obj = hashlib.sha256() + hash_obj.update(docker_build_args_str.encode()) + update_hash_with_file( + 
make_file_info_from_path(dockerfile_path), hash_obj) + update_hash_with_directory( + make_file_info_from_path(context_path), hash_obj) + return "image_content_digest_{}".format(hash_obj.hexdigest()) + + +def container_registry_has_image(full_image_name, docker_path): + env = os.environ.copy() + env["DOCKER_CLI_EXPERIMENTAL"] = "enabled" # needed for "docker manifest" + proc = run( + docker_path, "manifest", "inspect", "--insecure", full_image_name, + env=env, check=False, quiet=True) + return proc.returncode == 0 + + +def main(): + args = parse_args() + + tag = generate_tag(args.dockerfile, args.context, args.docker_build_args) + + full_image_name = "{}.azurecr.io/{}:{}".format( + args.container_registry, args.repository, tag) + + log.info("Image: {}".format(full_image_name)) + + if container_registry_has_image(full_image_name, args.docker_path): + log.info("Image found, pulling...") + + run(args.docker_path, "pull", full_image_name) + else: + log.info("Image not found, building and pushing...") + + run(args.docker_path, "build", + "--pull", + *shlex.split(args.docker_build_args), + *shlex.split(args.docker_build_args_not_affecting_image_content), + "--tag", full_image_name, + "--file", args.dockerfile, + args.context) + + run(args.docker_path, "push", full_image_name) + + # tag so we can refer to the image by repository name + run(args.docker_path, "tag", full_image_name, args.repository) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml index 4310bc38e6c08..8f2c363674719 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml @@ -7,10 +7,10 @@ jobs: steps: - template: templates/set-test-data-variables-step.yml - # Latest TensorRT container only supports ubuntu18.04 - - script: 
'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--build_wheel"' - - displayName: 'Command Line Script' + - template: templates/run-docker-build-steps.yml + parameters: + # Latest TensorRT container only supports ubuntu18.04 + RunDockerBuildArgs: '-o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--build_wheel"' - template: templates/component-governance-component-detection-steps.yml parameters : diff --git a/tools/ci_build/github/azure-pipelines/linux-multi-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-multi-gpu-tensorrt-ci-pipeline.yml index 5ed98d8c46a67..e51234a262956 100644 --- a/tools/ci_build/github/azure-pipelines/linux-multi-gpu-tensorrt-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-multi-gpu-tensorrt-ci-pipeline.yml @@ -4,6 +4,6 @@ jobs: AgentPool : 'Linux-Multi-GPU' JobName: 'Linux_CI_Multi_GPU_TensorRT_Dev' # The latest TensorRT container only supports ubuntu18.04 - BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--enable_multi_device_test"' + RunDockerBuildArgs: '-o ubuntu18.04 -d tensorrt -r $(Build.BinariesDirectory) -p 3.6 -x "--enable_multi_device_test"' DoNugetPack: 'false' ArtifactName: 'drop-linux' diff --git a/tools/ci_build/github/azure-pipelines/linux-ngraph-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-ngraph-ci-pipeline.yml index a60bc4fe6a907..0a31faa80429f 100644 --- a/tools/ci_build/github/azure-pipelines/linux-ngraph-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-ngraph-ci-pipeline.yml @@ -15,8 +15,9 @@ jobs: continueOnError: true condition: always() - - script: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu16.04 -d ngraph -r $(Build.BinariesDirectory) -x "--use_ngraph --build_wheel"' - displayName: 'Command Line Script' + - template: templates/run-docker-build-steps.yml + 
parameters: + RunDockerBuildArgs: '-o ubuntu16.04 -d ngraph -r $(Build.BinariesDirectory) -x "--use_ngraph --build_wheel"' - template: templates/component-governance-component-detection-steps.yml parameters : diff --git a/tools/ci_build/github/azure-pipelines/linux-nuphar-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-nuphar-ci-pipeline.yml index cafae402d5345..3a82967f3534c 100644 --- a/tools/ci_build/github/azure-pipelines/linux-nuphar-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-nuphar-ci-pipeline.yml @@ -3,7 +3,7 @@ jobs: parameters: AgentPool : 'Linux-CPU' JobName: 'Linux_CI_Dev' - BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d cpu -r $(Build.BinariesDirectory) -x "--enable_pybind --use_nuphar"' + RunDockerBuildArgs: '-o ubuntu18.04 -d cpu -r $(Build.BinariesDirectory) -x "--enable_pybind --use_nuphar"' DoNugetPack: 'false' ArtifactName: 'drop-linux' TimeoutInMinutes: 180 diff --git a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml index 51d9877ac1c94..844fc8b1ed6ca 100644 --- a/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml @@ -3,7 +3,7 @@ jobs: parameters: AgentPool : 'Linux-CPU' JobName: 'Linux_CI_Dev' - BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu18.04 -d openvino -v 2021.1 -r $(Build.BinariesDirectory) -x "--use_openvino CPU_FP32 --build_wheel"' + RunDockerBuildArgs: '-o ubuntu18.04 -d openvino -v 2021.1 -r $(Build.BinariesDirectory) -x "--use_openvino CPU_FP32 --build_wheel"' DoNugetPack: 'false' ArtifactName: 'drop-linux' TimeoutInMinutes: 120 diff --git a/tools/ci_build/github/azure-pipelines/linux-openvino-nightly-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-openvino-nightly-pipeline.yml index a1d128ab56d26..62a3dec8a6d1c 100644 --- 
a/tools/ci_build/github/azure-pipelines/linux-openvino-nightly-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-openvino-nightly-pipeline.yml @@ -20,9 +20,10 @@ jobs: arguments: --build_dir $(Build.BinariesDirectory) --edge_device pythonInterpreter: '/usr/bin/python3' workingDirectory: $(Build.BinariesDirectory) - - - script: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu16.04 -d openvino -v 2020.2 -r $(Build.BinariesDirectory) -x "--use_openvino GPU_FP32 --build_wheel"' - displayName: 'Command Line Script' + + - template: templates/run-docker-build-steps.yml + parameters: + RunDockerBuildArgs: '-o ubuntu16.04 -d openvino -v 2020.2 -r $(Build.BinariesDirectory) -x "--use_openvino GPU_FP32 --build_wheel"' - template: templates/component-governance-component-detection-steps.yml parameters : diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml index 85b58d01b1ad3..03fdff524c163 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml @@ -6,8 +6,7 @@ jobs: AgentPool : 'Linux-Single-GPU-V100' JobName: 'Onnxruntime_Linux_GPU_Training' SubmoduleCheckoutMode: 'recursive' - BuildCommand: > - tools/ci_build/github/linux/run_dockerbuild.sh + RunDockerBuildArgs: > -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) -x " --enable_training diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-ci-pipeline.yml index c331def4cb015..cd7ddc221a002 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-ci-pipeline.yml @@ -12,21 +12,23 @@ jobs: # update these if the E2E test data changes - script: | - 
orttraining/tools/ci_test/download_azure_blob.py \ + orttraining/tools/ci_test/download_azure_blob_archive.py \ --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data.zip?snapshot=2020-06-15T23:17:35.8314853Z \ --target_dir $(Build.BinariesDirectory)/training_e2e_test_data \ --archive_sha256_digest B01C169B6550D1A0A6F1B4E2F34AE2A8714B52DBB70AC04DA85D371F691BDFF9 displayName: 'Download onnxruntime_training_data.zip data' - - script: | - tools/ci_build/github/linux/run_dockerbuild.sh \ + - template: templates/run-docker-build-steps.yml + parameters: + RunDockerBuildArgs: | -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) \ + -t onnxruntime_e2e_test_image \ -x " \ --config RelWithDebInfo \ --enable_training \ --update --build \ " - displayName: 'Build' + DisplayName: 'Build' - script: | docker run \ @@ -35,7 +37,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume $(Build.BinariesDirectory)/training_e2e_test_data:/training_e2e_test_data:ro \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /onnxruntime_src/orttraining/tools/ci_test/run_batch_size_test.py \ --binary_dir /build/RelWithDebInfo \ --model_root /training_e2e_test_data/models @@ -49,7 +51,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume $(Build.BinariesDirectory)/training_e2e_test_data:/training_e2e_test_data:ro \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /onnxruntime_src/orttraining/tools/ci_test/run_convergence_test.py \ --binary_dir /build/RelWithDebInfo \ --model_root /training_e2e_test_data/models \ diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-nightly-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-nightly-pipeline.yml index f054fe2cb58c2..3a2497f902cc8 100644 --- 
a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-nightly-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-nightly-pipeline.yml @@ -12,21 +12,23 @@ jobs: # update these if the E2E test data changes - script: | - orttraining/tools/ci_test/download_azure_blob.py \ + orttraining/tools/ci_test/download_azure_blob_archive.py \ --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data.zip?snapshot=2020-06-15T23:17:35.8314853Z \ --target_dir $(Build.BinariesDirectory)/training_e2e_test_data \ --archive_sha256_digest B01C169B6550D1A0A6F1B4E2F34AE2A8714B52DBB70AC04DA85D371F691BDFF9 displayName: 'Download onnxruntime_training_data.zip data' - script: | - orttraining/tools/ci_test/download_azure_blob.py \ + orttraining/tools/ci_test/download_azure_blob_archive.py \ --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/glue_MRPC_data.zip \ --target_dir /bert_data/hf_data/ displayName: 'Download glue_MRPC_data.zip data' - - script: | - tools/ci_build/github/linux/run_dockerbuild.sh \ + - template: templates/run-docker-build-steps.yml + parameters: + RunDockerBuildArgs: | -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) \ + -t onnxruntime_e2e_test_image \ -x " \ --config RelWithDebInfo \ --enable_training \ @@ -35,7 +37,7 @@ jobs: --enable_training_python_frontend_e2e_tests \ --enable_training_pipeline_e2e_tests \ " - displayName: 'Build' + DisplayName: 'Build' # Hit OOM with run_training_pipeline_e2e_tests.py - slightly above 16GB limit. # leave this code here for further investigation. 
@@ -48,7 +50,7 @@ jobs: # --volume $(Build.BinariesDirectory):/build \ # --volume /bert_data:/bert_data \ # --volume /bert_ort:/bert_ort \ - # onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + # onnxruntime_e2e_test_image \ # /build/RelWithDebInfo/run_training_pipeline_e2e_tests.py \ # --cwd /build/RelWithDebInfo # displayName: 'Run run_training_pipeline_e2e_tests.py' @@ -62,7 +64,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume $(Build.BinariesDirectory)/training_e2e_test_data:/training_e2e_test_data:ro \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /onnxruntime_src/orttraining/tools/ci_test/run_batch_size_test.py \ --binary_dir /build/RelWithDebInfo \ --model_root /training_e2e_test_data/models @@ -76,7 +78,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume $(Build.BinariesDirectory)/training_e2e_test_data:/training_e2e_test_data:ro \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /onnxruntime_src/orttraining/tools/ci_test/run_convergence_test.py \ --binary_dir /build/RelWithDebInfo \ --model_root /training_e2e_test_data/models \ @@ -93,7 +95,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_run_frontend_batch_size_test.py -v" \ --cwd /build/RelWithDebInfo \ @@ -110,7 +112,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "mpirun -n 4 -x NCCL_DEBUG=INFO python orttraining_run_glue.py" \ --cwd 
/build/RelWithDebInfo @@ -126,7 +128,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_run_glue.py ORTGlueTest.test_bert_with_mrpc -v" \ --cwd /build/RelWithDebInfo \ @@ -143,7 +145,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_run_glue.py ORTGlueTest.test_bert_fp16_with_mrpc -v" \ --cwd /build/RelWithDebInfo \ @@ -160,7 +162,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_run_glue.py ORTGlueTest.test_roberta_with_mrpc -v" \ --cwd /build/RelWithDebInfo \ @@ -177,7 +179,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_run_glue.py ORTGlueTest.test_roberta_fp16_with_mrpc -v" \ --cwd /build/RelWithDebInfo \ @@ -194,7 +196,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_run_multiple_choice.py 
ORTMultipleChoiceTest.test_bert_fp16_with_swag -v" \ --cwd /build/RelWithDebInfo \ @@ -211,7 +213,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python onnxruntime_test_ort_trainer_with_mixed_precision.py -v" \ --cwd /build/RelWithDebInfo @@ -226,7 +228,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "python orttraining_test_transformers.py BertModelTest.test_for_pretraining_mixed_precision -v" \ --cwd /build/RelWithDebInfo @@ -242,7 +244,7 @@ jobs: --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ --volume /bert_data:/bert_data \ - onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 \ + onnxruntime_e2e_test_image \ /build/RelWithDebInfo/launch_test.py \ --cmd_line_with_args "mpirun -n 4 -x NCCL_DEBUG=INFO python orttraining_run_bert_pretrain.py ORTBertPretrainTest.test_pretrain_convergence" \ --cwd /build/RelWithDebInfo diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-frontend-test-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-frontend-test-ci-pipeline.yml index 9769337dee4eb..443db8c22fb5b 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-frontend-test-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-frontend-test-ci-pipeline.yml @@ -26,17 +26,18 @@ jobs: continueOnError: true condition: always() - - script: > - tools/ci_build/github/linux/run_dockerbuild.sh - -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) - -x " - --enable_training - --config 
RelWithDebInfo - --skip_onnx_tests - --build_wheel - --enable_training_python_frontend_e2e_tests - --enable_training_pipeline_e2e_tests - " - displayName: 'Build and run frontend tests' + - template: templates/run-docker-build-steps.yml + parameters: + RunDockerBuildArgs: > + -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) + -x " + --enable_training + --config RelWithDebInfo + --skip_onnx_tests + --build_wheel + --enable_training_python_frontend_e2e_tests + --enable_training_pipeline_e2e_tests + " + DisplayName: 'Build and run frontend tests' - template: templates/clean-agent-build-directory-step.yml \ No newline at end of file diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml index e17de8350f7e9..9cc888018a0c0 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-perf-test-ci-pipeline.yml @@ -13,15 +13,17 @@ jobs: clean: true submodules: recursive - - script: > - tools/ci_build/github/linux/run_dockerbuild.sh - -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) - -x " - --config RelWithDebInfo - --enable_training - --update --build - " - displayName: 'Build performance tests' + - template: templates/run-docker-build-steps.yml + parameters: + RunDockerBuildArgs: > + -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) + -t onnxruntime_perf_test_image + -x " + --config RelWithDebInfo + --enable_training + --update --build + " + DisplayName: 'Build performance tests' - script: > docker run --gpus all --rm --name onnxruntime-gpu-perf @@ -29,7 +31,7 @@ jobs: --volume $(Build.BinariesDirectory):/build --volume /bert_ort/bert_models:/build/bert_models:ro --volume /bert_data:/build/bert_data:ro - -e NIGHTLY_BUILD onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 + -e NIGHTLY_BUILD onnxruntime_perf_test_image /usr/bin/python3.6 
/onnxruntime_src/orttraining/tools/ci_test/run_bert_perf_test.py --binary_dir /build/RelWithDebInfo --training_data_root /build/bert_data @@ -42,7 +44,7 @@ jobs: --volume $(Build.BinariesDirectory):/build --volume /bert_ort/gpt2_models:/build/gpt2_models:ro --volume /bert_data/gpt2_data:/build/gpt2_data:ro - -e NIGHTLY_BUILD onnxruntime-ubuntu16.04-cuda10.1-cudnn7.6 + -e NIGHTLY_BUILD onnxruntime_perf_test_image /usr/bin/python3.6 /onnxruntime_src/orttraining/tools/ci_test/run_gpt2_perf_test.py --binary_dir /build/RelWithDebInfo --training_data_root /build/gpt2_data diff --git a/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml b/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml index 512af00c44fd1..fd32e7fd21a0f 100644 --- a/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/linux-ci.yml @@ -2,7 +2,7 @@ parameters: AgentPool : 'Linux-CPU' JobName : 'Linux_CI_Dev' SubmoduleCheckoutMode: '' - BuildCommand: 'tools/ci_build/github/linux/run_dockerbuild.sh -o ubuntu16.04 -d cpu -r $(Build.BinariesDirectory) -x "--use_tvm --build_wheel"' + RunDockerBuildArgs: '-o ubuntu16.04 -d cpu -r $(Build.BinariesDirectory) -x "--use_tvm --build_wheel"' DoNodejsPack: 'false' DoNugetPack: 'false' NuPackScript: '' @@ -34,8 +34,9 @@ jobs: - task: NodeTool@0 inputs: versionSpec: '12.16.3' - - script: ${{ parameters.BuildCommand }} - displayName: 'Command Line Script' + - template: run-docker-build-steps.yml + parameters: + RunDockerBuildArgs: '${{ parameters.RunDockerBuildArgs }}' - task: PublishTestResults@2 displayName: 'Publish unit test results' inputs: diff --git a/tools/ci_build/github/azure-pipelines/templates/run-docker-build-steps.yml b/tools/ci_build/github/azure-pipelines/templates/run-docker-build-steps.yml new file mode 100644 index 0000000000000..483a73c279249 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/templates/run-docker-build-steps.yml @@ -0,0 +1,17 @@ +# calls 
tools/ci_build/github/linux/run_dockerbuild.sh + +parameters: +- name: RunDockerBuildArgs + type: string + default: "" +- name: DisplayName + type: string + default: "Call run_dockerbuild.sh" + +steps: +- template: with-build-docker-image-cache-steps.yml + parameters: + Steps: + - script: | + tools/ci_build/github/linux/run_dockerbuild.sh ${{ parameters.RunDockerBuildArgs }} + displayName: "${{ parameters.DisplayName }}" diff --git a/tools/ci_build/github/azure-pipelines/templates/with-build-docker-image-cache-steps.yml b/tools/ci_build/github/azure-pipelines/templates/with-build-docker-image-cache-steps.yml new file mode 100644 index 0000000000000..2ed3c837eb2f8 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/templates/with-build-docker-image-cache-steps.yml @@ -0,0 +1,25 @@ +# runs the specified steps while logged in to the build docker image cache +# container registry + +parameters: +- name: Steps + type: stepList + default: [] + +steps: +- task: Docker@2 + inputs: + containerRegistry: 'onnxruntimebuildcache' + command: 'login' + addPipelineData: false + displayName: "Log in to build docker image cache container registry" + +- ${{ parameters.Steps }} + +- task: Docker@2 + inputs: + containerRegistry: 'onnxruntimebuildcache' + command: 'logout' + addPipelineData: false + displayName: "Log out of build docker image cache container registry" + condition: always() diff --git a/tools/ci_build/github/download_test_data.py b/tools/ci_build/github/download_test_data.py index 0be773b51ed65..f59edf3581b1b 100755 --- a/tools/ci_build/github/download_test_data.py +++ b/tools/ci_build/github/download_test_data.py @@ -14,7 +14,7 @@ REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..", "..")) sys.path.append(os.path.join(REPO_DIR, "tools", "python")) -from get_azcopy import get_azcopy # noqa: E402 +from util import get_azcopy # noqa: E402 # Hardcoded map of storage account to azure region endpoint diff --git 
a/tools/ci_build/github/linux/run_dockerbuild.sh b/tools/ci_build/github/linux/run_dockerbuild.sh index cbf4e8855451a..b0b70333862e9 100755 --- a/tools/ci_build/github/linux/run_dockerbuild.sh +++ b/tools/ci_build/github/linux/run_dockerbuild.sh @@ -8,7 +8,7 @@ YOCTO_VERSION="4.19" ALLOW_RELEASED_ONNX_OPSET_ONLY_ENV="ALLOW_RELEASED_ONNX_OPSET_ONLY="$ALLOW_RELEASED_ONNX_OPSET_ONLY echo "ALLOW_RELEASED_ONNX_OPSET_ONLY environment variable is set as "$ALLOW_RELEASED_ONNX_OPSET_ONLY_ENV -while getopts c:o:d:r:p:x:a:v:y: parameter_Option +while getopts c:o:d:r:p:x:a:v:y:t: parameter_Option do case "${parameter_Option}" in #android, ubuntu16.04, manylinux2010, ubuntu18.04, CentOS7 @@ -28,6 +28,9 @@ a) BUILD_ARCH=${OPTARG};; v) OPENVINO_VERSION=${OPTARG};; # YOCTO 4.19 + ACL 19.05, YOCTO 4.14 + ACL 19.02 y) YOCTO_VERSION=${OPTARG};; +# an additional name for the resulting docker image (created with "docker tag") +# this is useful for referencing the image outside of this script +t) EXTRA_IMAGE_TAG=${OPTARG};; esac done @@ -35,18 +38,19 @@ EXIT_CODE=1 PYTHON_VER=${PYTHON_VER:=3.6} echo "bo=$BUILD_OS bd=$BUILD_DEVICE bdir=$BUILD_DIR pv=$PYTHON_VER bex=$BUILD_EXTR_PAR" -# If in docker group, call "docker". Otherwise, call "sudo docker". -if id -Gnz | grep -zq "^docker$" ; then - DOCKER_CMD=docker -else - DOCKER_CMD="sudo --preserve-env docker" -fi +DOCKER_IMAGE_CACHE_CONTAINER_REGISTRY_NAME="onnxruntimebuildcache" +COMMON_GET_DOCKER_IMAGE_ARGS="--container-registry ${DOCKER_IMAGE_CACHE_CONTAINER_REGISTRY_NAME}" + +GET_DOCKER_IMAGE_CMD="${SOURCE_ROOT}/tools/ci_build/get_docker_image.py ${COMMON_GET_DOCKER_IMAGE_ARGS}" +DOCKER_CMD="docker" cd $SCRIPT_DIR/docker if [ $BUILD_OS = "android" ]; then IMAGE="android" DOCKER_FILE=Dockerfile.ubuntu_for_android - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f $DOCKER_FILE . 
+ $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile $DOCKER_FILE --context . elif [ $BUILD_OS = "manylinux2010" ]; then if [ $BUILD_DEVICE = "gpu" ]; then IMAGE="manylinux2010-cuda10.1" @@ -55,11 +59,15 @@ elif [ $BUILD_OS = "manylinux2010" ]; then IMAGE="manylinux2010" DOCKER_FILE=Dockerfile.manylinux2010 fi - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f $DOCKER_FILE . + $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile $DOCKER_FILE --context . elif [ $BUILD_OS = "centos7" ]; then IMAGE="centos7" DOCKER_FILE=Dockerfile.centos - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f $DOCKER_FILE . + $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile $DOCKER_FILE --context . elif [ $BUILD_OS = "yocto" ]; then IMAGE="arm-yocto-$YOCTO_VERSION" DOCKER_FILE=Dockerfile.ubuntu_for_arm @@ -68,35 +76,51 @@ elif [ $BUILD_OS = "yocto" ]; then if [ $YOCTO_VERSION = "4.14" ]; then TOOL_CHAIN_SCRIPT=fsl-imx-xwayland-glibc-x86_64-fsl-image-qt5-aarch64-toolchain-4.14-sumo.sh fi - $DOCKER_CMD build -t "onnxruntime-$IMAGE" --build-arg TOOL_CHAIN=$TOOL_CHAIN_SCRIPT --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f $DOCKER_FILE . 
+ $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg TOOL_CHAIN=$TOOL_CHAIN_SCRIPT --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile $DOCKER_FILE --context . else if [ $BUILD_DEVICE = "gpu" ]; then IMAGE="$BUILD_OS-$CUDA_VER" DOCKER_FILE=Dockerfile.ubuntu_gpu if [ $CUDA_VER = "cuda9.1-cudnn7.1" ]; then - DOCKER_FILE=Dockerfile.ubuntu_gpu_cuda9 + DOCKER_FILE=Dockerfile.ubuntu_gpu_cuda9 fi - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg BUILD_EXTR_PAR="${BUILD_EXTR_PAR}" -f $DOCKER_FILE . + $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg BUILD_EXTR_PAR=\"${BUILD_EXTR_PAR}\"" \ + --dockerfile $DOCKER_FILE --context . elif [ $BUILD_DEVICE = "tensorrt" ]; then # TensorRT container release 20.07 IMAGE="$BUILD_OS-cuda11.0-cudnn8.0-tensorrt7.1" DOCKER_FILE=Dockerfile.ubuntu_tensorrt - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f $DOCKER_FILE . + $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile $DOCKER_FILE --context . elif [ $BUILD_DEVICE = "openvino" ]; then IMAGE="$BUILD_OS-openvino" DOCKER_FILE=Dockerfile.ubuntu_openvino - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg OPENVINO_VERSION=${OPENVINO_VERSION} -f $DOCKER_FILE . 
+ $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg OPENVINO_VERSION=${OPENVINO_VERSION}" \ + --dockerfile $DOCKER_FILE --context . else IMAGE="$BUILD_OS" if [ $BUILD_ARCH = "x86" ]; then IMAGE="$IMAGE.x86" - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f Dockerfile.ubuntu_x86 . + $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile Dockerfile.ubuntu_x86 --context . else - $DOCKER_CMD build --pull -t "onnxruntime-$IMAGE" --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} -f Dockerfile.ubuntu . + $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \ + --docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER}" \ + --dockerfile Dockerfile.ubuntu --context . 
fi fi fi +if [ -v EXTRA_IMAGE_TAG ]; then + ${DOCKER_CMD} tag "onnxruntime-$IMAGE" "${EXTRA_IMAGE_TAG}" +fi + set +e mkdir -p ~/.cache/onnxruntime mkdir -p ~/.onnx @@ -108,7 +132,7 @@ fi if [ $BUILD_DEVICE = "cpu" ] || [ $BUILD_DEVICE = "ngraph" ] || [ $BUILD_DEVICE = "openvino" ] || [ $BUILD_DEVICE = "nnapi" ] || [ $BUILD_DEVICE = "arm" ]; then RUNTIME= elif [[ $BUILD_EXTR_PAR = *--enable_training_python_frontend_e2e_tests* ]]; then - RUNTIME="--gpus all --shm-size=1024m" + RUNTIME="--gpus all --shm-size=1024m" else RUNTIME="--gpus all" fi diff --git a/tools/ci_build/logger.py b/tools/ci_build/logger.py index 928b978cd1721..c15fad76e329e 100644 --- a/tools/ci_build/logger.py +++ b/tools/ci_build/logger.py @@ -1,11 +1,12 @@ -#!/usr/bin/env python3 # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. import logging -logging.basicConfig( - format="%(asctime)s %(name)s [%(levelname)s] - %(message)s", - level=logging.DEBUG) -log = logging.getLogger("Build") +def get_logger(name): + logging.basicConfig( + format="%(asctime)s %(name)s [%(levelname)s] - %(message)s", + level=logging.DEBUG) + + return logging.getLogger(name) diff --git a/tools/ci_build/op_registration_utils.py b/tools/ci_build/op_registration_utils.py index 79316d45d9ba3..8e7067d578d20 100644 --- a/tools/ci_build/op_registration_utils.py +++ b/tools/ci_build/op_registration_utils.py @@ -9,7 +9,9 @@ import sys import typing -from logger import log +from logger import get_logger + +log = get_logger("op_registration_utils") domain_map = {'': 'kOnnxDomain', 'ai.onnx': 'kOnnxDomain', diff --git a/tools/ci_build/op_registration_validator.py b/tools/ci_build/op_registration_validator.py index 1d1ec26b93545..c424cb97137a2 100644 --- a/tools/ci_build/op_registration_validator.py +++ b/tools/ci_build/op_registration_validator.py @@ -11,7 +11,9 @@ import sys import typing -from logger import log +from logger import get_logger + +log = get_logger("op_registration_validator") # 
deprecated ops where the last registration should have an end version. # value for each entry is the opset when it was deprecated. end version of last registration should equal value - 1. diff --git a/tools/python/util/__init__.py b/tools/python/util/__init__.py new file mode 100644 index 0000000000000..13d064e3cbc27 --- /dev/null +++ b/tools/python/util/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from .get_azcopy import get_azcopy +from .run import run diff --git a/tools/python/get_azcopy.py b/tools/python/util/get_azcopy.py similarity index 91% rename from tools/python/get_azcopy.py rename to tools/python/util/get_azcopy.py index 520d9b17cf33e..d3cb71431e61c 100644 --- a/tools/python/get_azcopy.py +++ b/tools/python/util/get_azcopy.py @@ -1,4 +1,8 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + import contextlib +import logging import os import platform import re @@ -19,6 +23,8 @@ "Windows": "https://azcopyvnext.azureedge.net/release20200501/azcopy_windows_amd64_10.4.3.zip", } +_log = logging.getLogger("util.get_azcopy") + def _check_version(azcopy_path): proc = subprocess.run( @@ -65,7 +71,7 @@ def get_azcopy(local_azcopy_path="azcopy"): assert len(download_basename) > 0 downloaded_path = os.path.join(temp_dir, download_basename) - print("Downloading azcopy from '{}'...".format(download_url)) + _log.info("Downloading azcopy from '{}'...".format(download_url)) urllib.request.urlretrieve(download_url, downloaded_path) extracted_path = os.path.join(temp_dir, "azcopy") diff --git a/tools/python/util/run.py b/tools/python/util/run.py new file mode 100644 index 0000000000000..5afe6300977f1 --- /dev/null +++ b/tools/python/util/run.py @@ -0,0 +1,43 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
import logging
import os
import subprocess


_log = logging.getLogger("util.run")


def run(*args, cwd=None, capture=False, shell=False, env=None, check=True,
        quiet=False):
    """Runs a subprocess.

    Args:
        *args: The subprocess arguments: the program followed by its
            arguments. At least one argument is required.
        cwd: The working directory. If None, specifies the current directory.
        capture: Whether to capture stdout and stderr. They are captured
            separately and are available as the `stdout` and `stderr`
            attributes of the returned object.
        shell: Whether to run using the shell. When a single argument is
            given it is handed to the shell as the command line.
        env: The environment variables as a dict. If None, inherits the current
            environment.
        check: Whether to raise an error if the return code is not zero.
        quiet: If true, do not print output from the subprocess. Has no
            effect when `capture` is true, since the output is captured
            instead of printed.

    Returns:
        A subprocess.CompletedProcess instance.

    Raises:
        ValueError: If no subprocess arguments are given.
        subprocess.CalledProcessError: If `check` is true and the subprocess
            returns a non-zero code.
    """
    cmd = [*args]
    if not cmd:
        # subprocess.run([]) fails with an obscure low-level error; report
        # the actual problem instead.
        raise ValueError("run() requires at least one subprocess argument")

    # Lazy %-style arguments: the message is only built if it is emitted.
    _log.info("Running subprocess in '%s'\n%s", cwd or os.getcwd(), cmd)

    # capture takes precedence over quiet.
    if capture:
        output = subprocess.PIPE
    elif quiet:
        output = subprocess.DEVNULL
    else:
        output = None

    # With shell=True the subprocess documentation recommends passing the
    # command as a string. On POSIX a one-element list behaves identically,
    # but on Windows the list form is quoted as a single program name and a
    # command line such as "echo hi" would fail to run.
    popen_args = cmd[0] if shell and len(cmd) == 1 else cmd

    completed_process = subprocess.run(
        popen_args, cwd=cwd, check=check, stdout=output, stderr=output,
        env=env, shell=shell)

    _log.debug("Subprocess completed. Return code: %s",
               completed_process.returncode)

    return completed_process