From 349b09cec107077ebb5b33ce413983c54b9965b7 Mon Sep 17 00:00:00 2001
From: "Mr.Nineteen"
Date: Mon, 3 Apr 2023 11:35:27 +0800
Subject: [PATCH] support bazel build

---
 .bazelrc                                      |   19 +
 README.md                                     |   12 +
 WORKSPACE                                     |   14 +
 benchmark/BUILD                               |   24 +
 build_deps/gpus/BUILD                         |    0
 build_deps/gpus/check_cuda_libs.py            |   86 ++
 build_deps/gpus/configure.bzl                 | 1320 +++++++++++++++++
 build_deps/gpus/crosstool/BUILD               |    0
 build_deps/gpus/crosstool/BUILD.tpl           |  108 ++
 .../crosstool/cc_toolchain_config.bzl.tpl     |  645 ++++++++
 .../crosstool/crosstool_compiler_wrapper.tpl  |  313 ++++
 build_deps/gpus/cuda/BUILD                    |    0
 build_deps/gpus/cuda/BUILD.tpl                |  229 +++
 build_deps/gpus/cuda/build_defs.bzl.tpl       |   56 +
 build_deps/gpus/cuda/cuda_config.h.tpl        |   34 +
 build_deps/gpus/cuda/cuda_config.py.tpl       |   16 +
 build_deps/gpus/find_cuda_config.py           |  638 ++++++++
 build_deps/remote_config/BUILD                |    0
 build_deps/remote_config/BUILD.tpl            |   26 +
 build_deps/remote_config/common.bzl           |  294 ++++
 .../remote_platform_configure.bzl             |   55 +
 include/BUILD                                 |   29 +
 include/merlin/BUILD                          |   24 +
 23 files changed, 3942 insertions(+)
 create mode 100644 .bazelrc
 create mode 100644 WORKSPACE
 create mode 100644 benchmark/BUILD
 create mode 100644 build_deps/gpus/BUILD
 create mode 100644 build_deps/gpus/check_cuda_libs.py
 create mode 100644 build_deps/gpus/configure.bzl
 create mode 100644 build_deps/gpus/crosstool/BUILD
 create mode 100644 build_deps/gpus/crosstool/BUILD.tpl
 create mode 100644 build_deps/gpus/crosstool/cc_toolchain_config.bzl.tpl
 create mode 100755 build_deps/gpus/crosstool/crosstool_compiler_wrapper.tpl
 create mode 100644 build_deps/gpus/cuda/BUILD
 create mode 100644 build_deps/gpus/cuda/BUILD.tpl
 create mode 100644 build_deps/gpus/cuda/build_defs.bzl.tpl
 create mode 100644 build_deps/gpus/cuda/cuda_config.h.tpl
 create mode 100644 build_deps/gpus/cuda/cuda_config.py.tpl
 create mode 100644 build_deps/gpus/find_cuda_config.py
 create mode 100644 build_deps/remote_config/BUILD
 create mode 100644 build_deps/remote_config/BUILD.tpl
 create mode 100644 build_deps/remote_config/common.bzl
 create mode 100644 build_deps/remote_config/remote_platform_configure.bzl
 create mode 100644 include/BUILD
 create mode 100644 include/merlin/BUILD

diff --git a/.bazelrc b/.bazelrc
new file mode 100644
index 000000000..f7374e348
--- /dev/null
+++ b/.bazelrc
@@ -0,0 +1,19 @@
+build -c opt
+build --copt -O3
+build --copt -pthread
+build --linkopt -pthread
+build --linkopt -ldl
+build --incompatible_linkopts_to_linklibs
+build --copt -g --strip=never
+build --experimental_repo_remote_exec
+
+# This config refers to building CUDA kernels with nvcc.
+build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
+
+# CUDA options
+build:cuda --action_env GCC_HOST_COMPILER_PATH="/opt/rh/devtoolset-9/root/usr/bin/gcc"
+build:cuda --action_env CUDA_TOOLKIT_PATH="/usr/local/cuda"
+build:cuda --action_env CUDA_VERSION="11"
+build:cuda --action_env CUDNN_VERSION="8"
+build:cuda --action_env CUDNN_INSTALL_PATH="/usr/local/cuda"
+build:cuda --action_env CUDA_COMPUTE_CAPABILITIES="7.5"
diff --git a/README.md b/README.md
index 8185af173..374efe2aa 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,7 @@ and also open for public contributions, bug fixes, and documentation. [[Contribu
 Basically, HierarchicalKV is a headers only library, the commands below only create binaries for benchmark and unit testing.
 
+### With CMake
 ```shell
 git clone --recursive https://github.com/NVIDIA-Merlin/HierarchicalKV.git
 cd HierarchicalKV && mkdir -p build && cd build
 cmake .. && make -j
@@ -73,6 +74,17 @@ For Unit Test:
 ./merlin_hashtable_test
 ```
 
+### With Bazel
+```shell
+git clone --recursive https://github.com/NVIDIA-Merlin/HierarchicalKV.git
+cd HierarchicalKV && bazel build --config=cuda //...
+```
+
+For Benchmark:
+```shell
+./bazel-bin/benchmark/benchmark_util
+```
+
 Your environment must meet the following requirements:
 
 - CUDA version >= 11.2
diff --git a/WORKSPACE b/WORKSPACE
new file mode 100644
index 000000000..9d57b33f5
--- /dev/null
+++ b/WORKSPACE
@@ -0,0 +1,14 @@
+workspace(name = "HierarchicalKV")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("//build_deps/gpus:configure.bzl", "cuda_configure")
+
+http_archive(
+    name = "bazel_skylib",
+    sha256 = "1dde365491125a3db70731e25658dfdd3bc5dbdfd11b840b3e987ecf043c7ca0",
+    urls = [
+        "https://github.com/bazelbuild/bazel-skylib/releases/download/0.9.0/bazel_skylib-0.9.0.tar.gz",
+    ],
+)
+
+cuda_configure(name = "local_config_cuda")
diff --git a/benchmark/BUILD b/benchmark/BUILD
new file mode 100644
index 000000000..a7221a33b
--- /dev/null
+++ b/benchmark/BUILD
@@ -0,0 +1,24 @@
+load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
+
+cc_binary(
+    name = "benchmark_util",
+    deps = [
+        ":benchmark_lib",
+    ],
+)
+
+cuda_library(
+    name = "benchmark_lib",
+    srcs = [
+        "merlin_hashtable_benchmark.cc.cu",
+    ],
+    hdrs = [
+        "benchmark_util.cuh",
+    ],
+    copts = ["-Iinclude/"],
+    linkopts = ["-pthread"],
+    deps = [
+        "//include:merlin_hashtable",
+        "@local_config_cuda//cuda",
+    ],
+)
diff --git a/build_deps/gpus/BUILD b/build_deps/gpus/BUILD
new file mode 100644
index 000000000..e69de29bb
diff --git a/build_deps/gpus/check_cuda_libs.py b/build_deps/gpus/check_cuda_libs.py
new file mode 100644
index 000000000..216a10e5b
--- /dev/null
+++ b/build_deps/gpus/check_cuda_libs.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Verifies that a list of libraries is installed on the system.
+
+Takes a list of arguments where every two subsequent arguments form a logical
+tuple of (path, check_soname): the path to the library, and either True or
+False to indicate whether to check the soname field on the shared library.
+
+Example Usage:
+./check_cuda_libs.py /path/to/lib1.so True /path/to/lib2.so False
+"""
+import os
+import os.path
+import platform
+import subprocess
+import sys
+
+# pylint: disable=g-import-not-at-top,g-importing-member
+try:
+    from shutil import which
+except ImportError:
+    from distutils.spawn import find_executable as which
+# pylint: enable=g-import-not-at-top,g-importing-member
+
+
+class ConfigError(Exception):
+    pass
+
+
+def check_cuda_lib(path, check_soname=True):
+    """Tests if a library exists on disk and whether its soname matches the filename.
+
+    Args:
+        path: the path to the library.
+        check_soname: whether to check the soname as well.
+ + Raises: + ConfigError: If the library does not exist or if its soname does not match + the filename. + """ + if not os.path.isfile(path): + raise ConfigError("No library found under: " + path) + objdump = which("objdump") + if check_soname and objdump is not None: + # Decode is necessary as in py3 the return type changed from str to bytes + output = subprocess.check_output([objdump, "-p", path]).decode("utf-8") + output = [line for line in output.splitlines() if "SONAME" in line] + sonames = [line.strip().split(" ")[-1] for line in output] + if not any(soname == os.path.basename(path) for soname in sonames): + raise ConfigError("None of the libraries match their SONAME: " + + path) + + +def main(): + try: + args = [argv for argv in sys.argv[1:]] + if len(args) % 2 == 1: + raise ConfigError("Expected even number of arguments") + checked_paths = [] + for i in range(0, len(args), 2): + path = args[i] + check_cuda_lib(path, check_soname=args[i + 1] == "True") + checked_paths.append(path) + # pylint: disable=superfluous-parens + print(os.linesep.join(checked_paths)) + # pylint: enable=superfluous-parens + except ConfigError as e: + sys.stderr.write(str(e)) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/build_deps/gpus/configure.bzl b/build_deps/gpus/configure.bzl new file mode 100644 index 000000000..326096419 --- /dev/null +++ b/build_deps/gpus/configure.bzl @@ -0,0 +1,1320 @@ +"""Repository rule for CUDA autoconfiguration. + +`cuda_configure` depends on the following environment variables: + + * `NEED_CUDA`: Whether to enable building with CUDA. + * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path + * `SYSROOT`: The sysroot to use when compiling. + * `CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is + `/usr/local/cuda,usr/`. + * `CUDA_TOOLKIT_PATH` (deprecated): The path to the CUDA toolkit. Default is + `/usr/local/cuda`. + * `CUDA_VERSION`: The version of the CUDA toolkit. If this is blank, then + use the system default. + * `CUDNN_VERSION`: The version of the cuDNN library. + * `CUDNN_INSTALL_PATH` (deprecated): The path to the cuDNN library. Default is + `/usr/local/cuda`. + * `CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is + `3.5,5.2`. 
+ * `PYTHON_BIN_PATH`: The python binary path +""" + +load( + "@bazel_tools//tools/cpp:lib_cc_configure.bzl", + "escape_string", + "get_env_var", +) +load( + "//build_deps/remote_config:common.bzl", + "config_repo_label", + "err_out", + "execute", + "get_bash_bin", + "get_cpu_value", + "get_host_environ", + "get_python_bin", + "raw_exec", + "read_dir", + "realpath", + "which", +) + +_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH" +_GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX" +_SYSROOT = "SYSROOT" +_CUDA_TOOLKIT_PATH = "CUDA_TOOLKIT_PATH" +_CUDA_VERSION = "CUDA_VERSION" +_CUDNN_VERSION = "CUDNN_VERSION" +_CUDNN_INSTALL_PATH = "CUDNN_INSTALL_PATH" +_CUDA_COMPUTE_CAPABILITIES = "CUDA_COMPUTE_CAPABILITIES" +_CUDA_CONFIG_REPO = "CUDA_CONFIG_REPO" +_PYTHON_BIN_PATH = "PYTHON_BIN_PATH" + +_TENSORRT_VERSION = "TENSORRT_VERSION" +_TENSORRT_INSTALL_PATH = "TENSORRT_INSTALL_PATH" +_TENSORRT_STATIC_PATH = "TENSORRT_STATIC_PATH" +_TENSORRT_LIBS = [ + "nvinfer", + "nvinfer_plugin", + "nvonnxparser", + "nvparsers", +] +_TENSORRT_HEADERS = [ + "NvInfer.h", + "NvUtils.h", + "NvInferPlugin.h", +] +_TENSORRT_HEADERS_V6 = [ + "NvInfer.h", + "NvUtils.h", + "NvInferPlugin.h", + "NvInferVersion.h", + "NvInferRuntime.h", + "NvInferRuntimeCommon.h", + "NvInferPluginUtils.h", + "NvOnnxParser.h", + "NvOnnxConfig.h", +] +_TENSORRT_HEADERS_V8 = [ + "NvInfer.h", + "NvInferLegacyDims.h", + "NvInferImpl.h", + "NvUtils.h", + "NvInferPlugin.h", + "NvInferVersion.h", + "NvInferRuntime.h", + "NvInferRuntimeCommon.h", + "NvInferPluginUtils.h", + "NvOnnxParser.h", + "NvOnnxConfig.h", +] + +def _at_least_version(actual_version, required_version): + actual = [int(v) for v in actual_version.split(".")] + required = [int(v) for v in required_version.split(".")] + return actual >= required + +def _get_tensorrt_headers(tensorrt_version): + if _at_least_version(tensorrt_version, "8"): + return _TENSORRT_HEADERS_V8 + if _at_least_version(tensorrt_version, "6"): + return _TENSORRT_HEADERS_V6 + return _TENSORRT_HEADERS + +def to_list_of_strings(elements): + """Convert the list of ["a", "b", "c"] into '"a", "b", "c"'. + + This is to be used to put a list of strings into the bzl file templates + so it gets interpreted as list of strings in Starlark. + + Args: + elements: list of string elements + + Returns: + single string of elements wrapped in quotes separated by a comma.""" + quoted_strings = ["\"" + element + "\"" for element in elements] + return ", ".join(quoted_strings) + +def verify_build_defines(params): + """Verify all variables that crosstool/BUILD.tpl expects are substituted. + + Args: + params: dict of variables that will be passed to the BUILD.tpl template. + """ + missing = [] + for param in [ + "cxx_builtin_include_directories", + "extra_no_canonical_prefixes_flags", + "host_compiler_path", + "host_compiler_prefix", + "host_compiler_warnings", + "linker_bin_path", + "compiler_deps", + "unfiltered_compile_flags", + ]: + if ("%{" + param + "}") not in params: + missing.append(param) + + if missing: + auto_configure_fail( + "BUILD.tpl template is missing these variables: " + str(missing) + + ".\nWe only got: " + str(params) + ".", + ) + +# TODO(dzc): Once these functions have been factored out of Bazel's +# cc_configure.bzl, load them from @bazel_tools instead. +# BEGIN cc_configure common functions. 
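The configure.bzl helpers introduced above (`_at_least_version`, `to_list_of_strings`) are easiest to read with concrete values. The snippet below is an editor's illustrative sketch only, not part of the patch; it restates those two helpers in plain Python so their behavior can be checked outside Bazel.

```python
# Illustrative sketch only -- restates two helpers from
# build_deps/gpus/configure.bzl above; not part of the patch.

def _at_least_version(actual_version, required_version):
    # Component-wise numeric comparison, e.g. "8.4.1" >= "8".
    actual = [int(v) for v in actual_version.split(".")]
    required = [int(v) for v in required_version.split(".")]
    return actual >= required

def to_list_of_strings(elements):
    # Quote each element and join with ", " for splicing into *.tpl templates.
    return ", ".join(['"' + element + '"' for element in elements])

print(_at_least_version("8.4.1", "8"))   # True  -> selects _TENSORRT_HEADERS_V8
print(_at_least_version("5.1.5", "6"))   # False -> falls back to _TENSORRT_HEADERS
print(to_list_of_strings(["nvinfer", "nvparsers"]))  # "nvinfer", "nvparsers"
```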
+def find_cc(repository_ctx): + """Find the C++ compiler.""" + target_cc_name = "gcc" + cc_path_envvar = _GCC_HOST_COMPILER_PATH + cc_name = target_cc_name + + cc_name_from_env = get_host_environ(repository_ctx, cc_path_envvar) + if cc_name_from_env: + cc_name = cc_name_from_env + if cc_name.startswith("/"): + # Absolute path, maybe we should make this supported by our which function. + return cc_name + cc = which(repository_ctx, cc_name) + if cc == None: + fail(("Cannot find {}, either correct your path or set the {}" + + " environment variable").format(target_cc_name, cc_path_envvar)) + return cc + +_INC_DIR_MARKER_BEGIN = "#include <...>" + +# OSX add " (framework directory)" at the end of line, strip it. +_OSX_FRAMEWORK_SUFFIX = " (framework directory)" +_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX) + +def _cxx_inc_convert(path): + """Convert path returned by cc -E xc++ in a complete path.""" + path = path.strip() + if path.endswith(_OSX_FRAMEWORK_SUFFIX): + path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip() + return path + +def _normalize_include_path(repository_ctx, path): + """Normalizes include paths before writing them to the crosstool. + + If path points inside the 'crosstool' folder of the repository, a relative + path is returned. + If path points outside the 'crosstool' folder, an absolute path is returned. + """ + path = str(repository_ctx.path(path)) + crosstool_folder = str(repository_ctx.path(".").get_child("crosstool")) + + if path.startswith(crosstool_folder): + # We drop the path to "$REPO/crosstool" and a trailing path separator. + return path[len(crosstool_folder) + 1:] + return path + +def _is_compiler_option_supported(repository_ctx, cc, option): + """Checks that `option` is supported by the C compiler. Doesn't %-escape the option.""" + result = repository_ctx.execute([ + cc, + option, + "-o", + "/dev/null", + "-c", + str(repository_ctx.path("tools/cpp/empty.cc")), + ]) + return result.stderr.find(option) == -1 + +def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sysroot): + """Compute the list of default C or C++ include directories.""" + if lang_is_cpp: + lang = "c++" + else: + lang = "c" + sysroot = [] + if tf_sysroot: + sysroot += ["--sysroot", tf_sysroot] + result = raw_exec( + repository_ctx, + [cc, "-E", "-x" + lang, "-", "-v"] + sysroot, + ) + stderr = err_out(result) + index1 = stderr.find(_INC_DIR_MARKER_BEGIN) + if index1 == -1: + return [] + index1 = stderr.find("\n", index1) + if index1 == -1: + return [] + index2 = stderr.rfind("\n ") + if index2 == -1 or index2 < index1: + return [] + index2 = stderr.find("\n", index2 + 1) + if index2 == -1: + inc_dirs = stderr[index1 + 1:] + else: + inc_dirs = stderr[index1 + 1:index2].strip() + + print_resource_dir_supported = _is_compiler_option_supported( + repository_ctx, + cc, + "-print-resource-dir", + ) + + if print_resource_dir_supported: + resource_dir = repository_ctx.execute( + [cc, "-print-resource-dir"], + ).stdout.strip() + "/share" + inc_dirs += "\n" + resource_dir + + return [ + _normalize_include_path(repository_ctx, _cxx_inc_convert(p)) + for p in inc_dirs.split("\n") + ] + +def get_cxx_inc_directories(repository_ctx, cc, tf_sysroot): + """Compute the list of default C and C++ include directories.""" + + includes_cpp = _get_cxx_inc_directories_impl( + repository_ctx, + cc, + True, + tf_sysroot, + ) + includes_c = _get_cxx_inc_directories_impl( + repository_ctx, + cc, + False, + tf_sysroot, + ) + + return includes_cpp + [ + inc + for inc in includes_c + if inc not in 
includes_cpp + ] + +def auto_configure_fail(msg): + """Output failure message when cuda configuration fails.""" + red = "\033[0;31m" + no_color = "\033[0m" + fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg)) + +# END cc_configure common functions (see TODO above). + +def _cuda_include_path(repository_ctx, cuda_config): + """Generates the Starlark string with cuda include directories. + + Args: + repository_ctx: The repository context. + cc: The path to the gcc host compiler. + + Returns: + A list of the gcc host compiler include directories. + """ + nvcc_path = repository_ctx.path("%s/bin/nvcc%s" % ( + cuda_config.cuda_toolkit_path, + ".exe" if cuda_config.cpu_value == "Windows" else "", + )) + + # The expected exit code of this command is non-zero. Bazel remote execution + # only caches commands with zero exit code. So force a zero exit code. + cmd = "%s -v /dev/null -o /dev/null ; [ $? -eq 1 ]" % str(nvcc_path) + result = raw_exec( + repository_ctx, + [get_bash_bin(repository_ctx), "-c", cmd], + ) + target_dir = "" + for one_line in err_out(result).splitlines(): + if one_line.startswith("#$ _TARGET_DIR_="): + target_dir = (cuda_config.cuda_toolkit_path + "/" + + one_line.replace( + "#$ _TARGET_DIR_=", + "", + ) + "/include") + inc_entries = [] + if target_dir != "": + inc_entries.append(realpath(repository_ctx, target_dir)) + inc_entries.append( + realpath(repository_ctx, cuda_config.cuda_toolkit_path + "/include"), + ) + return inc_entries + +def matches_version(environ_version, detected_version): + """Checks whether the user-specified version matches the detected version. + + This function performs a weak matching so that if the user specifies only + the + major or major and minor versions, the versions are still considered + matching + if the version parts match. To illustrate: + + environ_version detected_version result + ----------------------------------------- + 5.1.3 5.1.3 True + 5.1 5.1.3 True + 5 5.1 True + 5.1.3 5.1 False + 5.2.3 5.1.3 False + + Args: + environ_version: The version specified by the user via environment + variables. + detected_version: The version autodetected from the CUDA installation on + the system. + Returns: True if user-specified version matches detected version and False + otherwise. + """ + environ_version_parts = environ_version.split(".") + detected_version_parts = detected_version.split(".") + if len(detected_version_parts) < len(environ_version_parts): + return False + for i, part in enumerate(detected_version_parts): + if i >= len(environ_version_parts): + break + if part != environ_version_parts[i]: + return False + return True + +_NVCC_VERSION_PREFIX = "Cuda compilation tools, release " + +_DEFINE_CUDNN_MAJOR = "#define CUDNN_MAJOR" + +def compute_capabilities(repository_ctx): + """Returns a list of strings representing cuda compute capabilities. + + Args: + repository_ctx: the repo rule's context. + Returns: list of cuda architectures to compile for. 'compute_xy' refers to + both PTX and SASS, 'sm_xy' refers to SASS only. + """ + capabilities = get_host_environ( + repository_ctx, + _CUDA_COMPUTE_CAPABILITIES, + "compute_35,compute_52", + ).split(",") + + # Map old 'x.y' capabilities to 'compute_xy'. + if len(capabilities) > 0 and all( + [len(x.split(".")) == 2 for x in capabilities], + ): + # If all capabilities are in 'x.y' format, only include PTX for the + # highest capability. 
+ cc_list = sorted([x.replace(".", "") for x in capabilities]) + capabilities = [ + "sm_%s" % x + for x in cc_list[:-1] + ] + ["compute_%s" % cc_list[-1]] + for i, capability in enumerate(capabilities): + parts = capability.split(".") + if len(parts) != 2: + continue + capabilities[i] = "compute_%s%s" % (parts[0], parts[1]) + + # Make list unique + capabilities = dict(zip(capabilities, capabilities)).keys() + + # Validate capabilities. + for capability in capabilities: + if not capability.startswith(("compute_", "sm_")): + auto_configure_fail("Invalid compute capability: %s" % capability) + for prefix in ["compute_", "sm_"]: + if not capability.startswith(prefix): + continue + if len(capability) == len(prefix) + 2 and capability[-2:].isdigit( + ): + continue + auto_configure_fail("Invalid compute capability: %s" % capability) + + return capabilities + +def lib_name(base_name, cpu_value, version = None, static = False): + """Constructs the platform-specific name of a library. + + Args: + base_name: The name of the library, such as "cudart" + cpu_value: The name of the host operating system. + version: The version of the library. + static: True the library is static or False if it is a shared object. + + Returns: + The platform-specific name of the library. + """ + version = "" if not version else "." + version + if cpu_value in ("Linux", "FreeBSD"): + if static: + return "lib%s.a" % base_name + return "lib%s.so%s" % (base_name, version) + elif cpu_value == "Windows": + return "%s.lib" % base_name + elif cpu_value == "Darwin": + if static: + return "lib%s.a" % base_name + return "lib%s%s.dylib" % (base_name, version) + else: + auto_configure_fail("Invalid cpu_value: %s" % cpu_value) + +def _lib_path(lib, cpu_value, basedir, version, static): + file_name = lib_name(lib, cpu_value, version, static) + return "%s/%s" % (basedir, file_name) + +def _should_check_soname(version, static): + return version and not static + +def _check_cuda_lib_params(lib, cpu_value, basedir, version, static = False): + return ( + _lib_path(lib, cpu_value, basedir, version, static), + _should_check_soname(version, static), + ) + +def _check_cuda_libs(repository_ctx, script_path, libs): + python_bin = get_python_bin(repository_ctx) + contents = repository_ctx.read(script_path).splitlines() + + cmd = "from os import linesep;" + cmd += "f = open('script.py', 'w');" + for line in contents: + cmd += "f.write('%s' + linesep);" % line + cmd += "f.close();" + cmd += "from os import system;" + args = " ".join(["\"" + path + "\" " + str(check) for path, check in libs]) + cmd += "system('%s script.py %s');" % (python_bin, args) + + all_paths = [path for path, _ in libs] + checked_paths = execute( + repository_ctx, + [python_bin, "-c", cmd], + ).stdout.splitlines() + + # Filter out empty lines from splitting on '\r\n' on Windows + checked_paths = [path for path in checked_paths if len(path) > 0] + if all_paths != checked_paths: + auto_configure_fail( + "Error with installed CUDA libs. Expected '%s'. Actual '%s'." % + (all_paths, checked_paths), + ) + +def _find_libs(repository_ctx, check_cuda_libs_script, cuda_config): + """Returns the CUDA and cuDNN libraries on the system. + + Also, verifies that the script actually exist. + + Args: + repository_ctx: The repository context. + check_cuda_libs_script: The path to a script verifying that the cuda + libraries exist on the system. + cuda_config: The CUDA config as returned by _get_cuda_config + + Returns: + Map of library names to structs of filename and path. 
+ """ + cpu_value = cuda_config.cpu_value + stub_dir = "/stubs" + + check_cuda_libs_params = { + "cuda": _check_cuda_lib_params( + "cuda", + cpu_value, + cuda_config.config["cuda_library_dir"] + stub_dir, + version = None, + static = False, + ), + "cudart": _check_cuda_lib_params( + "cudart", + cpu_value, + cuda_config.config["cuda_library_dir"], + cuda_config.cudart_version, + static = False, + ), + "cudart_static": _check_cuda_lib_params( + "cudart_static", + cpu_value, + cuda_config.config["cuda_library_dir"], + cuda_config.cudart_version, + static = True, + ), + "cublas": _check_cuda_lib_params( + "cublas", + cpu_value, + cuda_config.config["cublas_library_dir"], + cuda_config.cublas_version, + static = False, + ), + "cublasLt": _check_cuda_lib_params( + "cublasLt", + cpu_value, + cuda_config.config["cublas_library_dir"], + cuda_config.cublas_version, + static = False, + ), + "cusolver": _check_cuda_lib_params( + "cusolver", + cpu_value, + cuda_config.config["cusolver_library_dir"], + cuda_config.cusolver_version, + static = False, + ), + "curand": _check_cuda_lib_params( + "curand", + cpu_value, + cuda_config.config["curand_library_dir"], + cuda_config.curand_version, + static = False, + ), + "cufft": _check_cuda_lib_params( + "cufft", + cpu_value, + cuda_config.config["cufft_library_dir"], + cuda_config.cufft_version, + static = False, + ), + "cudnn": _check_cuda_lib_params( + "cudnn", + cpu_value, + cuda_config.config["cudnn_library_dir"], + cuda_config.cudnn_version, + static = False, + ), + "cupti": _check_cuda_lib_params( + "cupti", + cpu_value, + cuda_config.config["cupti_library_dir"], + cuda_config.cupti_version, + static = False, + ), + "cusparse": _check_cuda_lib_params( + "cusparse", + cpu_value, + cuda_config.config["cusparse_library_dir"], + cuda_config.cusparse_version, + static = False, + ), + } + + # Verify that the libs actually exist at their locations. + _check_cuda_libs( + repository_ctx, + check_cuda_libs_script, + check_cuda_libs_params.values(), + ) + + paths = { + filename: v[0] + for (filename, v) in check_cuda_libs_params.items() + } + return paths + +def _cudart_static_linkopt(cpu_value): + """Returns additional platform-specific linkopts for cudart.""" + return "" if cpu_value == "Darwin" else "\"-lrt\"," + +def _exec_find_cuda_config(repository_ctx, script_path, cuda_libraries): + python_bin = get_python_bin(repository_ctx) + cmd = "from os import system;" + "system('\"%s\" %s %s');" % ( + python_bin, + script_path, + " ".join(cuda_libraries), + ) + return execute(repository_ctx, [python_bin, "-c", cmd]) + +# TODO(csigg): Only call once instead of from here, tensorrt_configure.bzl, +# and nccl_configure.bzl. +def find_cuda_config(repository_ctx, script_path, cuda_libraries): + """Returns CUDA config dictionary from running find_cuda_config.py""" + exec_result = _exec_find_cuda_config( + repository_ctx, + script_path, + cuda_libraries, + ) + + if exec_result.return_code: + auto_configure_fail("Failed to run find_cuda_config.py: %s" % + err_out(exec_result)) + + # Parse the dict from stdout. + return dict( + [tuple(x.split(": ")) for x in exec_result.stdout.splitlines()], + ) + +def _get_cuda_config(repository_ctx, find_cuda_config_script): + """Detects and returns information about the CUDA installation on the system. + + Args: + repository_ctx: The repository context. + + Returns: + A struct containing the following fields: + cuda_toolkit_path: The CUDA toolkit installation directory. + cudnn_install_basedir: The cuDNN installation directory. 
+ cuda_version: The version of CUDA on the system. + cudart_version: The CUDA runtime version on the system. + cudnn_version: The version of cuDNN on the system. + compute_capabilities: A list of the system's CUDA compute capabilities. + cpu_value: The name of the host operating system. + """ + config = find_cuda_config( + repository_ctx, + find_cuda_config_script, + ["cuda", "cudnn"], + ) + + cpu_value = get_cpu_value(repository_ctx) + toolkit_path = config["cuda_toolkit_path"] + + cuda_version = config["cuda_version"].split(".") + cuda_major = cuda_version[0] + cuda_minor = cuda_version[1] + + cuda_version = "%s.%s" % (cuda_major, cuda_minor) + cudnn_version = "%s" % config["cudnn_version"] + + if int(cuda_major) >= 11: + # The libcudart soname in CUDA 11.x is versioned as 11.0 for backward compatability. + if int(cuda_major) == 11: + cudart_version = "11.0" + cupti_version = cuda_version + else: + cudart_version = ("%s") % cuda_major + cupti_version = cudart_version + cublas_version = ("%s") % config["cublas_version"].split(".")[0] + cusolver_version = ("%s") % config["cusolver_version"].split(".")[0] + curand_version = ("%s") % config["curand_version"].split(".")[0] + cufft_version = ("%s") % config["cufft_version"].split(".")[0] + cusparse_version = ("%s") % config["cusparse_version"].split(".")[0] + elif (int(cuda_major), int(cuda_minor)) >= (10, 1): + # cuda_lib_version is for libraries like cuBLAS, cuFFT, cuSOLVER, etc. + # It changed from 'x.y' to just 'x' in CUDA 10.1. + cuda_lib_version = ("%s") % cuda_major + cudart_version = cuda_version + cupti_version = cuda_version + cublas_version = cuda_lib_version + cusolver_version = cuda_lib_version + curand_version = cuda_lib_version + cufft_version = cuda_lib_version + cusparse_version = cuda_lib_version + else: + cudart_version = cuda_version + cupti_version = cuda_version + cublas_version = cuda_version + cusolver_version = cuda_version + curand_version = cuda_version + cufft_version = cuda_version + cusparse_version = cuda_version + + return struct( + cuda_toolkit_path = toolkit_path, + cuda_version = cuda_version, + cupti_version = cupti_version, + cuda_version_major = cuda_major, + cudart_version = cudart_version, + cublas_version = cublas_version, + cusolver_version = cusolver_version, + curand_version = curand_version, + cufft_version = cufft_version, + cusparse_version = cusparse_version, + cudnn_version = cudnn_version, + compute_capabilities = compute_capabilities(repository_ctx), + cpu_value = cpu_value, + config = config, + ) + +def _tpl(repository_ctx, tpl, substitutions = {}, out = None): + if not out: + out = tpl.replace(":", "/") + repository_ctx.template( + out, + Label("//build_deps/gpus/%s.tpl" % tpl), + substitutions, + ) + +def _file(repository_ctx, label): + repository_ctx.template( + label.replace(":", "/"), + Label("//build_deps/gpus/%s.tpl" % label), + {}, + ) + +_DUMMY_CROSSTOOL_BZL_FILE = """ +def error_gpu_disabled(): + fail("ERROR: Building with --config=cuda but TensorFlow is not configured " + + "to build with GPU support. Please re-run ./configure and enter 'Y' " + + "at the prompt to build with GPU support.") + + native.genrule( + name = "error_gen_crosstool", + outs = ["CROSSTOOL"], + cmd = "echo 'Should not be run.' 
&& exit 1", + ) + + native.filegroup( + name = "crosstool", + srcs = [":CROSSTOOL"], + output_licenses = ["unencumbered"], + ) +""" + +_DUMMY_CROSSTOOL_BUILD_FILE = """ +load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled") + +error_gpu_disabled() +""" + +def _norm_path(path): + """Returns a path with '/' and remove the trailing slash.""" + path = path.replace("\\", "/") + if path[-1] == "/": + path = path[:-1] + return path + +def make_copy_files_rule(repository_ctx, name, srcs, outs): + """Returns a rule to copy a set of files.""" + cmds = [] + + # Copy files. + for src, out in zip(srcs, outs): + cmds.append('cp -f "%s" "$(location %s)"' % (src, out)) + outs = [(' "%s",' % out) for out in outs] + return """genrule( + name = "%s", + outs = [ +%s + ], + cmd = \"""%s \""", +)""" % (name, "\n".join(outs), " && \\\n".join(cmds)) + +def make_copy_dir_rule( + repository_ctx, + name, + src_dir, + out_dir, + exceptions = None): + """Returns a rule to recursively copy a directory. + If exceptions is not None, it must be a list of files or directories in + 'src_dir'; these will be excluded from copying. + """ + src_dir = _norm_path(src_dir) + out_dir = _norm_path(out_dir) + outs = read_dir(repository_ctx, src_dir) + post_cmd = "" + if exceptions != None: + outs = [ + x + for x in outs + if not any([x.startswith(src_dir + "/" + y) for y in exceptions]) + ] + outs = [(' "%s",' % out.replace(src_dir, out_dir)) for out in outs] + + # '@D' already contains the relative path for a single file, see + # http://docs.bazel.build/versions/master/be/make-variables.html#predefined_genrule_variables + out_dir = "$(@D)/%s" % out_dir if len(outs) > 1 else "$(@D)" + if exceptions != None: + for x in exceptions: + post_cmd += " ; rm -fR " + out_dir + "/" + x + return """genrule( + name = "%s", + outs = [ +%s + ], + cmd = \"""cp -rLf "%s/." "%s/" %s\""", +)""" % (name, "\n".join(outs), src_dir, out_dir, post_cmd) + +def _flag_enabled(repository_ctx, flag_name): + return get_host_environ(repository_ctx, flag_name) == "1" + +def _tf_sysroot(repository_ctx): + return get_host_environ(repository_ctx, _SYSROOT, "") + +def _compute_cuda_extra_copts(repository_ctx, compute_capabilities): + copts = [] + for capability in compute_capabilities: + if capability.startswith("compute_"): + capability = capability.replace("compute_", "sm_") + copts.append("--cuda-include-ptx=%s" % capability) + copts.append("--cuda-gpu-arch=%s" % capability) + + return str(copts) + +def _tpl_path(repository_ctx, filename): + return repository_ctx.path(Label("//build_deps/gpus/%s.tpl" % filename)) + +def _basename(repository_ctx, path_str): + """Returns the basename of a path of type string. 
+ """ + + num_chars = len(path_str) + for i in range(num_chars): + r_i = num_chars - 1 - i + if path_str[r_i] == "/": + return path_str[r_i + 1:] + return path_str + +def _create_local_cuda_repository(repository_ctx): + """Creates the repository containing files set up to build with CUDA.""" + tpl_paths = { + filename: _tpl_path(repository_ctx, filename) + for filename in [ + "cuda:build_defs.bzl", + "crosstool:crosstool_compiler_wrapper", + "crosstool:BUILD", + "crosstool:cc_toolchain_config.bzl", + "cuda:cuda_config.h", + "cuda:cuda_config.py", + ] + } + tpl_paths["cuda:BUILD"] = _tpl_path(repository_ctx, "cuda:BUILD") + find_cuda_config_script = repository_ctx.path( + Label("//build_deps/gpus:find_cuda_config.py"), + ) + + cuda_config = _get_cuda_config(repository_ctx, find_cuda_config_script) + + cuda_include_path = cuda_config.config["cuda_include_dir"] + cublas_include_path = cuda_config.config["cublas_include_dir"] + cudnn_header_dir = cuda_config.config["cudnn_include_dir"] + cupti_header_dir = cuda_config.config["cupti_include_dir"] + nvvm_libdevice_dir = cuda_config.config["nvvm_library_dir"] + + # Create genrule to copy files from the installed CUDA toolkit into execroot. + copy_rules = [ + make_copy_dir_rule( + repository_ctx, + name = "cuda-include", + src_dir = cuda_include_path, + out_dir = "cuda/include", + ), + make_copy_dir_rule( + repository_ctx, + name = "cuda-nvvm", + src_dir = nvvm_libdevice_dir, + out_dir = "cuda/nvvm/libdevice", + ), + make_copy_dir_rule( + repository_ctx, + name = "cuda-extras", + src_dir = cupti_header_dir, + out_dir = "cuda/extras/CUPTI/include", + ), + ] + + copy_rules.append( + make_copy_files_rule( + repository_ctx, + name = "cublas-include", + srcs = [ + cublas_include_path + "/cublas.h", + cublas_include_path + "/cublas_v2.h", + cublas_include_path + "/cublas_api.h", + cublas_include_path + "/cublasLt.h", + ], + outs = [ + "cublas/include/cublas.h", + "cublas/include/cublas_v2.h", + "cublas/include/cublas_api.h", + "cublas/include/cublasLt.h", + ], + ), + ) + + cusolver_include_path = cuda_config.config["cusolver_include_dir"] + copy_rules.append( + make_copy_files_rule( + repository_ctx, + name = "cusolver-include", + srcs = [ + cusolver_include_path + "/cusolver_common.h", + cusolver_include_path + "/cusolverDn.h", + ], + outs = [ + "cusolver/include/cusolver_common.h", + "cusolver/include/cusolverDn.h", + ], + ), + ) + + cufft_include_path = cuda_config.config["cufft_include_dir"] + copy_rules.append( + make_copy_files_rule( + repository_ctx, + name = "cufft-include", + srcs = [ + cufft_include_path + "/cufft.h", + ], + outs = [ + "cufft/include/cufft.h", + ], + ), + ) + + cusparse_include_path = cuda_config.config["cusparse_include_dir"] + copy_rules.append( + make_copy_files_rule( + repository_ctx, + name = "cusparse-include", + srcs = [ + cusparse_include_path + "/cusparse.h", + ], + outs = [ + "cusparse/include/cusparse.h", + ], + ), + ) + + curand_include_path = cuda_config.config["curand_include_dir"] + copy_rules.append( + make_copy_files_rule( + repository_ctx, + name = "curand-include", + srcs = [ + curand_include_path + "/curand.h", + ], + outs = [ + "curand/include/curand.h", + ], + ), + ) + + check_cuda_libs_script = repository_ctx.path( + Label("//build_deps/gpus:check_cuda_libs.py"), + ) + cuda_libs = _find_libs(repository_ctx, check_cuda_libs_script, cuda_config) + cuda_lib_srcs = [] + cuda_lib_outs = [] + for path in cuda_libs.values(): + cuda_lib_srcs.append(path) + cuda_lib_outs.append("cuda/lib/" + 
_basename(repository_ctx, path)) + copy_rules.append( + make_copy_files_rule( + repository_ctx, + name = "cuda-lib", + srcs = cuda_lib_srcs, + outs = cuda_lib_outs, + ), + ) + + file_ext = "" + bin_files = ( + ["crt/link.stub"] + + [f + file_ext for f in ["bin2c", "fatbinary", "nvlink", "nvprune"]] + ) + copy_rules.append( + make_copy_files_rule( + repository_ctx, + name = "cuda-bin", + srcs = [ + cuda_config.cuda_toolkit_path + "/bin/" + f + for f in bin_files + ], + outs = ["cuda/bin/" + f for f in bin_files], + ), + ) + + # Select the headers based on the cuDNN version (strip '64_' for Windows). + cudnn_headers = ["cudnn.h"] + if cuda_config.cudnn_version.rsplit("_", 1)[-1] >= "8": + cudnn_headers += [ + "cudnn_backend.h", + "cudnn_adv_infer.h", + "cudnn_adv_train.h", + "cudnn_cnn_infer.h", + "cudnn_cnn_train.h", + "cudnn_ops_infer.h", + "cudnn_ops_train.h", + "cudnn_version.h", + ] + + cudnn_srcs = [] + cudnn_outs = [] + for header in cudnn_headers: + cudnn_srcs.append(cudnn_header_dir + "/" + header) + cudnn_outs.append("cudnn/include/" + header) + + copy_rules.append( + make_copy_files_rule( + repository_ctx, + name = "cudnn-include", + srcs = cudnn_srcs, + outs = cudnn_outs, + ), + ) + + # Set up BUILD file for cuda/ + repository_ctx.template( + "cuda/build_defs.bzl", + tpl_paths["cuda:build_defs.bzl"], + { + "%{cuda_is_configured}": "True", + "%{cuda_extra_copts}": _compute_cuda_extra_copts( + repository_ctx, + cuda_config.compute_capabilities, + ), + "%{cuda_gpu_architectures}": str(cuda_config.compute_capabilities), + }, + ) + + cub_actual = "@cub_archive//:cub" + if int(cuda_config.cuda_version_major) >= 11: + cub_actual = ":cuda_headers" + + repository_ctx.template( + "cuda/BUILD", + tpl_paths["cuda:BUILD"], + { + "%{cuda_driver_lib}": _basename(repository_ctx, cuda_libs["cuda"]), + "%{cudart_static_lib}": _basename(repository_ctx, cuda_libs["cudart_static"]), + "%{cudart_static_linkopt}": _cudart_static_linkopt(cuda_config.cpu_value), + "%{cudart_lib}": _basename(repository_ctx, cuda_libs["cudart"]), + "%{cublas_lib}": _basename(repository_ctx, cuda_libs["cublas"]), + "%{cublasLt_lib}": _basename(repository_ctx, cuda_libs["cublasLt"]), + "%{cusolver_lib}": _basename(repository_ctx, cuda_libs["cusolver"]), + "%{cudnn_lib}": _basename(repository_ctx, cuda_libs["cudnn"]), + "%{cufft_lib}": _basename(repository_ctx, cuda_libs["cufft"]), + "%{curand_lib}": _basename(repository_ctx, cuda_libs["curand"]), + "%{cupti_lib}": _basename(repository_ctx, cuda_libs["cupti"]), + "%{cusparse_lib}": _basename(repository_ctx, cuda_libs["cusparse"]), + "%{cub_actual}": cub_actual, + "%{copy_rules}": "\n".join(copy_rules), + }, + ) + + tf_sysroot = _tf_sysroot(repository_ctx) + + # Set up crosstool/ + cc = find_cc(repository_ctx) + cc_fullpath = cc + + host_compiler_includes = get_cxx_inc_directories( + repository_ctx, + cc_fullpath, + tf_sysroot, + ) + cuda_defines = {} + cuda_defines["%{builtin_sysroot}"] = tf_sysroot + cuda_defines["%{cuda_toolkit_path}"] = "" + cuda_defines["%{compiler}"] = "unknown" + + host_compiler_prefix = get_host_environ( + repository_ctx, + _GCC_HOST_COMPILER_PREFIX, + ) + if not host_compiler_prefix: + host_compiler_prefix = "/usr/bin" + + cuda_defines["%{host_compiler_prefix}"] = host_compiler_prefix + cuda_defines["%{linker_bin_path}"] = host_compiler_prefix + cuda_defines["%{extra_no_canonical_prefixes_flags}"] = "" + cuda_defines["%{unfiltered_compile_flags}"] = "" + + cuda_defines["%{host_compiler_path}"] = "crosstool_compiler_wrapper" + 
cuda_defines["%{host_compiler_warnings}"] = "" + + # nvcc has the system include paths built in and will automatically + # search them; we cannot work around that, so we add the relevant cuda + # system paths to the allowed compiler specific include paths. + cuda_defines["%{cxx_builtin_include_directories}"] = to_list_of_strings( + host_compiler_includes + _cuda_include_path( + repository_ctx, + cuda_config, + ) + [cupti_header_dir, cudnn_header_dir], + ) + + # For gcc, do not canonicalize system header paths; some versions of gcc + # pick the shortest possible path for system includes when creating the + # .d file - given that includes that are prefixed with "../" multiple + # time quickly grow longer than the root of the tree, this can lead to + # bazel's header check failing. + cuda_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-fno-canonical-system-headers\"" + + file_ext = "" + nvcc_path = "%s/nvcc%s" % (cuda_config.config["cuda_binary_dir"], file_ext) + cuda_defines["%{compiler_deps}"] = ":crosstool_compiler" + + wrapper_defines = { + "%{cpu_compiler}": str(cc), + "%{cuda_version}": cuda_config.cuda_version, + "%{nvcc_path}": nvcc_path, + "%{gcc_host_compiler_path}": str(cc), + } + repository_ctx.template( + "crosstool/crosstool_compiler_wrapper", + tpl_paths["crosstool:crosstool_compiler_wrapper"], + wrapper_defines, + ) + + verify_build_defines(cuda_defines) + + # Only expand template variables in the BUILD file + repository_ctx.template( + "crosstool/BUILD", + tpl_paths["crosstool:BUILD"], + cuda_defines, + ) + + # No templating of cc_toolchain_config - use attributes and templatize the + # BUILD file. + repository_ctx.template( + "crosstool/cc_toolchain_config.bzl", + tpl_paths["crosstool:cc_toolchain_config.bzl"], + {}, + ) + + # Set up cuda_config.h + repository_ctx.template( + "cuda/cuda/cuda_config.h", + tpl_paths["cuda:cuda_config.h"], + { + "%{cuda_version}": cuda_config.cuda_version, + "%{cudart_version}": cuda_config.cudart_version, + "%{cupti_version}": cuda_config.cupti_version, + "%{cublas_version}": cuda_config.cublas_version, + "%{cusolver_version}": cuda_config.cusolver_version, + "%{curand_version}": cuda_config.curand_version, + "%{cufft_version}": cuda_config.cufft_version, + "%{cusparse_version}": cuda_config.cusparse_version, + "%{cudnn_version}": cuda_config.cudnn_version, + "%{cuda_toolkit_path}": cuda_config.cuda_toolkit_path, + "%{cuda_compute_capabilities}": ", ".join( + [cc.split("_")[1] for cc in cuda_config.compute_capabilities], + ), + }, + ) + + # Set up cuda_config.py, which is used by gen_build_info to provide + # static build environment info to the API + repository_ctx.template( + "cuda/cuda/cuda_config.py", + tpl_paths["cuda:cuda_config.py"], + _py_tmpl_dict({ + "cuda_version": cuda_config.cuda_version, + "cudnn_version": cuda_config.cudnn_version, + "cuda_compute_capabilities": cuda_config.compute_capabilities, + "cpu_compiler": str(cc), + }), + ) + +def _get_tensorrt_static_path(repository_ctx): + return get_host_environ(repository_ctx, _TENSORRT_STATIC_PATH, None) + +def _create_local_tensorrt_repository(repository_ctx): + find_cuda_config_path = repository_ctx.path( + Label("//build_deps/gpus:find_cuda_config.py"), + ) + config = find_cuda_config( + repository_ctx, + find_cuda_config_path, + ["tensorrt"], + ) + tensorrt_version = config["tensorrt_version"] + cpu_value = get_cpu_value(repository_ctx) + + # Copy the library and header files + libraries = [ + lib_name(lib, cpu_value, tensorrt_version) + for lib in _TENSORRT_LIBS + ] + 
library_dir = config["tensorrt_library_dir"] + "/" + headers = _get_tensorrt_headers(tensorrt_version) + include_dir = config["tensorrt_include_dir"] + "/" + copy_rules = [ + make_copy_files_rule( + repository_ctx, + name = "tensorrt_lib", + srcs = [library_dir + library for library in libraries], + outs = ["tensorrt/lib/" + library for library in libraries], + ), + make_copy_files_rule( + repository_ctx, + name = "tensorrt_include", + srcs = [include_dir + header for header in headers], + outs = ["tensorrt/include/" + header for header in headers], + ), + ] + + tensorrt_static_path = _get_tensorrt_static_path(repository_ctx) + if tensorrt_static_path: + tensorrt_static_path = tensorrt_static_path + "/" + if _at_least_version(tensorrt_version, "8"): + raw_static_library_names = _TENSORRT_LIBS + else: + raw_static_library_names = _TENSORRT_LIBS + [ + "nvrtc", + "myelin_compiler", + "myelin_executor", + "myelin_pattern_library", + "myelin_pattern_runtime", + ] + + static_library_names = [ + "%s_static" % name + for name in raw_static_library_names + ] + static_libraries = [ + lib_name(lib, cpu_value, tensorrt_version, static = True) + for lib in static_library_names + ] + copy_rules = copy_rules + [ + make_copy_files_rule( + repository_ctx, + name = "tensorrt_static_lib", + srcs = [ + tensorrt_static_path + library + for library in static_libraries + ], + outs = [ + "tensorrt/lib/" + library + for library in static_libraries + ], + ), + ] + + tpl_paths = { + "tensorrt/build_defs.bzl": _tpl_path(repository_ctx, "tensorrt:build_defs.bzl"), + "tensorrt/BUILD": _tpl_path(repository_ctx, "tensorrt:BUILD"), + "tensorrt/tensorrt_config.h": _tpl_path(repository_ctx, "tensorrt:tensorrt_config.h"), + "tensorrt/tensorrt_config.py": _tpl_path(repository_ctx, "tensorrt:tensorrt_config.py"), + } + + # Set up config file. + repository_ctx.template( + "tensorrt/build_defs.bzl", + tpl_paths["tensorrt/build_defs.bzl"], + {"%{if_tensorrt}": "if_true"}, + ) + + # Set up BUILD file. + repository_ctx.template( + "tensorrt/BUILD", + tpl_paths["tensorrt/BUILD"], + { + "%{copy_rules}": "\n".join(copy_rules), + }, + ) + + # Set up tensorrt_config.h, which is used by + # tensorflow/stream_executor/dso_loader.cc. 
+ repository_ctx.template( + "tensorrt/tensorrt_config.h", + tpl_paths["tensorrt/tensorrt_config.h"], + {"%{tensorrt_version}": tensorrt_version}, + ) + + # Set up tensorrt_config.py, which is used by gen_build_info to provide + # build environment info to the API + repository_ctx.template( + "tensorrt/tensorrt_config.py", + tpl_paths["tensorrt/tensorrt_config.py"], + _py_tmpl_dict({ + "tensorrt_version": tensorrt_version, + }), + ) + +def _py_tmpl_dict(d): + return {"%{cuda_config}": str(d)} + +_CUDA_ENVIRONS = [ + _GCC_HOST_COMPILER_PATH, + _GCC_HOST_COMPILER_PREFIX, + "NEED_CUDA", + _CUDA_TOOLKIT_PATH, + _CUDNN_INSTALL_PATH, + _CUDA_VERSION, + _CUDNN_VERSION, + _CUDA_COMPUTE_CAPABILITIES, + "NVVMIR_LIBRARY_DIR", + _PYTHON_BIN_PATH, + "TMP", + "TMPDIR", + "CUDA_PATHS", +] + +cuda_configure = repository_rule( + implementation = _create_local_cuda_repository, + environ = _CUDA_ENVIRONS, +) + +_TENSORRT_ENVIRONS = [ + _TENSORRT_INSTALL_PATH, + _TENSORRT_VERSION, + _TENSORRT_STATIC_PATH, + "CUDA_PATHS", +] + +tensorrt_configure = repository_rule( + implementation = _create_local_tensorrt_repository, + environ = _TENSORRT_ENVIRONS, +) diff --git a/build_deps/gpus/crosstool/BUILD b/build_deps/gpus/crosstool/BUILD new file mode 100644 index 000000000..e69de29bb diff --git a/build_deps/gpus/crosstool/BUILD.tpl b/build_deps/gpus/crosstool/BUILD.tpl new file mode 100644 index 000000000..e95d18bc6 --- /dev/null +++ b/build_deps/gpus/crosstool/BUILD.tpl @@ -0,0 +1,108 @@ +# This file is expanded from a template by cuda_configure.bzl +# Update cuda_configure.bzl#verify_build_defines when adding new variables. + +load(":cc_toolchain_config.bzl", "cc_toolchain_config") + +licenses(["restricted"]) + +package(default_visibility = ["//visibility:public"]) + +toolchain( + name = "toolchain-linux-x86_64", + exec_compatible_with = [ + "@platforms//os:linux", + "@platforms//cpu:x86_64", + ], + target_compatible_with = [ + "@platforms//os:linux", + "@platforms//cpu:x86_64", + ], + toolchain = ":cc-compiler-local", + toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", +) + +cc_toolchain_suite( + name = "toolchain", + toolchains = { + "local|compiler": ":cc-compiler-local", + "darwin|compiler": ":cc-compiler-darwin", + "arm": ":cc-compiler-local", + "aarch64": ":cc-compiler-local", + "k8": ":cc-compiler-local", + "piii": ":cc-compiler-local", + "ppc": ":cc-compiler-local", + "darwin": ":cc-compiler-darwin", + }, +) + +cc_toolchain( + name = "cc-compiler-local", + all_files = "%{compiler_deps}", + compiler_files = "%{compiler_deps}", + ar_files = "%{compiler_deps}", + as_files = "%{compiler_deps}", + dwp_files = ":empty", + linker_files = "%{compiler_deps}", + objcopy_files = ":empty", + strip_files = ":empty", + # To support linker flags that need to go to the start of command line + # we need the toolchain to support parameter files. Parameter files are + # last on the command line and contain all shared libraries to link, so all + # regular options will be left of them. 
+ supports_param_files = 1, + toolchain_identifier = "local_linux", + toolchain_config = ":cc-compiler-local-config", +) + +cc_toolchain_config( + name = "cc-compiler-local-config", + cpu = "local", + builtin_include_directories = [%{cxx_builtin_include_directories}], + extra_no_canonical_prefixes_flags = [%{extra_no_canonical_prefixes_flags}], + host_compiler_path = "%{host_compiler_path}", + host_compiler_prefix = "%{host_compiler_prefix}", + host_compiler_warnings = [%{host_compiler_warnings}], + host_unfiltered_compile_flags = [%{unfiltered_compile_flags}], + linker_bin_path = "%{linker_bin_path}", + builtin_sysroot = "%{builtin_sysroot}", + cuda_path = "%{cuda_toolkit_path}", + compiler = "%{compiler}", +) + +cc_toolchain( + name = "cc-compiler-darwin", + all_files = "%{compiler_deps}", + compiler_files = "%{compiler_deps}", + ar_files = "%{compiler_deps}", + as_files = "%{compiler_deps}", + dwp_files = ":empty", + linker_files = "%{compiler_deps}", + objcopy_files = ":empty", + strip_files = ":empty", + supports_param_files = 0, + toolchain_identifier = "local_darwin", + toolchain_config = ":cc-compiler-local-darwin", +) + +cc_toolchain_config( + name = "cc-compiler-local-darwin", + cpu = "darwin", + builtin_include_directories = [%{cxx_builtin_include_directories}], + extra_no_canonical_prefixes_flags = [%{extra_no_canonical_prefixes_flags}], + host_compiler_path = "%{host_compiler_path}", + host_compiler_prefix = "%{host_compiler_prefix}", + host_compiler_warnings = [%{host_compiler_warnings}], + host_unfiltered_compile_flags = [%{unfiltered_compile_flags}], + linker_bin_path = "%{linker_bin_path}", +) + + +filegroup( + name = "empty", + srcs = [], +) + +filegroup( + name = "crosstool_compiler", + srcs = ["crosstool_compiler_wrapper"], +) diff --git a/build_deps/gpus/crosstool/cc_toolchain_config.bzl.tpl b/build_deps/gpus/crosstool/cc_toolchain_config.bzl.tpl new file mode 100644 index 000000000..9c429754a --- /dev/null +++ b/build_deps/gpus/crosstool/cc_toolchain_config.bzl.tpl @@ -0,0 +1,645 @@ +"""cc_toolchain_config rule for configuring CUDA toolchains on Linux, Mac, and Windows.""" + +load( + "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl", + "action_config", + "artifact_name_pattern", + "env_entry", + "env_set", + "feature", + "feature_set", + "flag_group", + "flag_set", + "tool", + "tool_path", + "variable_with_value", + "with_feature_set", +) +load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES") + +def all_assembly_actions(): + return [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ] + +def all_compile_actions(): + return [ + ACTION_NAMES.assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.linkstamp_compile, + ACTION_NAMES.preprocess_assemble, + ] + +def all_c_compile_actions(): + return [ + ACTION_NAMES.c_compile, + ] + +def all_cpp_compile_actions(): + return [ + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.linkstamp_compile, + ] + +def all_preprocessed_actions(): + return [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.linkstamp_compile, + ACTION_NAMES.preprocess_assemble, + ] + +def all_link_actions(): + return [ + ACTION_NAMES.cpp_link_executable, + 
ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ] + +def all_executable_link_actions(): + return [ + ACTION_NAMES.cpp_link_executable, + ] + +def all_shared_library_link_actions(): + return [ + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ] + +def all_archive_actions(): + return [ACTION_NAMES.cpp_link_static_library] + +def all_strip_actions(): + return [ACTION_NAMES.strip] + +def _library_to_link(flag_prefix, value, iterate = None): + return flag_group( + flags = [ + "{}%{{libraries_to_link.{}}}".format( + flag_prefix, + iterate if iterate else "name", + ), + ], + iterate_over = ("libraries_to_link." + iterate if iterate else None), + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = value, + ), + ) + +def _surround_static_library(prefix, suffix): + return [ + flag_group( + flags = [prefix, "%{libraries_to_link.name}", suffix], + expand_if_true = "libraries_to_link.is_whole_archive", + ), + flag_group( + flags = ["%{libraries_to_link.name}"], + expand_if_false = "libraries_to_link.is_whole_archive", + ), + ] + +def _prefix_static_library(prefix): + return [ + flag_group( + flags = ["%{libraries_to_link.name}"], + expand_if_false = "libraries_to_link.is_whole_archive", + ), + flag_group( + flags = [prefix + "%{libraries_to_link.name}"], + expand_if_true = "libraries_to_link.is_whole_archive", + ), + ] + +def _static_library_to_link(alwayslink_prefix, alwayslink_suffix = None): + if alwayslink_suffix: + flag_groups = _surround_static_library(alwayslink_prefix, alwayslink_suffix) + else: + flag_groups = _prefix_static_library(alwayslink_prefix) + return flag_group( + flag_groups = flag_groups, + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "static_library", + ), + ) + +def _iterate_flag_group(iterate_over, flags = [], flag_groups = []): + return flag_group( + iterate_over = iterate_over, + expand_if_available = iterate_over, + flag_groups = flag_groups, + flags = flags, + ) + +def _libraries_to_link_group(flavour): + if flavour == "linux": + return _iterate_flag_group( + iterate_over = "libraries_to_link", + flag_groups = [ + flag_group( + flags = ["-Wl,--start-lib"], + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "object_file_group", + ), + ), + _library_to_link("", "object_file_group", "object_files"), + flag_group( + flags = ["-Wl,--end-lib"], + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "object_file_group", + ), + ), + _library_to_link("", "object_file"), + _library_to_link("", "interface_library"), + _static_library_to_link("-Wl,-whole-archive", "-Wl,-no-whole-archive"), + _library_to_link("-l", "dynamic_library"), + _library_to_link("-l:", "versioned_dynamic_library"), + ], + ) + elif flavour == "darwin": + return _iterate_flag_group( + iterate_over = "libraries_to_link", + flag_groups = [ + _library_to_link("", "object_file_group", "object_files"), + _library_to_link("", "object_file"), + _library_to_link("", "interface_library"), + _static_library_to_link("-Wl,-force_load,"), + _library_to_link("-l", "dynamic_library"), + _library_to_link("-l:", "versioned_dynamic_library"), + ], + ) + +def _action_configs_with_tool(path, actions): + return [ + action_config( + action_name = name, + enabled = True, + tools = [tool(path = path)], + ) + for name in actions + ] + +def _action_configs(assembly_path, c_compiler_path, cc_compiler_path, archiver_path, 
linker_path, strip_path): + return _action_configs_with_tool( + assembly_path, + all_assembly_actions(), + ) + _action_configs_with_tool( + c_compiler_path, + all_c_compile_actions(), + ) + _action_configs_with_tool( + cc_compiler_path, + all_cpp_compile_actions(), + ) + _action_configs_with_tool( + archiver_path, + all_archive_actions(), + ) + _action_configs_with_tool( + linker_path, + all_link_actions(), + ) + _action_configs_with_tool( + strip_path, + all_strip_actions(), + ) + +def _tool_paths(cpu, ctx): + if cpu in ["local", "darwin"]: + return [ + tool_path(name = "gcc", path = ctx.attr.host_compiler_path), + tool_path(name = "ar", path = ctx.attr.host_compiler_prefix + ( + "/ar" if cpu == "local" else "/libtool" + )), + tool_path(name = "compat-ld", path = ctx.attr.host_compiler_prefix + "/ld"), + tool_path(name = "cpp", path = ctx.attr.host_compiler_prefix + "/cpp"), + tool_path(name = "dwp", path = ctx.attr.host_compiler_prefix + "/dwp"), + tool_path(name = "gcov", path = ctx.attr.host_compiler_prefix + "/gcov"), + tool_path(name = "ld", path = ctx.attr.host_compiler_prefix + "/ld"), + tool_path(name = "nm", path = ctx.attr.host_compiler_prefix + "/nm"), + tool_path(name = "objcopy", path = ctx.attr.host_compiler_prefix + "/objcopy"), + tool_path(name = "objdump", path = ctx.attr.host_compiler_prefix + "/objdump"), + tool_path(name = "strip", path = ctx.attr.host_compiler_prefix + "/strip"), + ] + else: + fail("Unreachable") + +def _sysroot_group(): + return flag_group( + flags = ["--sysroot=%{sysroot}"], + expand_if_available = "sysroot", + ) + +def _no_canonical_prefixes_group(extra_flags): + return flag_group( + flags = [ + "-no-canonical-prefixes", + ] + extra_flags, + ) + +def _cuda_set(cuda_path, actions): + if cuda_path: + return [flag_set( + actions = actions, + flag_groups = [ + flag_group( + flags = ["--cuda-path=" + cuda_path], + ), + ], + )] + else: + return [] + +def _nologo(): + return flag_group(flags = ["/nologo"]) + +def _features(cpu, compiler, ctx): + if cpu in ["local", "darwin"]: + return [ + feature(name = "no_legacy_features"), + feature( + name = "all_compile_flags", + enabled = True, + flag_sets = [ + flag_set( + actions = all_compile_actions(), + flag_groups = [ + flag_group( + flags = ["-MD", "-MF", "%{dependency_file}"], + expand_if_available = "dependency_file", + ), + flag_group( + flags = ["-gsplit-dwarf"], + expand_if_available = "per_object_debug_info_file", + ), + ], + ), + flag_set( + actions = all_preprocessed_actions(), + flag_groups = [ + flag_group( + flags = ["-frandom-seed=%{output_file}"], + expand_if_available = "output_file", + ), + _iterate_flag_group( + flags = ["-D%{preprocessor_defines}"], + iterate_over = "preprocessor_defines", + ), + _iterate_flag_group( + flags = ["-include", "%{includes}"], + iterate_over = "includes", + ), + _iterate_flag_group( + flags = ["-iquote", "%{quote_include_paths}"], + iterate_over = "quote_include_paths", + ), + _iterate_flag_group( + flags = ["-I%{include_paths}"], + iterate_over = "include_paths", + ), + _iterate_flag_group( + flags = ["-isystem", "%{system_include_paths}"], + iterate_over = "system_include_paths", + ), + _iterate_flag_group( + flags = ["-F", "%{framework_include_paths}"], + iterate_over = "framework_include_paths", + ), + ], + ), + flag_set( + actions = all_cpp_compile_actions(), + flag_groups = [], + ), + flag_set( + actions = all_compile_actions(), + flag_groups = [ + flag_group( + flags = [ + "-Wno-builtin-macro-redefined", + "-D__DATE__=\"redacted\"", + 
"-D__TIMESTAMP__=\"redacted\"", + "-D__TIME__=\"redacted\"", + ], + ), + flag_group( + flags = ["-fPIC"], + expand_if_available = "pic", + ), + flag_group( + flags = ["-fPIE"], + expand_if_not_available = "pic", + ), + flag_group( + flags = [ + "-U_FORTIFY_SOURCE", + "-D_FORTIFY_SOURCE=1", + "-fstack-protector", + "-Wall", + ] + ctx.attr.host_compiler_warnings + [ + "-fno-omit-frame-pointer", + ], + ), + _no_canonical_prefixes_group( + ctx.attr.extra_no_canonical_prefixes_flags, + ), + ], + ), + flag_set( + actions = all_compile_actions(), + flag_groups = [flag_group(flags = ["-DNDEBUG"])], + with_features = [with_feature_set(features = ["disable-assertions"])], + ), + flag_set( + actions = all_compile_actions(), + flag_groups = [ + flag_group( + flags = [ + "-g0", + "-O2", + "-ffunction-sections", + "-fdata-sections", + ], + ), + ], + with_features = [with_feature_set(features = ["opt"])], + ), + flag_set( + actions = all_compile_actions(), + flag_groups = [flag_group(flags = ["-g"])], + with_features = [with_feature_set(features = ["dbg"])], + ), + ] + _cuda_set( + ctx.attr.cuda_path, + all_compile_actions(), + ) + [ + flag_set( + actions = all_compile_actions(), + flag_groups = [ + _iterate_flag_group( + flags = ["%{user_compile_flags}"], + iterate_over = "user_compile_flags", + ), + _sysroot_group(), + flag_group( + expand_if_available = "source_file", + flags = ["-c", "%{source_file}"], + ), + flag_group( + expand_if_available = "output_assembly_file", + flags = ["-S"], + ), + flag_group( + expand_if_available = "output_preprocess_file", + flags = ["-E"], + ), + flag_group( + expand_if_available = "output_file", + flags = ["-o", "%{output_file}"], + ), + ], + ), + ], + ), + feature( + name = "all_archive_flags", + enabled = True, + flag_sets = [ + flag_set( + actions = all_archive_actions(), + flag_groups = [ + flag_group( + expand_if_available = "linker_param_file", + flags = ["@%{linker_param_file}"], + ), + flag_group(flags = ["rcsD"]), + flag_group( + flags = ["%{output_execpath}"], + expand_if_available = "output_execpath", + ), + flag_group( + iterate_over = "libraries_to_link", + flag_groups = [ + flag_group( + flags = ["%{libraries_to_link.name}"], + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "object_file", + ), + ), + flag_group( + flags = ["%{libraries_to_link.object_files}"], + iterate_over = "libraries_to_link.object_files", + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "object_file_group", + ), + ), + ], + expand_if_available = "libraries_to_link", + ), + ], + ), + ], + ), + feature( + name = "all_link_flags", + enabled = True, + flag_sets = [ + flag_set( + actions = all_shared_library_link_actions(), + flag_groups = [flag_group(flags = ["-shared"])], + ), + flag_set( + actions = all_link_actions(), + flag_groups = ([ + flag_group(flags = ["-Wl,-no-as-needed"]) + ] if cpu == "local" else []) + ([ + flag_group(flags = ["-B" + ctx.attr.linker_bin_path]) + ] if ctx.attr.linker_bin_path else []) + [ + flag_group( + flags = ["@%{linker_param_file}"], + expand_if_available = "linker_param_file", + ), + _iterate_flag_group( + flags = ["%{linkstamp_paths}"], + iterate_over = "linkstamp_paths", + ), + flag_group( + flags = ["-o", "%{output_execpath}"], + expand_if_available = "output_execpath", + ), + _iterate_flag_group( + flags = ["-L%{library_search_directories}"], + iterate_over = "library_search_directories", + ), + _iterate_flag_group( + iterate_over = "runtime_library_search_directories", + 
flags = [ + "-Wl,-rpath,$ORIGIN/%{runtime_library_search_directories}", + ] if cpu == "local" else [ + "-Wl,-rpath,@loader_path/%{runtime_library_search_directories}", + ], + ), + _libraries_to_link_group("darwin" if cpu == "darwin" else "linux"), + _iterate_flag_group( + flags = ["%{user_link_flags}"], + iterate_over = "user_link_flags", + ), + flag_group( + flags = ["-Wl,--gdb-index"], + expand_if_available = "is_using_fission", + ), + flag_group( + flags = ["-Wl,-S"], + expand_if_available = "strip_debug_symbols", + ), + flag_group(flags = ["-lc++" if cpu == "darwin" else "-lstdc++"]), + _no_canonical_prefixes_group( + ctx.attr.extra_no_canonical_prefixes_flags, + ), + ], + ), + flag_set( + actions = all_executable_link_actions(), + flag_groups = [flag_group(flags = ["-pie"])], + ), + ] + ([ + flag_set( + actions = all_link_actions(), + flag_groups = [flag_group(flags = [ + "-Wl,-z,relro,-z,now", + ])], + ), + ] if cpu == "local" else []) + ([ + flag_set( + actions = all_link_actions(), + flag_groups = [ + flag_group(flags = ["-Wl,--gc-sections"]), + flag_group( + flags = ["-Wl,--build-id=md5", "-Wl,--hash-style=gnu"], + ), + ], + ), + ] if cpu == "local" else []) + ([ + flag_set( + actions = all_link_actions(), + flag_groups = [flag_group(flags = ["-undefined", "dynamic_lookup"])], + ), + ] if cpu == "darwin" else []) + _cuda_set( + ctx.attr.cuda_path, + all_link_actions(), + ) + [ + flag_set( + actions = all_link_actions(), + flag_groups = [ + _sysroot_group(), + ], + ), + ], + ), + feature(name = "disable-assertions"), + feature( + name = "opt", + implies = ["disable-assertions"], + ), + feature(name = "fastbuild"), + feature(name = "dbg"), + feature(name = "supports_dynamic_linker", enabled = True), + feature(name = "pic", enabled = True), + feature(name = "supports_pic", enabled = True), + feature(name = "has_configured_linker_path", enabled = True), + ] + else: + fail("Unreachable") + +def _impl(ctx): + cpu = ctx.attr.cpu + compiler = ctx.attr.compiler + + if (cpu == "darwin"): + toolchain_identifier = "local_darwin" + target_cpu = "darwin" + target_libc = "macosx" + compiler = "compiler" + action_configs = _action_configs( + assembly_path = ctx.attr.host_compiler_path, + c_compiler_path = ctx.attr.host_compiler_path, + cc_compiler_path = ctx.attr.host_compiler_path, + archiver_path = ctx.attr.host_compiler_prefix + "/libtool", + linker_path = ctx.attr.host_compiler_path, + strip_path = ctx.attr.host_compiler_prefix + "/strip", + ) + artifact_name_patterns = [] + elif (cpu == "local"): + toolchain_identifier = "local_linux" + target_cpu = "local" + target_libc = "local" + action_configs = _action_configs( + assembly_path = ctx.attr.host_compiler_path, + c_compiler_path = ctx.attr.host_compiler_path, + cc_compiler_path = ctx.attr.host_compiler_path, + archiver_path = ctx.attr.host_compiler_prefix + "/ar", + linker_path = ctx.attr.host_compiler_path, + strip_path = ctx.attr.host_compiler_prefix + "/strip", + ) + artifact_name_patterns = [] + else: + fail("Unreachable") + + out = ctx.actions.declare_file(ctx.label.name) + ctx.actions.write(out, "Fake executable") + return [ + cc_common.create_cc_toolchain_config_info( + ctx = ctx, + features = _features(cpu, compiler, ctx), + action_configs = action_configs, + artifact_name_patterns = artifact_name_patterns, + cxx_builtin_include_directories = ctx.attr.builtin_include_directories, + toolchain_identifier = toolchain_identifier, + host_system_name = "local", + target_system_name = "local", + target_cpu = target_cpu, + target_libc = 
target_libc, + compiler = compiler, + abi_version = "local", + abi_libc_version = "local", + tool_paths = _tool_paths(cpu, ctx), + make_variables = [], + builtin_sysroot = ctx.attr.builtin_sysroot, + cc_target_os = None, + ), + DefaultInfo( + executable = out, + ), + ] + +cc_toolchain_config = rule( + implementation = _impl, + attrs = { + "cpu": attr.string(mandatory = True, values = ["darwin", "local"]), + "compiler": attr.string(values = ["unknown"], default = "unknown"), + "builtin_include_directories": attr.string_list(), + "extra_no_canonical_prefixes_flags": attr.string_list(), + "host_compiler_path": attr.string(), + "host_compiler_prefix": attr.string(), + "host_compiler_warnings": attr.string_list(), + "host_unfiltered_compile_flags": attr.string_list(), + "linker_bin_path": attr.string(), + "builtin_sysroot": attr.string(), + "cuda_path": attr.string(), + }, + provides = [CcToolchainConfigInfo], + executable = True, +) diff --git a/build_deps/gpus/crosstool/crosstool_compiler_wrapper.tpl b/build_deps/gpus/crosstool/crosstool_compiler_wrapper.tpl new file mode 100755 index 000000000..f504a5669 --- /dev/null +++ b/build_deps/gpus/crosstool/crosstool_compiler_wrapper.tpl @@ -0,0 +1,313 @@ +#!/usr/bin/env python +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Crosstool wrapper for compiling CUDA programs. + +SYNOPSIS: + crosstool_compiler_wrapper [options passed in by cc_library() + or cc_binary() rule] + +DESCRIPTION: + This script is expected to be called by the cc_library() or cc_binary() bazel + rules. When the option "-x cuda" is present in the list of arguments passed + to this script, it invokes the nvcc CUDA compiler. Most arguments are passed + as is as a string to --compiler-options of nvcc. When "-x cuda" is not + present, this wrapper invokes hybrid_driver_is_not_gcc with the input + arguments as is. +""" + +__author__ = 'keveman@google.com (Manjunath Kudlur)' + +import os +import pipes +import re +import subprocess +import sys +from argparse import ArgumentParser + +# Template values set by cuda_autoconf. +CPU_COMPILER = ('%{cpu_compiler}') +GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}') + +NVCC_PATH = '%{nvcc_path}' +PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH) +NVCC_VERSION = '%{cuda_version}' + + +def Log(s): + print('gpus/crosstool: {0}'.format(s)) + + +def GetOptionValue(argv, option): + """Extract the list of values for option from the argv list. + + Args: + argv: A list of strings, possibly the argv passed to main(). + option: The option whose value to extract, with the leading '-'. + + Returns: + A list of values, either directly following the option, + (eg., -opt val1 val2) or values collected from multiple occurrences of + the option (eg., -opt val1 -opt val2). 
+ """ + + parser = ArgumentParser() + parser.add_argument(option, nargs='*', action='append') + option = option.lstrip('-').replace('-', '_') + args, _ = parser.parse_known_args(argv) + if not args or not vars(args)[option]: + return [] + else: + return sum(vars(args)[option], []) + + +def GetHostCompilerOptions(argv): + """Collect the -isystem, -iquote, and --sysroot option values from argv. + + Args: + argv: A list of strings, possibly the argv passed to main(). + + Returns: + The string that can be used as the --compiler-options to nvcc. + """ + + parser = ArgumentParser() + parser.add_argument('-isystem', nargs='*', action='append') + parser.add_argument('-iquote', nargs='*', action='append') + parser.add_argument('--sysroot', nargs=1) + parser.add_argument('-g', nargs='*', action='append') + parser.add_argument('-fno-canonical-system-headers', action='store_true') + parser.add_argument('-no-canonical-prefixes', action='store_true') + + args, _ = parser.parse_known_args(argv) + + opts = '' + + if args.isystem: + opts += ' -isystem ' + ' -isystem '.join(sum(args.isystem, [])) + if args.iquote: + opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, [])) + if args.g: + opts += ' -g' + ' -g'.join(sum(args.g, [])) + if args.fno_canonical_system_headers: + opts += ' -fno-canonical-system-headers' + if args.no_canonical_prefixes: + opts += ' -no-canonical-prefixes' + if args.sysroot: + opts += ' --sysroot ' + args.sysroot[0] + + return opts + + +def _update_options(nvcc_options): + if NVCC_VERSION in ("7.0", ): + return nvcc_options + + update_options = {"relaxed-constexpr": "expt-relaxed-constexpr"} + return [ + update_options[opt] if opt in update_options else opt + for opt in nvcc_options + ] + + +def GetNvccOptions(argv): + """Collect the -nvcc_options values from argv. + + Args: + argv: A list of strings, possibly the argv passed to main(). + + Returns: + The string that can be passed directly to nvcc. + """ + + parser = ArgumentParser() + parser.add_argument('-nvcc_options', nargs='*', action='append') + + args, _ = parser.parse_known_args(argv) + + if args.nvcc_options: + options = _update_options(sum(args.nvcc_options, [])) + return ' '.join(['--' + a for a in options]) + return '' + + +def system(cmd): + """Invokes cmd with os.system(). + + Args: + cmd: The command. + + Returns: + The exit code if the process exited with exit() or -signal + if the process was terminated by a signal. + """ + retv = os.system(cmd) + if os.WIFEXITED(retv): + return os.WEXITSTATUS(retv) + else: + return -os.WTERMSIG(retv) + + +def InvokeNvcc(argv, log=False): + """Call nvcc with arguments assembled from argv. + + Args: + argv: A list of strings, possibly the argv passed to main(). + log: True if logging is requested. 
+ + Returns: + The return value of calling system('nvcc ' + args) + """ + + host_compiler_options = GetHostCompilerOptions(argv) + nvcc_compiler_options = GetNvccOptions(argv) + opt_option = GetOptionValue(argv, '-O') + m_options = GetOptionValue(argv, '-m') + m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']]) + include_options = GetOptionValue(argv, '-I') + out_file = GetOptionValue(argv, '-o') + depfiles = GetOptionValue(argv, '-MF') + defines = GetOptionValue(argv, '-D') + defines = ''.join([' -D' + define for define in defines]) + undefines = GetOptionValue(argv, '-U') + undefines = ''.join([' -U' + define for define in undefines]) + std_options = GetOptionValue(argv, '-std') + nvcc_allowed_std_options = ["c++03", "c++11", "c++14"] + nvcc_std_map = {} + if int(NVCC_VERSION.split('.')[0]) >= 11: + nvcc_std_map["c++1z"] = "c++17" + nvcc_allowed_std_options += ["c++17", "c++1z"] + std_options = ''.join([ + ' -std=' + (nvcc_std_map[define] if define in nvcc_std_map else define) + for define in std_options if define in nvcc_allowed_std_options + ][-1:]) + fatbin_options = ''.join([ + ' --fatbin-options=' + option + for option in GetOptionValue(argv, '-Xcuda-fatbinary') + ]) + + # The list of source files get passed after the -c option. I don't know of + # any other reliable way to just get the list of source files to be compiled. + src_files = GetOptionValue(argv, '-c') + + # Pass -w through from host to nvcc, but don't do anything fancier with + # warnings-related flags, since they're not necessarily the same across + # compilers. + warning_options = ' -w' if '-w' in argv else '' + + if len(src_files) == 0: + return 1 + if len(out_file) != 1: + return 1 + + opt = (' -O2' if + (len(opt_option) > 0 and int(opt_option[0]) > 0) else ' -g') + + includes = (' -I ' + ' -I '.join(include_options) + if len(include_options) > 0 else '') + + # Unfortunately, there are other options that have -c prefix too. + # So allowing only those look like C/C++ files. + src_files = [ + f for f in src_files + if re.search('\.cpp$|\.cc$|\.c$|\.cxx$|\.C|\.cu|\.cuh$', f) + ] + srcs = ' '.join(src_files) + out = ' -o ' + out_file[0] + + nvccopts = '-D_FORCE_INLINES ' + capabilities_sm = set(GetOptionValue(argv, "--cuda-gpu-arch")) + capabilities_compute = set(GetOptionValue(argv, '--cuda-include-ptx')) + # When both "code=sm_xy" and "code=compute_xy" are requested for a single + # arch, they can be combined using "code=xy,compute_xy" which avoids a + # redundant PTX generation during compilation. + capabilities_both = capabilities_sm.intersection(capabilities_compute) + for capability in capabilities_both: + capability = capability[len('sm_'):] + nvccopts += r'-gencode=arch=compute_%s,code=\"sm_%s,compute_%s\" ' % ( + capability, capability, capability) + for capability in capabilities_sm - capabilities_both: + capability = capability[len('sm_'):] + nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s\" ' % (capability, + capability) + for capability in capabilities_compute - capabilities_both: + capability = capability[len('sm_'):] + nvccopts += r'-gencode=arch=compute_%s,\"code=compute_%s\" ' % ( + capability, capability) + nvccopts += nvcc_compiler_options + nvccopts += undefines + nvccopts += defines + nvccopts += std_options + nvccopts += m_options + nvccopts += warning_options + # Force C++17 dialect (note, everything in just one string!) 
+ nvccopts += ' --std c++17 ' + nvccopts += fatbin_options + + if depfiles: + # Generate the dependency file + depfile = depfiles[0] + cmd = (NVCC_PATH + ' ' + nvccopts + ' --compiler-options "' + + host_compiler_options + '"' + ' --compiler-bindir=' + + GCC_HOST_COMPILER_PATH + ' -I .' + ' -x cu ' + opt + includes + + ' ' + srcs + ' -M -o ' + depfile) + if log: + Log(cmd) + exit_status = system(cmd) + if exit_status != 0: + return exit_status + + cmd = (NVCC_PATH + ' ' + nvccopts + ' --compiler-options "' + + host_compiler_options + ' -fPIC"' + ' --compiler-bindir=' + + GCC_HOST_COMPILER_PATH + ' -I .' + ' -x cu ' + opt + includes + + ' -c ' + srcs + out) + + # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'. + # Need to investigate and fix. + cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd + if log: + Log(cmd) + return system(cmd) + + +def main(): + parser = ArgumentParser() + parser.add_argument('-x', nargs=1) + parser.add_argument('--cuda_log', action='store_true') + args, leftover = parser.parse_known_args(sys.argv[1:]) + + if args.x and args.x[0] == 'cuda': + if args.cuda_log: + Log('-x cuda') + leftover = [pipes.quote(s) for s in leftover] + if args.cuda_log: + Log('using nvcc') + return InvokeNvcc(leftover, log=args.cuda_log) + + # Strip our flags before passing through to the CPU compiler for files which + # are not -x cuda. We can't just pass 'leftover' because it also strips -x. + # We not only want to pass -x to the CPU compiler, but also keep it in its + # relative location in the argv list (the compiler is actually sensitive to + # this). + cpu_compiler_flags = [ + flag for flag in sys.argv[1:] if not flag.startswith(('--cuda_log')) + ] + + return subprocess.call([CPU_COMPILER] + cpu_compiler_flags) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/build_deps/gpus/cuda/BUILD b/build_deps/gpus/cuda/BUILD new file mode 100644 index 000000000..e69de29bb diff --git a/build_deps/gpus/cuda/BUILD.tpl b/build_deps/gpus/cuda/BUILD.tpl new file mode 100644 index 000000000..05a750ab2 --- /dev/null +++ b/build_deps/gpus/cuda/BUILD.tpl @@ -0,0 +1,229 @@ +load(":build_defs.bzl", "cuda_header_library") +load("@bazel_skylib//:bzl_library.bzl", "bzl_library") +load("@bazel_skylib//lib:selects.bzl", "selects") +load("@bazel_skylib//rules:common_settings.bzl", "bool_flag") + +licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like + +package(default_visibility = ["//visibility:public"]) + +bool_flag( + name = "enable_cuda", + build_setting_default = False, +) + +config_setting( + name = "is_cuda_enabled", + flag_values = {":enable_cuda": "True"}, +) + + +# Config setting whether built with CUDA support using nvcc. +# +# TODO(b/174244321), DEPRECATED: this target will be removed when all users +# have been converted to :is_cuda_enabled (most) or :is_cuda_compiler_nvcc. +selects.config_setting_group( + name = "using_nvcc", + match_all = [ + "//:is_cuda_enabled", + "//:is_cuda_compiler_nvcc", + ], +) + +config_setting( + name = "_opt", + values = {"compilation_mode": "opt"}, + visibility = ["//visibility:private"], +) + +# Provides CUDA headers for '#include "third_party/gpus/cuda/include/cuda.h"' +# All clients including TensorFlow should use these directives. 
+cuda_header_library( + name = "cuda_headers", + hdrs = [ + "cuda/cuda_config.h", + ":cuda-include", + ], + include_prefix = "third_party/gpus", + includes = [ + ".", # required to include cuda/cuda/cuda_config.h as cuda/config.h + "cuda/include", + ], +) + +cc_library( + name = "cudart_static", + srcs = ["cuda/lib/%{cudart_static_lib}"], + linkopts = [ + "-ldl", + "-lpthread", + %{cudart_static_linkopt} + ], +) + +cc_library( + name = "cuda_driver", + srcs = ["cuda/lib/%{cuda_driver_lib}"], +) + +cc_library( + name = "cudart", + srcs = ["cuda/lib/%{cudart_lib}"], + data = ["cuda/lib/%{cudart_lib}"], + linkstatic = 1, +) + +cuda_header_library( + name = "cublas_headers", + hdrs = [":cublas-include"], + include_prefix = "third_party/gpus/cuda/include", + includes = ["cublas/include"], + strip_include_prefix = "cublas/include", + deps = [":cuda_headers"], +) + +cuda_header_library( + name = "cusolver_headers", + hdrs = [":cusolver-include"], + include_prefix = "third_party/gpus/cuda/include", + includes = ["cusolver/include"], + strip_include_prefix = "cusolver/include", + deps = [":cuda_headers"], +) + +cuda_header_library( + name = "cufft_headers", + hdrs = [":cufft-include"], + include_prefix = "third_party/gpus/cuda/include", + includes = ["cufft/include"], + strip_include_prefix = "cufft/include", + deps = [":cuda_headers"], +) + +cuda_header_library( + name = "cusparse_headers", + hdrs = [":cusparse-include"], + include_prefix = "third_party/gpus/cuda/include", + includes = ["cusparse/include"], + strip_include_prefix = "cusparse/include", + deps = [":cuda_headers"], +) + +cuda_header_library( + name = "curand_headers", + hdrs = [":curand-include"], + include_prefix = "third_party/gpus/cuda/include", + includes = ["curand/include"], + strip_include_prefix = "curand/include", + deps = [":cuda_headers"], +) + +cc_library( + name = "cublas", + srcs = ["cuda/lib/%{cublas_lib}"], + data = ["cuda/lib/%{cublas_lib}"], + linkstatic = 1, +) + +cc_library( + name = "cublasLt", + srcs = ["cuda/lib/%{cublasLt_lib}"], + data = ["cuda/lib/%{cublasLt_lib}"], + linkstatic = 1, +) + +cc_library( + name = "cusolver", + srcs = ["cuda/lib/%{cusolver_lib}"], + data = ["cuda/lib/%{cusolver_lib}"], + linkopts = ["-lgomp"], + linkstatic = 1, +) + +cc_library( + name = "cudnn", + srcs = ["cuda/lib/%{cudnn_lib}"], + data = ["cuda/lib/%{cudnn_lib}"], + linkstatic = 1, +) + +cc_library( + name = "cudnn_header", + hdrs = [":cudnn-include"], + include_prefix = "third_party/gpus/cudnn", + strip_include_prefix = "cudnn/include", + deps = [":cuda_headers"], +) + +cc_library( + name = "cufft", + srcs = ["cuda/lib/%{cufft_lib}"], + data = ["cuda/lib/%{cufft_lib}"], + linkstatic = 1, +) + +cc_library( + name = "curand", + srcs = ["cuda/lib/%{curand_lib}"], + data = ["cuda/lib/%{curand_lib}"], + linkstatic = 1, +) + +cc_library( + name = "cuda", + deps = [ + ":cublas", + ":cublasLt", + ":cuda_headers", + ":cudart", + ":cudnn", + ":cufft", + ":curand", + ], +) + +alias( + name = "cub_headers", + actual = "%{cub_actual}", +) + +cuda_header_library( + name = "cupti_headers", + hdrs = [":cuda-extras"], + include_prefix = "third_party/gpus", + includes = ["cuda/extras/CUPTI/include/"], + deps = [":cuda_headers"], +) + +cc_library( + name = "cupti_dsos", + data = ["cuda/lib/%{cupti_lib}"], +) + +cc_library( + name = "cusparse", + srcs = ["cuda/lib/%{cusparse_lib}"], + data = ["cuda/lib/%{cusparse_lib}"], + linkopts = ["-lgomp"], + linkstatic = 1, +) + +cc_library( + name = "libdevice_root", + data = [":cuda-nvvm"], +) + 
+bzl_library( + name = "build_defs_bzl", + srcs = ["build_defs.bzl"], + deps = [ + "@bazel_skylib//lib:selects", + ], +) + +py_library( + name = "cuda_config_py", + srcs = ["cuda/cuda_config.py"], +) + +%{copy_rules} diff --git a/build_deps/gpus/cuda/build_defs.bzl.tpl b/build_deps/gpus/cuda/build_defs.bzl.tpl new file mode 100644 index 000000000..7ab1304fa --- /dev/null +++ b/build_deps/gpus/cuda/build_defs.bzl.tpl @@ -0,0 +1,56 @@ +# Macros for building CUDA code. +def cuda_default_copts(): + """Default options for all CUDA compilations.""" + return [ + "-x", + "cuda", + "-DUSE_CUDA=1", + "-Xcuda-fatbinary=--compress-all", + ] + %{cuda_extra_copts} + + +def cuda_gpu_architectures(): + """Returns a list of supported GPU architectures.""" + return %{cuda_gpu_architectures} + + +def cuda_header_library(name, + hdrs, + include_prefix=None, + strip_include_prefix=None, + deps=[], + **kwargs): + """Generates a cc_library containing both virtual and system include paths. + + Generates both a header-only target with virtual includes plus the full + target without virtual includes. This works around the fact that bazel can't + mix 'includes' and 'include_prefix' in the same target.""" + + native.cc_library( + name=name + "_virtual", + hdrs=hdrs, + include_prefix=include_prefix, + strip_include_prefix=strip_include_prefix, + deps=deps, + visibility=["//visibility:private"], + ) + + native.cc_library(name=name, + textual_hdrs=hdrs, + deps=deps + [":%s_virtual" % name], + **kwargs) + + +def cuda_library(copts=[], **kwargs): + """Wrapper over cc_library which adds default CUDA options.""" + native.cc_library(copts=cuda_default_copts() + copts, **kwargs) + + +def cuda_binary(copts=[], **kwargs): + """Wrapper over cc_library which adds default CUDA options.""" + native.cc_binary(copts=cuda_default_copts() + copts, **kwargs) + + +def cuda_cc_test(copts=[], **kwargs): + """Wrapper over cc_test which adds default CUDA options.""" + native.cc_test(copts=copts, **kwargs) diff --git a/build_deps/gpus/cuda/cuda_config.h.tpl b/build_deps/gpus/cuda/cuda_config.h.tpl new file mode 100644 index 000000000..a92871e71 --- /dev/null +++ b/build_deps/gpus/cuda/cuda_config.h.tpl @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CUDA_CUDA_CONFIG_H_ +#define CUDA_CUDA_CONFIG_H_ + +#define CUDA_VERSION "%{cuda_version}" +#define CUDART_VERSION "%{cudart_version}" +#define CUPTI_VERSION "%{cupti_version}" +#define CUBLAS_VERSION "%{cublas_version}" +#define CUSOLVER_VERSION "%{cusolver_version}" +#define CURAND_VERSION "%{curand_version}" +#define CUFFT_VERSION "%{cufft_version}" +#define CUSPARSE_VERSION "%{cusparse_version}" +#define CUDNN_VERSION "%{cudnn_version}" + +#define CUDA_TOOLKIT_PATH "%{cuda_toolkit_path}" + +#define CUDA_COMPUTE_CAPABILITIES %{cuda_compute_capabilities} + +#endif // CUDA_CUDA_CONFIG_H_ diff --git a/build_deps/gpus/cuda/cuda_config.py.tpl b/build_deps/gpus/cuda/cuda_config.py.tpl new file mode 100644 index 000000000..328558e12 --- /dev/null +++ b/build_deps/gpus/cuda/cuda_config.py.tpl @@ -0,0 +1,16 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +config = %{cuda_config} diff --git a/build_deps/gpus/find_cuda_config.py b/build_deps/gpus/find_cuda_config.py new file mode 100644 index 000000000..e384feaaf --- /dev/null +++ b/build_deps/gpus/find_cuda_config.py @@ -0,0 +1,638 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Prints CUDA library and header directories and versions found on the system. + +The script searches for CUDA library and header files on the system, inspects +them to determine their version and prints the configuration to stdout. +The paths to inspect and the required versions are specified through environment +variables. If no valid configuration is found, the script prints to stderr and +returns an error code. + +The list of libraries to find is specified as arguments. Supported libraries are +CUDA (includes cuBLAS), cuDNN, NCCL, and TensorRT. + +The script takes a list of base directories specified by the CUDA_PATHS +environment variable as comma-separated glob list. The script looks for headers +and library files in a hard-coded set of subdirectories from these base paths. +If CUDA_PATHS is not specified, a OS specific default is used: + + Linux: /usr/local/cuda, /usr, and paths from 'ldconfig -p'. + Windows: CUDA_PATH environment variable, or + C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\* + +For backwards compatibility, some libraries also use alternative base +directories from other environment variables if they are specified. 
List of +library-specific environment variables: + + Library Version env variable Additional base directories + ---------------------------------------------------------------- + CUDA CUDA_VERSION CUDA_TOOLKIT_PATH + cuBLAS CUBLAS_VERSION CUDA_TOOLKIT_PATH + cuDNN CUDNN_VERSION CUDNN_INSTALL_PATH + NCCL NCCL_VERSION NCCL_INSTALL_PATH, NCCL_HDR_PATH + TensorRT TENSORRT_VERSION TENSORRT_INSTALL_PATH + +Versions environment variables can be of the form 'x' or 'x.y' to request a +specific version, empty or unspecified to accept any version. + +The output of a found library is of the form: +tf__version: x.y.z +tf__header_dir: ... +tf__library_dir: ... +""" + +import glob +import io +import os +import platform +import re +import subprocess +import sys + +# pylint: disable=g-import-not-at-top +try: + from shutil import which +except ImportError: + from distutils.spawn import find_executable as which +# pylint: enable=g-import-not-at-top + + +class ConfigError(Exception): + pass + + +def _is_linux(): + return platform.system() == "Linux" + + +def _is_macos(): + return platform.system() == "Darwin" + + +def _matches_version(actual_version, required_version): + """Checks whether some version meets the requirements. + + All elements of the required_version need to be present in the + actual_version. + + required_version actual_version result + ----------------------------------------- + 1 1.1 True + 1.2 1 False + 1.2 1.3 False + 1 True + + Args: + required_version: The version specified by the user. + actual_version: The version detected from the CUDA installation. + Returns: Whether the actual version matches the required one. + """ + if actual_version is None: + return False + + # Strip spaces from the versions. + actual_version = actual_version.strip() + required_version = required_version.strip() + return actual_version.startswith(required_version) + + +def _at_least_version(actual_version, required_version): + actual = [int(v) for v in actual_version.split(".")] + required = [int(v) for v in required_version.split(".")] + return actual >= required + + +def _get_header_version(path, name): + """Returns preprocessor defines in C header file.""" + for line in io.open(path, "r", encoding="utf-8").readlines(): + match = re.match("\s*#\s*define %s\s+(\d+)" % name, line) + if match: + return match.group(1) + return "" + + +def _cartesian_product(first, second): + """Returns all path combinations of first and second.""" + return [os.path.join(f, s) for f in first for s in second] + + +def _get_ld_config_paths(): + """Returns all directories from 'ldconfig -p'.""" + if not _is_linux(): + return [] + ldconfig_path = which("ldconfig") or "/sbin/ldconfig" + output = subprocess.check_output([ldconfig_path, "-p"]) + pattern = re.compile(".* => (.*)") + result = set() + for line in output.splitlines(): + try: + match = pattern.match(line.decode("ascii")) + except UnicodeDecodeError: + match = False + if match: + result.add(os.path.dirname(match.group(1))) + return sorted(list(result)) + + +def _get_default_cuda_paths(cuda_version): + if not cuda_version: + cuda_version = "*" + elif not "." 
in cuda_version: + cuda_version = cuda_version + ".*" + + return [ + "/usr/local/cuda-%s" % cuda_version, "/usr/local/cuda", "/usr", + "/usr/local/cudnn" + ] + _get_ld_config_paths() + + +def _header_paths(): + """Returns hard-coded set of relative paths to look for header files.""" + return [ + "", + "include", + "include/cuda", + "include/*-linux-gnu", + "extras/CUPTI/include", + "include/cuda/CUPTI", + "local/cuda/extras/CUPTI/include", + ] + + +def _library_paths(): + """Returns hard-coded set of relative paths to look for library files.""" + return [ + "", + "lib64", + "lib", + "lib/*-linux-gnu", + "lib/x64", + "extras/CUPTI/*", + "local/cuda/lib64", + "local/cuda/extras/CUPTI/lib64", + ] + + +def _not_found_error(base_paths, relative_paths, filepattern): + base_paths = "".join( + ["\n '%s'" % path for path in sorted(base_paths)]) + relative_paths = "".join( + ["\n '%s'" % path for path in relative_paths]) + return ConfigError( + "Could not find any %s in any subdirectory:%s\nof:%s\n" % + (filepattern, relative_paths, base_paths)) + + +def _find_file(base_paths, relative_paths, filepattern): + for path in _cartesian_product(base_paths, relative_paths): + for file in glob.glob(os.path.join(path, filepattern)): + return file + raise _not_found_error(base_paths, relative_paths, filepattern) + + +def _find_library(base_paths, library_name, required_version): + """Returns first valid path to the requested library.""" + if _is_macos(): + filepattern = "%s*.dylib" % (".".join(["lib" + library_name] + + required_version.split(".")[:1])) + else: + filepattern = ".".join(["lib" + library_name, "so"] + + required_version.split(".")[:1]) + "*" + return _find_file(base_paths, _library_paths(), filepattern) + + +def _find_versioned_file(base_paths, relative_paths, filepatterns, + required_version, get_version): + """Returns first valid path to a file that matches the requested version.""" + if type(filepatterns) not in [list, tuple]: + filepatterns = [filepatterns] + for path in _cartesian_product(base_paths, relative_paths): + for filepattern in filepatterns: + for file in glob.glob(os.path.join(path, filepattern)): + actual_version = get_version(file) + if _matches_version(actual_version, required_version): + return file, actual_version + raise _not_found_error( + base_paths, relative_paths, + ", ".join(filepatterns) + " matching version '%s'" % required_version) + + +def _find_header(base_paths, header_name, required_version, get_version): + """Returns first valid path to a header that matches the requested version.""" + return _find_versioned_file(base_paths, _header_paths(), header_name, + required_version, get_version) + + +def _find_cuda_config(base_paths, required_version): + + def get_header_version(path): + version = int(_get_header_version(path, "CUDA_VERSION")) + if not version: + return None + return "%d.%d" % (version // 1000, version % 1000 // 10) + + cuda_header_path, header_version = _find_header(base_paths, "cuda.h", + required_version, + get_header_version) + cuda_version = header_version # x.y, see above. 
+ + cuda_library_path = _find_library(base_paths, "cudart", cuda_version) + + def get_nvcc_version(path): + pattern = "Cuda compilation tools, release \d+\.\d+, V(\d+\.\d+\.\d+)" + for line in subprocess.check_output([path, "--version"]).splitlines(): + match = re.match(pattern, line.decode("ascii")) + if match: + return match.group(1) + return None + + nvcc_name = "nvcc" + nvcc_path, nvcc_version = _find_versioned_file(base_paths, [ + "", + "bin", + "local/cuda/bin", + ], nvcc_name, cuda_version, get_nvcc_version) + + nvvm_path = _find_file(base_paths, [ + "nvvm/libdevice", + "share/cuda", + "lib/nvidia-cuda-toolkit/libdevice", + "local/cuda/nvvm/libdevice", + ], "libdevice*.10.bc") + + cupti_header_path = _find_file(base_paths, _header_paths(), "cupti.h") + cupti_library_path = _find_library(base_paths, "cupti", required_version) + + cuda_binary_dir = os.path.dirname(nvcc_path) + nvvm_library_dir = os.path.dirname(nvvm_path) + + # XLA requires the toolkit path to find ptxas and libdevice. + # TODO(csigg): pass in both directories instead. + cuda_toolkit_paths = ( + os.path.normpath(os.path.join(cuda_binary_dir, "..")), + os.path.normpath(os.path.join(nvvm_library_dir, "../..")), + ) + if cuda_toolkit_paths[0] != cuda_toolkit_paths[1]: + raise ConfigError("Inconsistent CUDA toolkit path: %s vs %s" % + cuda_toolkit_paths) + + return { + "cuda_version": cuda_version, + "cuda_include_dir": os.path.dirname(cuda_header_path), + "cuda_library_dir": os.path.dirname(cuda_library_path), + "cuda_binary_dir": cuda_binary_dir, + "nvvm_library_dir": nvvm_library_dir, + "cupti_include_dir": os.path.dirname(cupti_header_path), + "cupti_library_dir": os.path.dirname(cupti_library_path), + "cuda_toolkit_path": cuda_toolkit_paths[0], + } + + +def _find_cublas_config(base_paths, required_version, cuda_version): + + if _at_least_version(cuda_version, "10.1"): + + def get_header_version(path): + version = (_get_header_version(path, name) + for name in ("CUBLAS_VER_MAJOR", "CUBLAS_VER_MINOR", + "CUBLAS_VER_PATCH")) + return ".".join(version) + + header_path, header_version = _find_header(base_paths, "cublas_api.h", + required_version, + get_header_version) + # cuBLAS uses the major version only. + cublas_version = header_version.split(".")[0] + + else: + # There is no version info available before CUDA 10.1, just find the file. + header_version = cuda_version + header_path = _find_file(base_paths, _header_paths(), "cublas_api.h") + # cuBLAS version is the same as CUDA version (x.y). 
+ cublas_version = required_version + + library_path = _find_library(base_paths, "cublas", cublas_version) + + return { + "cublas_version": header_version, + "cublas_include_dir": os.path.dirname(header_path), + "cublas_library_dir": os.path.dirname(library_path), + } + + +def _find_cusolver_config(base_paths, required_version, cuda_version): + + if _at_least_version(cuda_version, "11.0"): + + def get_header_version(path): + version = (_get_header_version(path, name) + for name in ("CUSOLVER_VER_MAJOR", "CUSOLVER_VER_MINOR", + "CUSOLVER_VER_PATCH")) + return ".".join(version) + + header_path, header_version = _find_header(base_paths, + "cusolver_common.h", + required_version, + get_header_version) + cusolver_version = header_version.split(".")[0] + + else: + header_version = cuda_version + header_path = _find_file(base_paths, _header_paths(), + "cusolver_common.h") + cusolver_version = required_version + + library_path = _find_library(base_paths, "cusolver", cusolver_version) + + return { + "cusolver_version": header_version, + "cusolver_include_dir": os.path.dirname(header_path), + "cusolver_library_dir": os.path.dirname(library_path), + } + + +def _find_curand_config(base_paths, required_version, cuda_version): + + if _at_least_version(cuda_version, "11.0"): + + def get_header_version(path): + version = (_get_header_version(path, name) + for name in ("CURAND_VER_MAJOR", "CURAND_VER_MINOR", + "CURAND_VER_PATCH")) + return ".".join(version) + + header_path, header_version = _find_header(base_paths, "curand.h", + required_version, + get_header_version) + curand_version = header_version.split(".")[0] + + else: + header_version = cuda_version + header_path = _find_file(base_paths, _header_paths(), "curand.h") + curand_version = required_version + + library_path = _find_library(base_paths, "curand", curand_version) + + return { + "curand_version": header_version, + "curand_include_dir": os.path.dirname(header_path), + "curand_library_dir": os.path.dirname(library_path), + } + + +def _find_cufft_config(base_paths, required_version, cuda_version): + + if _at_least_version(cuda_version, "11.0"): + + def get_header_version(path): + version = (_get_header_version(path, name) + for name in ("CUFFT_VER_MAJOR", "CUFFT_VER_MINOR", + "CUFFT_VER_PATCH")) + return ".".join(version) + + header_path, header_version = _find_header(base_paths, "cufft.h", + required_version, + get_header_version) + cufft_version = header_version.split(".")[0] + + else: + header_version = cuda_version + header_path = _find_file(base_paths, _header_paths(), "cufft.h") + cufft_version = required_version + + library_path = _find_library(base_paths, "cufft", cufft_version) + + return { + "cufft_version": header_version, + "cufft_include_dir": os.path.dirname(header_path), + "cufft_library_dir": os.path.dirname(library_path), + } + + +def _find_cudnn_config(base_paths, required_version): + + def get_header_version(path): + version = [ + _get_header_version(path, name) + for name in ("CUDNN_MAJOR", "CUDNN_MINOR", "CUDNN_PATCHLEVEL") + ] + return ".".join(version) if version[0] else None + + header_path, header_version = _find_header(base_paths, + ("cudnn.h", "cudnn_version.h"), + required_version, + get_header_version) + cudnn_version = header_version.split(".")[0] + + library_path = _find_library(base_paths, "cudnn", cudnn_version) + + return { + "cudnn_version": cudnn_version, + "cudnn_include_dir": os.path.dirname(header_path), + "cudnn_library_dir": os.path.dirname(library_path), + } + + +def _find_cusparse_config(base_paths, 
required_version, cuda_version): + + if _at_least_version(cuda_version, "11.0"): + + def get_header_version(path): + version = (_get_header_version(path, name) + for name in ("CUSPARSE_VER_MAJOR", "CUSPARSE_VER_MINOR", + "CUSPARSE_VER_PATCH")) + return ".".join(version) + + header_path, header_version = _find_header(base_paths, "cusparse.h", + required_version, + get_header_version) + cusparse_version = header_version.split(".")[0] + + else: + header_version = cuda_version + header_path = _find_file(base_paths, _header_paths(), "cusparse.h") + cusparse_version = required_version + + library_path = _find_library(base_paths, "cusparse", cusparse_version) + + return { + "cusparse_version": header_version, + "cusparse_include_dir": os.path.dirname(header_path), + "cusparse_library_dir": os.path.dirname(library_path), + } + + +def _find_nccl_config(base_paths, required_version): + + def get_header_version(path): + version = (_get_header_version(path, name) + for name in ("NCCL_MAJOR", "NCCL_MINOR", "NCCL_PATCH")) + return ".".join(version) + + header_path, header_version = _find_header(base_paths, "nccl.h", + required_version, + get_header_version) + nccl_version = header_version.split(".")[0] + + library_path = _find_library(base_paths, "nccl", nccl_version) + + return { + "nccl_version": nccl_version, + "nccl_include_dir": os.path.dirname(header_path), + "nccl_library_dir": os.path.dirname(library_path), + } + + +def _find_tensorrt_config(base_paths, required_version): + + def get_header_version(path): + version = (_get_header_version(path, name) + for name in ("NV_TENSORRT_MAJOR", "NV_TENSORRT_MINOR", + "NV_TENSORRT_PATCH")) + # `version` is a generator object, so we convert it to a list before using + # it (muitiple times below). + version = list(version) + if not all(version): + # Versions not found, make _matches_version returns False. + return None + return ".".join(version) + + header_path, header_version = _find_header(base_paths, "NvInferVersion.h", + required_version, + get_header_version) + + tensorrt_version = header_version.split(".")[0] + library_path = _find_library(base_paths, "nvinfer", tensorrt_version) + + return { + "tensorrt_version": tensorrt_version, + "tensorrt_include_dir": os.path.dirname(header_path), + "tensorrt_library_dir": os.path.dirname(library_path), + } + + +def _list_from_env(env_name, default=[]): + """Returns comma-separated list from environment variable.""" + if env_name in os.environ: + return os.environ[env_name].split(",") + return default + + +def _get_legacy_path(env_name, default=[]): + """Returns a path specified by a legacy environment variable. + + CUDNN_INSTALL_PATH, NCCL_INSTALL_PATH, TENSORRT_INSTALL_PATH set to + '/usr/lib/x86_64-linux-gnu' would previously find both library and header + paths. Detect those and return '/usr', otherwise forward to _list_from_env(). 
+ """ + if env_name in os.environ: + match = re.match("^(/[^/ ]*)+/lib/\w+-linux-gnu/?$", + os.environ[env_name]) + if match: + return [match.group(1)] + return _list_from_env(env_name, default) + + +def _normalize_path(path): + """Returns normalized path, with forward slashes on Windows.""" + return os.path.realpath(path) + + +def find_cuda_config(): + """Returns a dictionary of CUDA library and header file paths.""" + libraries = [argv.lower() for argv in sys.argv[1:]] + cuda_version = os.environ.get("CUDA_VERSION", "") + base_paths = _list_from_env("CUDA_PATHS", + _get_default_cuda_paths(cuda_version)) + base_paths = [path for path in base_paths if os.path.exists(path)] + + result = {} + if "cuda" in libraries: + cuda_paths = _list_from_env("CUDA_TOOLKIT_PATH", base_paths) + result.update(_find_cuda_config(cuda_paths, cuda_version)) + + cuda_version = result["cuda_version"] + cublas_paths = base_paths + if tuple(int(v) for v in cuda_version.split(".")) < (10, 1): + # Before CUDA 10.1, cuBLAS was in the same directory as the toolkit. + cublas_paths = cuda_paths + cublas_version = os.environ.get("CUBLAS_VERSION", "") + result.update( + _find_cublas_config(cublas_paths, cublas_version, cuda_version)) + + cusolver_paths = base_paths + if tuple(int(v) for v in cuda_version.split(".")) < (11, 0): + cusolver_paths = cuda_paths + cusolver_version = os.environ.get("CUSOLVER_VERSION", "") + result.update( + _find_cusolver_config(cusolver_paths, cusolver_version, + cuda_version)) + + curand_paths = base_paths + if tuple(int(v) for v in cuda_version.split(".")) < (11, 0): + curand_paths = cuda_paths + curand_version = os.environ.get("CURAND_VERSION", "") + result.update( + _find_curand_config(curand_paths, curand_version, cuda_version)) + + cufft_paths = base_paths + if tuple(int(v) for v in cuda_version.split(".")) < (11, 0): + cufft_paths = cuda_paths + cufft_version = os.environ.get("CUFFT_VERSION", "") + result.update( + _find_cufft_config(cufft_paths, cufft_version, cuda_version)) + + cusparse_paths = base_paths + if tuple(int(v) for v in cuda_version.split(".")) < (11, 0): + cusparse_paths = cuda_paths + cusparse_version = os.environ.get("CUSPARSE_VERSION", "") + result.update( + _find_cusparse_config(cusparse_paths, cusparse_version, + cuda_version)) + + if "cudnn" in libraries: + cudnn_paths = _get_legacy_path("CUDNN_INSTALL_PATH", base_paths) + cudnn_version = os.environ.get("CUDNN_VERSION", "") + result.update(_find_cudnn_config(cudnn_paths, cudnn_version)) + + if "nccl" in libraries: + nccl_paths = _get_legacy_path("NCCL_INSTALL_PATH", base_paths) + nccl_version = os.environ.get("NCCL_VERSION", "") + result.update(_find_nccl_config(nccl_paths, nccl_version)) + + if "tensorrt" in libraries: + tensorrt_paths = _get_legacy_path("TENSORRT_INSTALL_PATH", base_paths) + tensorrt_version = os.environ.get("TENSORRT_VERSION", "") + result.update(_find_tensorrt_config(tensorrt_paths, tensorrt_version)) + + for k, v in result.items(): + if k.endswith("_dir") or k.endswith("_path"): + result[k] = _normalize_path(v) + + return result + + +def main(): + try: + for key, value in sorted(find_cuda_config().items()): + print("%s: %s" % (key, value)) + except ConfigError as e: + sys.stderr.write(str(e) + '\n') + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/build_deps/remote_config/BUILD b/build_deps/remote_config/BUILD new file mode 100644 index 000000000..e69de29bb diff --git a/build_deps/remote_config/BUILD.tpl b/build_deps/remote_config/BUILD.tpl new file mode 100644 index 
000000000..d97eb9701 --- /dev/null +++ b/build_deps/remote_config/BUILD.tpl @@ -0,0 +1,26 @@ +# Each platform creates a constraint @//:platform_constraint that +# is listed in its constraint_values; rule that want to select a specific +# platform to run on can put @//:platform_constraing into their +# exec_compatible_with attribute. +# Toolchains can similarly be marked with target_compatible_with or +# exec_compatible_with to bind them to this platform. +constraint_setting( + name = "platform_setting" +) + +constraint_value( + name = "platform_constraint", + constraint_setting = ":platform_setting", + visibility = ["//visibility:public"], +) + +platform( + name = "platform", + visibility = ["//visibility:public"], + constraint_values = [ + "@platforms//cpu:%{cpu}", + "@platforms//os:%{platform}", + ":platform_constraint", + ], + exec_properties = %{exec_properties}, +) diff --git a/build_deps/remote_config/common.bzl b/build_deps/remote_config/common.bzl new file mode 100644 index 000000000..47df004e2 --- /dev/null +++ b/build_deps/remote_config/common.bzl @@ -0,0 +1,294 @@ +"""Functions common across configure rules.""" + +BAZEL_SH = "BAZEL_SH" +PYTHON_BIN_PATH = "PYTHON_BIN_PATH" +PYTHON_LIB_PATH = "PYTHON_LIB_PATH" +PYTHON_CONFIG_REPO = "PYTHON_CONFIG_REPO" + + +def auto_config_fail(msg): + """Output failure message when auto configuration fails.""" + red = "\033[0;31m" + no_color = "\033[0m" + fail("%sConfiguration Error:%s %s\n" % (red, no_color, msg)) + + +def which(repository_ctx, program_name, allow_failure=False): + """Returns the full path to a program on the execution platform. + + Args: + repository_ctx: the repository_ctx + program_name: name of the program on the PATH + + Returns: + The full path to a program on the execution platform. + """ + out = execute( + repository_ctx, + ["which", program_name], + allow_failure=allow_failure, + ).stdout + if out != None: + out = out.replace("\\", "\\\\").rstrip() + return out + + +def get_python_bin(repository_ctx): + """Gets the python bin path. + + Args: + repository_ctx: the repository_ctx + + Returns: + The python bin path. + """ + python_bin = get_host_environ(repository_ctx, PYTHON_BIN_PATH) + if python_bin: + return python_bin + + # First check for an explicit "python3" + python_bin = which(repository_ctx, "python3", True) + if python_bin: + return python_bin + + # Some systems just call pythone3 "python" + python_bin = which(repository_ctx, "python", True) + if python_bin: + return python_bin + + auto_config_fail( + "Cannot find python in PATH, please make sure " + + "python is installed and add its directory in PATH, or --define " + + "%s='/something/else'.\nPATH=%s" % ( + PYTHON_BIN_PATH, + get_environ(repository_ctx, "PATH"), + )) + return python_bin # unreachable + + +def get_bash_bin(repository_ctx): + """Gets the bash bin path. + + Args: + repository_ctx: the repository_ctx + + Returns: + The bash bin path. + """ + bash_bin = get_host_environ(repository_ctx, BAZEL_SH) + if bash_bin != None: + return bash_bin + bash_bin_path = which(repository_ctx, "bash") + if bash_bin_path == None: + auto_config_fail( + "Cannot find bash in PATH, please make sure " + + "bash is installed and add its directory in PATH, or --define " + + "%s='/path/to/bash'.\nPATH=%s" % ( + BAZEL_SH, + get_environ(repository_ctx, "PATH"), + )) + return bash_bin_path + + +def read_dir(repository_ctx, src_dir): + """Returns a sorted list with all files in a directory. + + Finds all files inside a directory, traversing subfolders and following + symlinks. 
+ + Args: + repository_ctx: the repository_ctx + src_dir: the directory to traverse + + Returns: + A sorted list with all files in a directory. + """ + find_result = execute( + repository_ctx, + ["find", src_dir, "-follow", "-type", "f"], + allow_failure=True, + ) + result = find_result.stdout + return sorted(result.splitlines()) + + +def get_environ(repository_ctx, name, default_value=None): + """Returns the value of an environment variable on the execution platform. + + Args: + repository_ctx: the repository_ctx + name: the name of environment variable + default_value: the value to return if not set + + Returns: + The value of the environment variable 'name' on the execution platform + or 'default_value' if it's not set. + """ + cmd = "echo -n \"$%s\"" % name + result = execute( + repository_ctx, + [get_bash_bin(repository_ctx), "-c", cmd], + allow_failure=True, + ) + if len(result.stdout) == 0: + return default_value + return result.stdout + + +def get_host_environ(repository_ctx, name, default_value=None): + """Returns the value of an environment variable on the host platform. + + The host platform is the machine that Bazel runs on. + + Args: + repository_ctx: the repository_ctx + name: the name of environment variable + + Returns: + The value of the environment variable 'name' on the host platform. + """ + if name in repository_ctx.os.environ: + return repository_ctx.os.environ.get(name).strip() + + if hasattr(repository_ctx.attr, + "environ") and name in repository_ctx.attr.environ: + return repository_ctx.attr.environ.get(name).strip() + + return default_value + + +def get_cpu_value(repository_ctx): + """Returns the name of the host operating system. + + Args: + repository_ctx: The repository context. + Returns: + A string containing the name of the host operating system. + """ + result = raw_exec(repository_ctx, ["uname", "-s"]) + return result.stdout.strip() + + +def execute(repository_ctx, + cmdline, + error_msg=None, + error_details=None, + allow_failure=False): + """Executes an arbitrary shell command. + + Args: + repository_ctx: the repository_ctx object + cmdline: list of strings, the command to execute + error_msg: string, a summary of the error if the command fails + error_details: string, details about the error or steps to fix it + allow_failure: bool, if True, an empty stdout result or output to stderr + is fine, otherwise either of these is an error + Returns: + The result of repository_ctx.execute(cmdline) + """ + result = raw_exec(repository_ctx, cmdline) + if (result.stderr or not result.stdout) and not allow_failure: + fail( + "\n".join([ + error_msg.strip() + if error_msg else "Repository command failed", + result.stderr.strip(), + error_details if error_details else "", + ]), ) + return result + + +def raw_exec(repository_ctx, cmdline): + """Executes a command via repository_ctx.execute() and returns the result. + + This method is useful for debugging purposes. For example, to print all + commands executed as well as their return code. + + Args: + repository_ctx: the repository_ctx + cmdline: the list of args + + Returns: + The 'exec_result' of repository_ctx.execute(). + """ + return repository_ctx.execute(cmdline) + + +def files_exist(repository_ctx, paths, bash_bin=None): + """Checks which files in paths exists. + + Args: + repository_ctx: the repository_ctx + paths: a list of paths + bash_bin: path to the bash interpreter + + Returns: + Returns a list of Bool. True means that the path at the + same position in the paths list exists. 
+ """ + if bash_bin == None: + bash_bin = get_bash_bin(repository_ctx) + + cmd_tpl = "[ -e \"%s\" ] && echo True || echo False" + cmds = [cmd_tpl % path for path in paths] + cmd = " ; ".join(cmds) + + stdout = execute(repository_ctx, [bash_bin, "-c", cmd]).stdout.strip() + return [val == "True" for val in stdout.splitlines()] + + +def realpath(repository_ctx, path, bash_bin=None): + """Returns the result of "realpath path". + + Args: + repository_ctx: the repository_ctx + path: a path on the file system + bash_bin: path to the bash interpreter + + Returns: + Returns the result of "realpath path" + """ + if bash_bin == None: + bash_bin = get_bash_bin(repository_ctx) + + return execute(repository_ctx, + [bash_bin, "-c", "realpath \"%s\"" % path]).stdout.strip() + + +def err_out(result): + """Returns stderr if set, else stdout. + + This function is a workaround for a bug in RBE where stderr is returned as stdout. Instead + of using result.stderr use err_out(result) instead. + + Args: + result: the exec_result. + + Returns: + The stderr if set, else stdout + """ + if len(result.stderr) == 0: + return result.stdout + return result.stderr + + +def config_repo_label(config_repo, target): + """Construct a label from config_repo and target. + + This function exists to ease the migration from preconfig to remote config. In preconfig + the *_CONFIG_REPO environ variables are set to packages in the main repo while in + remote config they will point to remote repositories. + + Args: + config_repo: a remote repository or package. + target: a target + Returns: + A label constructed from config_repo and target. + """ + if config_repo.startswith("@") and not config_repo.find("//") > 0: + # remote config is being used. + return Label(config_repo + "//" + target) + elif target.startswith(":"): + return Label(config_repo + target) + else: + return Label(config_repo + "/" + target) diff --git a/build_deps/remote_config/remote_platform_configure.bzl b/build_deps/remote_config/remote_platform_configure.bzl new file mode 100644 index 000000000..780de4e7d --- /dev/null +++ b/build_deps/remote_config/remote_platform_configure.bzl @@ -0,0 +1,55 @@ +"""Repository rule to create a platform for a docker image to be used with RBE.""" + + +def _remote_platform_configure_impl(repository_ctx): + platform = repository_ctx.attr.platform + if platform == "local": + os = repository_ctx.os.name.lower() + if os.startswith("mac os"): + platform = "osx" + else: + platform = "linux" + + cpu = "x86_64" + machine_type = repository_ctx.execute(["bash", "-c", + "echo $MACHTYPE"]).stdout + if (machine_type.startswith("ppc") or machine_type.startswith("powerpc")): + cpu = "ppc" + elif machine_type.startswith("s390x"): + cpu = "s390x" + elif machine_type.startswith("aarch64"): + cpu = "aarch64" + elif machine_type.startswith("arm64"): + cpu = "aarch64" + elif machine_type.startswith("arm"): + cpu = "arm" + elif machine_type.startswith("mips64"): + cpu = "mips64" + elif machine_type.startswith("riscv64"): + cpu = "riscv64" + + exec_properties = repository_ctx.attr.platform_exec_properties + + serialized_exec_properties = "{" + for k, v in exec_properties.items(): + serialized_exec_properties += "\"%s\" : \"%s\"," % (k, v) + serialized_exec_properties += "}" + + repository_ctx.template( + "BUILD", + Label("//remote_config:BUILD.tpl"), + { + "%{platform}": platform, + "%{exec_properties}": serialized_exec_properties, + "%{cpu}": cpu, + }, + ) + + +remote_platform_configure = repository_rule( + implementation=_remote_platform_configure_impl, 
+ attrs={ + "platform_exec_properties": attr.string_dict(mandatory=True), + "platform": attr.string(default="linux", values=["linux", "local"]), + }, +) diff --git a/include/BUILD b/include/BUILD new file mode 100644 index 000000000..cbacc911e --- /dev/null +++ b/include/BUILD @@ -0,0 +1,29 @@ +load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library") + +cuda_library( + name = "merlin_localfile", + hdrs = [ + "merlin_localfile.hpp", + ], + visibility = [ + "//visibility:public", + ], + deps = [ + "//include/merlin", + "@local_config_cuda//cuda", + ], +) + +cuda_library( + name = "merlin_hashtable", + hdrs = [ + "merlin_hashtable.cuh", + ], + visibility = [ + "//visibility:public", + ], + deps = [ + "//include/merlin", + "@local_config_cuda//cuda", + ], +) diff --git a/include/merlin/BUILD b/include/merlin/BUILD new file mode 100644 index 000000000..2057bee4a --- /dev/null +++ b/include/merlin/BUILD @@ -0,0 +1,24 @@ +load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library") + +cuda_library( + name = "merlin", + srcs = [ + ], + hdrs = [ + "array_kernels.cuh", + "core_kernels.cuh", + "debug.hpp", + "flexible_buffer.cuh", + "initializers.cuh", + "memory_pool.cuh", + "optimizers.cuh", + "types.cuh", + "utils.cuh", + ], + visibility = [ + "//visibility:public", + ], + deps = [ + "@local_config_cuda//cuda", + ], +)
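
The two Python helpers added above (the crosstool compiler wrapper and find_cuda_config.py) contain a few pieces of logic that are easier to follow with concrete inputs. The sketches below are illustrative only: the sample argv values, paths, and version numbers are assumptions rather than values taken from the patch, and the lower-case helper names are local to the examples. First, the wrapper's GetOptionValue() collects the values of every occurrence of a flag into one flat list; a minimal stand-alone version of that parsing:

```python
# Minimal stand-alone version of GetOptionValue() from
# build_deps/gpus/crosstool/crosstool_compiler_wrapper.tpl.
# The sample argv below is assumed, not taken from the patch.
from argparse import ArgumentParser


def get_option_value(argv, option):
    """Collects the values of every occurrence of `option` into one flat list."""
    parser = ArgumentParser()
    parser.add_argument(option, nargs='*', action='append')
    dest = option.lstrip('-').replace('-', '_')
    args, _ = parser.parse_known_args(argv)
    values = vars(args)[dest]
    # action='append' yields one sub-list per occurrence, e.g.
    # [['include'], ['third_party/gpus']]; sum(..., []) flattens them.
    return sum(values, []) if values else []


if __name__ == '__main__':
    argv = ['-I', 'include', '-I', 'third_party/gpus', '-o', 'hashtable.o']
    print(get_option_value(argv, '-I'))   # ['include', 'third_party/gpus']
    print(get_option_value(argv, '-MF'))  # []
```

This flattening is what lets repeated flags such as -I, -D, or -U be re-emitted to nvcc as a single group.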
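
The wrapper's main() only routes a compilation to nvcc when Bazel passes '-x cuda'; otherwise it forwards the arguments to the host compiler unchanged (keeping -x and its position) after stripping the wrapper's own --cuda_log flag. A condensed sketch of that dispatch, using shlex.quote where the wrapper itself uses the older pipes.quote:

```python
# Condensed sketch of main() in the crosstool wrapper: '-x cuda' routes the
# compilation to nvcc, everything else goes to the host compiler unchanged
# except for the wrapper-specific --cuda_log flag.
import shlex
from argparse import ArgumentParser


def dispatch(argv):
    parser = ArgumentParser()
    parser.add_argument('-x', nargs=1)
    parser.add_argument('--cuda_log', action='store_true')
    args, leftover = parser.parse_known_args(argv)

    if args.x and args.x[0] == 'cuda':
        # nvcc path: the remaining flags are shell-quoted and assembled into
        # an nvcc command line by InvokeNvcc().
        return 'nvcc', [shlex.quote(s) for s in leftover]

    # Host path: keep '-x' (and its position) for the host compiler,
    # drop only the wrapper's own --cuda_log.
    return 'host', [flag for flag in argv if not flag.startswith('--cuda_log')]


print(dispatch(['-x', 'cuda', '-c', 'kernel.cu', '-o', 'kernel.o']))
print(dispatch(['-x', 'c++', '-c', 'util.cc', '-o', 'util.o']))
```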
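
InvokeNvcc() also filters repeated -std values down to the ones nvcc accepts, renames c++1z to c++17 on CUDA 11+, and keeps only the last survivor (the wrapper later appends an unconditional '--std c++17' as well). A small sketch of the selection step, with the allowed list copied from the wrapper and the inputs assumed:

```python
# Sketch of the '-std' handling in InvokeNvcc(): only values nvcc accepts are
# kept, 'c++1z' is renamed to 'c++17' on CUDA 11+, and at most the last
# surviving flag is emitted.
def pick_std_option(std_values, nvcc_major=11):
    allowed = ["c++03", "c++11", "c++14"]
    rename = {}
    if nvcc_major >= 11:
        rename["c++1z"] = "c++17"
        allowed += ["c++17", "c++1z"]
    kept = [' -std=' + rename.get(v, v) for v in std_values if v in allowed]
    return ''.join(kept[-1:])   # [-1:] keeps at most one entry, the last one


print(repr(pick_std_option(['c++11', 'c++1z'])))  # ' -std=c++17'
print(repr(pick_std_option(['gnu++20'])))         # '' (not in the allowed list)
```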
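
The -gencode flags are built from the --cuda-gpu-arch and --cuda-include-ptx sets so that an architecture requested for both SASS and PTX is emitted as a single flag, avoiding a redundant PTX generation pass. The sketch below mirrors that set arithmetic; it drops the backslash-escaped quotes the wrapper needs because it runs the command through os.system(), and sm_70/sm_75 are example inputs:

```python
# Sketch of the -gencode construction in InvokeNvcc(). An architecture that
# appears in both --cuda-gpu-arch and --cuda-include-ptx gets a single
# 'code="sm_xy,compute_xy"' flag.
def gencode_flags(capabilities_sm, capabilities_compute):
    flags = []
    both = capabilities_sm & capabilities_compute
    for cap in sorted(both):
        xy = cap[len('sm_'):]
        flags.append('-gencode=arch=compute_%s,code="sm_%s,compute_%s"' % (xy, xy, xy))
    for cap in sorted(capabilities_sm - both):
        xy = cap[len('sm_'):]
        flags.append('-gencode=arch=compute_%s,code="sm_%s"' % (xy, xy))
    for cap in sorted(capabilities_compute - both):
        xy = cap[len('sm_'):]
        flags.append('-gencode=arch=compute_%s,code="compute_%s"' % (xy, xy))
    return flags


print(gencode_flags({'sm_70', 'sm_75'}, {'sm_75'}))
# ['-gencode=arch=compute_75,code="sm_75,compute_75"',
#  '-gencode=arch=compute_70,code="sm_70"']
```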
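
On the find_cuda_config.py side, version requirements are matched by string prefix, and "at least" checks compare integer components. A stand-alone restatement of those two helpers, with the docstring's truth table as assertions:

```python
# Stand-alone restatement of the two version helpers in
# build_deps/gpus/find_cuda_config.py.
def matches_version(actual_version, required_version):
    """Prefix match, as used for CUDA_VERSION, CUDNN_VERSION, etc."""
    if actual_version is None:
        return False
    return actual_version.strip().startswith(required_version.strip())


def at_least_version(actual_version, required_version):
    """Component-wise integer comparison, e.g. for the CUDA 10.1 cutoff."""
    actual = [int(v) for v in actual_version.split(".")]
    required = [int(v) for v in required_version.split(".")]
    return actual >= required


assert matches_version("1.1", "1")        # '1' accepts any 1.x ...
assert matches_version("11.2", "1")       # ... and, being a prefix match, 11.x too
assert not matches_version("1", "1.2")    # requirement more specific than install
assert not matches_version("1.3", "1.2")
assert matches_version("1.1", "")         # empty requirement accepts anything
assert at_least_version("10.1", "10.1")
assert not at_least_version("9.2", "10.1")
```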
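
The CUDA version itself is derived from the integer CUDA_VERSION define in cuda.h: major = value // 1000, minor = (value % 1000) // 10. For example, assuming a header defining CUDA_VERSION 11020 (an assumed value, i.e. CUDA 11.2):

```python
# How find_cuda_config.py turns the integer CUDA_VERSION define from cuda.h
# into an 'x.y' string. The header text is an assumed example (CUDA 11.2).
import re


def parse_define(header_text, name):
    """Mirrors _get_header_version(), but reads from a string instead of a file."""
    for line in header_text.splitlines():
        match = re.match(r"\s*#\s*define %s\s+(\d+)" % name, line)
        if match:
            return match.group(1)
    return ""


header_text = "#define CUDA_VERSION 11020\n"
version = int(parse_define(header_text, "CUDA_VERSION"))
print("%d.%d" % (version // 1000, version % 1000 // 10))  # -> 11.2
```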
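
Libraries and headers are located by combining every base path (CUDA_PATHS, the defaults, directories from 'ldconfig -p') with a fixed list of relative subdirectories and globbing the result; the first hit wins. A rough sketch with the documented Linux defaults as the assumed base paths:

```python
# Rough sketch of the path search in find_cuda_config.py: base paths are
# combined with fixed relative subdirectories and globbed; the first match is
# taken. The real script also adds directories reported by 'ldconfig -p'.
import glob
import os


def cartesian_product(first, second):
    return [os.path.join(f, s) for f in first for s in second]


def find_file(base_paths, relative_paths, filepattern):
    """Mirrors _find_file(); the original raises ConfigError instead of returning None."""
    for path in cartesian_product(base_paths, relative_paths):
        for file in glob.glob(os.path.join(path, filepattern)):
            return file
    return None


base_paths = ["/usr/local/cuda", "/usr"]                        # assumed defaults
header_dirs = ["include", "include/cuda", "extras/CUPTI/include"]
print(find_file(base_paths, header_dirs, "cuda.h"))
```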
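
Finally, the legacy *_INSTALL_PATH handling reduces a multiarch lib directory to its prefix so that both headers and libraries can be found beneath it, as described in the _get_legacy_path() docstring. The regex behaves like this on the docstring's example value:

```python
# The legacy *_INSTALL_PATH handling in find_cuda_config.py: a value pointing
# at a multiarch lib directory is reduced to its prefix. Example value taken
# from the _get_legacy_path() docstring.
import re

legacy_value = "/usr/lib/x86_64-linux-gnu"   # e.g. an old CUDNN_INSTALL_PATH
match = re.match(r"^(/[^/ ]*)+/lib/\w+-linux-gnu/?$", legacy_value)
print(match.group(1) if match else legacy_value)   # -> /usr
```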