From 447f5b5e2d2c5f8f1717823d19ac41ec7c7c3cf7 Mon Sep 17 00:00:00 2001 From: Sergei Vorobev Date: Tue, 28 Feb 2023 03:51:08 +0000 Subject: [PATCH] [bazel] enable sccache+nvcc in CI (#95528) Fixes #79348 This change is mostly focused on enabling nvcc+sccache in the PyTorch CI. Along the way we had to make a couple of tweaks: 1. Split the rules_cc from the rules_cuda that embedded them before. This is needed in order to apply a different patch to rules_cc compared to the one that rules_cuda applies by default. This is in turn needed because we need to work around an nvcc behavior where it doesn't send `-iquote xxx` to the host compiler, but it does send `-isystem xxx`. So we work around this problem by (ab)using `-isystem` instead. Without it we get errors like "`xxx` is not found". 2. Work around a bug in Bazel https://github.com/bazelbuild/bazel/issues/10167 that prevents us from using a straightforward and honest `nvcc` sccache wrapper. Instead we generate an ad hoc Bazel-specific nvcc wrapper that has internal knowledge of the relative Bazel paths to local_cuda. This allows us to work around the issue with CUDA symlinks. Without it we get `undeclared inclusion(s) in rule` errors all over the place for CUDA headers. 
## Test plan Green CI build https://github.com/pytorch/pytorch/actions/runs/4267147180/jobs/7428431740 Note that now it says "CUDA" in the sccache output ``` + sccache --show-stats Compile requests 9784 Compile requests executed 6726 Cache hits 6200 Cache hits (C/C++) 6131 Cache hits (CUDA) 69 Cache misses 519 Cache misses (C/C++) 201 Cache misses (CUDA) 318 Cache timeouts 0 Cache read errors 0 Forced recaches 0 Cache write errors 0 Compilation failures 0 Cache errors 7 Cache errors (C/C++) 7 Non-cacheable compilations 0 Non-cacheable calls 2893 Non-compilation calls 165 Unsupported compiler calls 0 Average cache write 0.116 s Average cache read miss 23.722 s Average cache read hit 0.057 s Failed distributed compilations 0 ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/95528 Approved by: https://github.com/huydhn --- .ci/pytorch/build.sh | 1 + .ci/pytorch/common_utils.sh | 20 ++++++++ .lintrunner.toml | 1 + WORKSPACE | 14 +++++- tools/rules_cc/cuda_support.patch | 80 +++++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 tools/rules_cc/cuda_support.patch diff --git a/.ci/pytorch/build.sh b/.ci/pytorch/build.sh index fd0af8c57e33..cfca6fad834c 100755 --- a/.ci/pytorch/build.sh +++ b/.ci/pytorch/build.sh @@ -191,6 +191,7 @@ if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then set -e get_bazel + install_sccache_nvcc_for_bazel # Leave 1 CPU free and use only up to 80% of memory to reduce the change of crashing # the runner diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh index e4172c6aa593..c344b9b39ac6 100644 --- a/.ci/pytorch/common_utils.sh +++ b/.ci/pytorch/common_utils.sh @@ -95,6 +95,26 @@ function get_bazel() { chmod +x tools/bazel } +# This function is bazel specific because of the bug +# in the bazel that requires some special paths massaging +# as a workaround. 
See +# https://github.com/bazelbuild/bazel/issues/10167 +function install_sccache_nvcc_for_bazel() { + sudo mv /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc-real + + # Write the `/usr/local/cuda/bin/nvcc` + cat << EOF | sudo tee /usr/local/cuda/bin/nvcc +#!/bin/sh +if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then + exec sccache /usr/local/cuda/bin/nvcc "\$@" +else + exec external/local_cuda/cuda/bin/nvcc-real "\$@" +fi +EOF + + sudo chmod +x /usr/local/cuda/bin/nvcc +} + function install_monkeytype { # Install MonkeyType pip_install MonkeyType diff --git a/.lintrunner.toml b/.lintrunner.toml index dd94aae4a1d3..940dea358dd2 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -367,6 +367,7 @@ include_patterns = ['**'] exclude_patterns = [ '**/contrib/**', '**/*.diff', + '**/*.patch', 'third_party/**', 'aten/src/ATen/native/vulkan/api/vk_mem_alloc.h', 'test/cpp/jit/upgrader_models/*.ptl', diff --git a/WORKSPACE b/WORKSPACE index c016da0cb310..9272e448c50a 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -3,6 +3,18 @@ workspace(name = "pytorch") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("//tools/rules:workspace.bzl", "new_patched_local_repository") +http_archive( + name = "rules_cc", + strip_prefix = "rules_cc-40548a2974f1aea06215272d9c2b47a14a24e556", + patches = [ + "//:tools/rules_cc/cuda_support.patch", + ], + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/rules_cc/archive/40548a2974f1aea06215272d9c2b47a14a24e556.tar.gz", + "https://github.com/bazelbuild/rules_cc/archive/40548a2974f1aea06215272d9c2b47a14a24e556.tar.gz", + ], +) + http_archive( name = "rules_cuda", strip_prefix = "runtime-b1c7cce21ba4661c17ac72421c6a0e2015e7bef3/third_party/rules_cuda", @@ -11,7 +23,7 @@ http_archive( load("@rules_cuda//cuda:dependencies.bzl", "rules_cuda_dependencies") -rules_cuda_dependencies() +rules_cuda_dependencies(with_rules_cc = False) load("@rules_cc//cc:repositories.bzl", "rules_cc_toolchains") diff --git 
a/tools/rules_cc/cuda_support.patch b/tools/rules_cc/cuda_support.patch new file mode 100644 index 000000000000..d097eee5036a --- /dev/null +++ b/tools/rules_cc/cuda_support.patch @@ -0,0 +1,80 @@ +diff --git cc/private/toolchain/unix_cc_configure.bzl cc/private/toolchain/unix_cc_configure.bzl +index ba992fc..e4e8364 100644 +--- cc/private/toolchain/unix_cc_configure.bzl ++++ cc/private/toolchain/unix_cc_configure.bzl +@@ -27,6 +27,7 @@ load( + "which", + "write_builtin_include_directory_paths", + ) ++load("@rules_cuda//cuda:toolchain.bzl", "cuda_compiler_deps") + + def _field(name, value): + """Returns properly indented top level crosstool field.""" +@@ -397,7 +398,7 @@ def configure_unix_toolchain(repository_ctx, cpu_value, overriden_tools): + cxx_opts = split_escaped(get_env_var( + repository_ctx, + "BAZEL_CXXOPTS", +- "-std=c++0x", ++ "-std=c++11", + False, + ), ":") + +@@ -463,7 +464,7 @@ def configure_unix_toolchain(repository_ctx, cpu_value, overriden_tools): + )), + "%{cc_compiler_deps}": get_starlark_list([":builtin_include_directory_paths"] + ( + [":cc_wrapper"] if darwin else [] +- )), ++ ) + cuda_compiler_deps()), + "%{cc_toolchain_identifier}": cc_toolchain_identifier, + "%{compile_flags}": get_starlark_list( + [ +diff --git cc/private/toolchain/unix_cc_toolchain_config.bzl cc/private/toolchain/unix_cc_toolchain_config.bzl +index c3cf3ba..1744eb4 100644 +--- cc/private/toolchain/unix_cc_toolchain_config.bzl ++++ cc/private/toolchain/unix_cc_toolchain_config.bzl +@@ -25,6 +25,7 @@ load( + "variable_with_value", + "with_feature_set", + ) ++load("@rules_cuda//cuda:toolchain.bzl", "cuda_toolchain_config") + + all_compile_actions = [ + ACTION_NAMES.c_compile, +@@ -580,7 +581,8 @@ def _impl(ctx): + ], + flag_groups = [ + flag_group( +- flags = ["-iquote", "%{quote_include_paths}"], ++ # -isystem because there is an nvcc thing where it doesn't forward -iquote to host compiler. 
++ flags = ["-isystem", "%{quote_include_paths}"], + iterate_over = "quote_include_paths", + ), + flag_group( +@@ -1152,10 +1154,15 @@ def _impl(ctx): + unfiltered_compile_flags_feature, + ] + ++ cuda = cuda_toolchain_config( ++ cuda_toolchain_info = ctx.attr._cuda_toolchain_info, ++ compiler_path = ctx.attr.tool_paths["gcc"], ++ ) ++ + return cc_common.create_cc_toolchain_config_info( + ctx = ctx, +- features = features, +- action_configs = action_configs, ++ features = features + cuda.features, ++ action_configs = action_configs + cuda.action_configs, + cxx_builtin_include_directories = ctx.attr.cxx_builtin_include_directories, + toolchain_identifier = ctx.attr.toolchain_identifier, + host_system_name = ctx.attr.host_system_name, +@@ -1192,6 +1199,9 @@ cc_toolchain_config = rule( + "tool_paths": attr.string_dict(), + "toolchain_identifier": attr.string(mandatory = True), + "unfiltered_compile_flags": attr.string_list(), ++ "_cuda_toolchain_info": attr.label( ++ default = Label("@rules_cuda//cuda:cuda_toolchain_info"), ++ ), + }, + provides = [CcToolchainConfigInfo], + )