From 295162ec3abb2a58ab6a54af15196ea6fadf4852 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 1 Apr 2025 19:18:44 +0000 Subject: [PATCH] Smoke Test - disable pypi package validation for binaries that package cuda libs (#150194) Smoke Test - disable pypi package validation for binaries that package cuda libs. These binaries do not install packages via pypi. Should resolve this failure from `linux-binary-manywheel / manywheel-py3_11-cuda12_6-full-test / test`: ``` Traceback (most recent call last): File "/pytorch/.ci/pytorch/smoke_test/smoke_test.py", line 468, in <module> main() File "/pytorch/.ci/pytorch/smoke_test/smoke_test.py", line 462, in main smoke_test_cuda( File "/pytorch/.ci/pytorch/smoke_test/smoke_test.py", line 274, in smoke_test_cuda compare_pypi_to_torch_versions( File "/pytorch/.ci/pytorch/smoke_test/smoke_test.py", line 220, in compare_pypi_to_torch_versions raise RuntimeError(f"Can't find {package} in PyPI for Torch: {torch_version}") RuntimeError: Can't find cudnn in PyPI for Torch: 9.5.1 ``` Link: https://github.com/pytorch/pytorch/actions/runs/14101221665/job/39505479587#step:15:982 Pull Request resolved: https://github.com/pytorch/pytorch/pull/150194 Approved by: https://github.com/ZainRizvi --- .ci/pytorch/smoke_test/smoke_test.py | 22 ++++++++++++++++++---- .circleci/scripts/binary_linux_test.sh | 13 +++++++++++-- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/.ci/pytorch/smoke_test/smoke_test.py b/.ci/pytorch/smoke_test/smoke_test.py index c4f41a874774..acc69e36a5a5 100644 --- a/.ci/pytorch/smoke_test/smoke_test.py +++ b/.ci/pytorch/smoke_test/smoke_test.py @@ -227,7 +227,10 @@ def compare_pypi_to_torch_versions( def smoke_test_cuda( - package: str, runtime_error_check: str, torch_compile_check: str + package: str, + runtime_error_check: str, + torch_compile_check: str, + pypi_pkg_check: str, ) -> None: if not torch.cuda.is_available() and is_cuda_system: raise RuntimeError(f"Expected CUDA {gpu_arch_ver}. 
However CUDA is not loaded.") @@ -268,13 +271,14 @@ def smoke_test_cuda( print(f"cuDNN enabled? {torch.backends.cudnn.enabled}") torch_cudnn_version = cudnn_to_version_str(torch.backends.cudnn.version()) print(f"Torch cuDNN version: {torch_cudnn_version}") + torch_nccl_version = ".".join(str(v) for v in torch.cuda.nccl.version()) + print(f"Torch nccl; version: {torch_nccl_version}") # Pypi dependencies are installed on linux ony and nccl is availbale only on Linux. - if sys.platform in ["linux", "linux2"]: + if pypi_pkg_check == "enabled" and sys.platform in ["linux", "linux2"]: compare_pypi_to_torch_versions( "cudnn", find_pypi_package_version("nvidia-cudnn"), torch_cudnn_version ) - torch_nccl_version = ".".join(str(v) for v in torch.cuda.nccl.version()) compare_pypi_to_torch_versions( "nccl", find_pypi_package_version("nvidia-nccl"), torch_nccl_version ) @@ -436,6 +440,13 @@ def parse_args(): choices=["enabled", "disabled"], default="enabled", ) + parser.add_argument( + "--pypi-pkg-check", + help="Check pypi package versions cudnn and nccl", + type=str, + choices=["enabled", "disabled"], + default="enabled", + ) return parser.parse_args() @@ -460,7 +471,10 @@ def main() -> None: smoke_test_modules() smoke_test_cuda( - options.package, options.runtime_error_check, options.torch_compile_check + options.package, + options.runtime_error_check, + options.torch_compile_check, + options.pypi_pkg_check, ) diff --git a/.circleci/scripts/binary_linux_test.sh b/.circleci/scripts/binary_linux_test.sh index 3ee84f46d8fa..051b4f16f27a 100755 --- a/.circleci/scripts/binary_linux_test.sh +++ b/.circleci/scripts/binary_linux_test.sh @@ -90,8 +90,17 @@ fi /pytorch/.ci/pytorch/check_binary.sh if [[ "\$GPU_ARCH_TYPE" != *s390x* && "\$GPU_ARCH_TYPE" != *xpu* && "\$GPU_ARCH_TYPE" != *rocm* && "$PACKAGE_TYPE" != libtorch ]]; then - # Exclude s390, xpu, rocm and libtorch builds from smoke testing - python /pytorch/.ci/pytorch/smoke_test/smoke_test.py --package=torchonly 
--torch-compile-check disabled + + torch_pkg_size="$(ls -1 /final_pkgs/torch-* | sort |tail -1 |xargs wc -c |cut -d ' ' -f1)" + # todo: implement check for large binaries + # if the package is larger than 1.5GB, we disable the pypi check. + # this package contains all libraries packaged in torch libs folder + # example of such package is https://download.pytorch.org/whl/cu126_full/torch + if [[ "\$torch_pkg_size" -gt 1500000000 ]]; then + python /pytorch/.ci/pytorch/smoke_test/smoke_test.py --package=torchonly --torch-compile-check disabled --pypi-pkg-check disabled + else + python /pytorch/.ci/pytorch/smoke_test/smoke_test.py --package=torchonly --torch-compile-check disabled $extra_parameters + fi fi # Clean temp files