diff --git a/.ci/manywheel/build_common.sh b/.ci/manywheel/build_common.sh index 2f125ce05291..411fdf15ebd0 100644 --- a/.ci/manywheel/build_common.sh +++ b/.ci/manywheel/build_common.sh @@ -321,8 +321,8 @@ for pkg in /$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/torch*linux*.w # ROCm workaround for roctracer dlopens if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then patchedpath=$(fname_without_so_number $destpath) - # Keep the so number for XPU dependencies - elif [[ "$DESIRED_CUDA" == *"xpu"* ]]; then + # Keep the so number for XPU dependencies and libgomp.so.1 to avoid twice load + elif [[ "$DESIRED_CUDA" == *"xpu"* || "$filename" == "libgomp.so.1" ]]; then patchedpath=$destpath else patchedpath=$(fname_with_sha256 $destpath) diff --git a/.ci/pytorch/smoke_test/check_gomp.py b/.ci/pytorch/smoke_test/check_gomp.py new file mode 100644 index 000000000000..93430ff39906 --- /dev/null +++ b/.ci/pytorch/smoke_test/check_gomp.py @@ -0,0 +1,74 @@ +import ctypes +import os +import sys +from pathlib import Path + + +def get_gomp_thread(): + """ + Retrieves the maximum number of OpenMP threads after loading the `libgomp.so.1` library + and the `libtorch_cpu.so` library. It then queries the + maximum number of threads available for OpenMP parallel regions using the + `omp_get_max_threads` function. + + Returns: + int: The maximum number of OpenMP threads available. + + Notes: + - The function assumes the default path for `libgomp.so.1` on AlmaLinux OS. + - The path to `libtorch_cpu.so` is constructed based on the Python executable's + installation directory. + - This function is specific to environments where PyTorch and OpenMP are used + together and may require adjustments for other setups. + """ + python_path = Path(sys.executable).resolve() + python_prefix = ( + python_path.parent.parent + ) # Typically goes to the Python installation root + + # Get the additional ABI flags (if any); it may be an empty string. + abiflags = getattr(sys, "abiflags", "") + + # Construct the Python directory name correctly (e.g., "python3.13t"). + python_version = ( + f"python{sys.version_info.major}.{sys.version_info.minor}{abiflags}" + ) + + libtorch_cpu_path = ( + python_prefix + / "lib" + / python_version + / "site-packages" + / "torch" + / "lib" + / "libtorch_cpu.so" + ) + + # use the default gomp path of AlmaLinux OS + libgomp_path = "/usr/lib64/libgomp.so.1" + + os.environ["GOMP_CPU_AFFINITY"] = "0-3" + + libgomp = ctypes.CDLL(libgomp_path) + libgomp = ctypes.CDLL(libtorch_cpu_path) + + libgomp.omp_get_max_threads.restype = ctypes.c_int + libgomp.omp_get_max_threads.argtypes = [] + + omp_max_threads = libgomp.omp_get_max_threads() + return omp_max_threads + + +def main(): + omp_max_threads = get_gomp_thread() + print( + f"omp_max_threads after loading libgomp.so and libtorch_cpu.so: {omp_max_threads}" + ) + if omp_max_threads == 1: + raise RuntimeError( + "omp_max_threads is 1. Check whether libgomp.so is loaded twice." + ) + + +if __name__ == "__main__": + main() diff --git a/.circleci/scripts/binary_linux_test.sh b/.circleci/scripts/binary_linux_test.sh index 051b4f16f27a..11678cabb2c3 100755 --- a/.circleci/scripts/binary_linux_test.sh +++ b/.circleci/scripts/binary_linux_test.sh @@ -101,6 +101,11 @@ if [[ "\$GPU_ARCH_TYPE" != *s390x* && "\$GPU_ARCH_TYPE" != *xpu* && "\$GPU_ARCH_ else python /pytorch/.ci/pytorch/smoke_test/smoke_test.py --package=torchonly --torch-compile-check disabled $extra_parameters fi + + if [[ "\$GPU_ARCH_TYPE" != *cpu-aarch64* ]]; then + # https://github.com/pytorch/pytorch/issues/149422 + python /pytorch/.ci/pytorch/smoke_test/check_gomp.py + fi fi # Clean temp files