mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Simplify nvtx3 CMake handling, always use nvtx3 (#153784)
Fall back to third-party NVTX3 if system NVTX3 doesn't exist. We also reuse the `CUDA::nvtx3` target for better interoperability.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/153784
Approved by: https://github.com/ezyang
This commit is contained in:
@ -154,11 +154,6 @@ if [[ $CUDA_VERSION == 12* ]]; then
|
||||
"libcupti.so.12"
|
||||
"libnvperf_host.so"
|
||||
)
|
||||
# Add libnvToolsExt only if CUDA version is not 12.9
|
||||
if [[ $CUDA_VERSION != 12.9* ]]; then
|
||||
DEPS_LIST+=("/usr/local/cuda/lib64/libnvToolsExt.so.1")
|
||||
DEPS_SONAME+=("libnvToolsExt.so.1")
|
||||
fi
|
||||
else
|
||||
echo "Using nvidia libs from pypi."
|
||||
CUDA_RPATHS=(
|
||||
|
@ -16,7 +16,7 @@ target_link_libraries(simple-torch-test CUDA::cudart CUDA::cufft CUDA::cusparse
|
||||
find_library(CUDNN_LIBRARY NAMES cudnn)
|
||||
target_link_libraries(simple-torch-test ${CUDNN_LIBRARY} )
|
||||
if(MSVC)
|
||||
file(GLOB TORCH_DLLS "$ENV{CUDA_PATH}/bin/cudnn64_8.dll" "$ENV{NVTOOLSEXT_PATH}/bin/x64/*.dll")
|
||||
file(GLOB TORCH_DLLS "$ENV{CUDA_PATH}/bin/cudnn64_8.dll")
|
||||
message("dlls to copy " ${TORCH_DLLS})
|
||||
add_custom_command(TARGET simple-torch-test
|
||||
POST_BUILD
|
||||
|
@ -18,15 +18,6 @@ REM Check for optional components
|
||||
set USE_CUDA=
|
||||
set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
|
||||
|
||||
IF "%NVTOOLSEXT_PATH%"=="" (
|
||||
IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" (
|
||||
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
||||
) ELSE (
|
||||
echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
|
||||
exit /b 1
|
||||
)
|
||||
)
|
||||
|
||||
IF "%CUDA_PATH_V126%"=="" (
|
||||
IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin\nvcc.exe" (
|
||||
set "CUDA_PATH_V126=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6"
|
||||
|
@ -18,15 +18,6 @@ REM Check for optional components
|
||||
set USE_CUDA=
|
||||
set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
|
||||
|
||||
IF "%NVTOOLSEXT_PATH%"=="" (
|
||||
IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" (
|
||||
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
||||
) ELSE (
|
||||
echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
|
||||
exit /b 1
|
||||
)
|
||||
)
|
||||
|
||||
IF "%CUDA_PATH_V128%"=="" (
|
||||
IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin\nvcc.exe" (
|
||||
set "CUDA_PATH_V128=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8"
|
||||
|
@ -18,15 +18,6 @@ REM Check for optional components
|
||||
set USE_CUDA=
|
||||
set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
|
||||
|
||||
IF "%NVTOOLSEXT_PATH%"=="" (
|
||||
IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" (
|
||||
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
||||
) ELSE (
|
||||
echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
|
||||
exit /b 1
|
||||
)
|
||||
)
|
||||
|
||||
IF "%CUDA_PATH_V129%"=="" (
|
||||
IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.9\bin\nvcc.exe" (
|
||||
set "CUDA_PATH_V128=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.9"
|
||||
|
@ -10,7 +10,6 @@ copy "%CUDA_PATH%\bin\nvrtc*64_*.dll*" pytorch\torch\lib
|
||||
copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" pytorch\torch\lib
|
||||
copy "%CUDA_PATH%\extras\CUPTI\lib64\nvperf_host*.dll*" pytorch\torch\lib
|
||||
|
||||
copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" pytorch\torch\lib
|
||||
copy "%PYTHON_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib
|
||||
|
||||
:: Should be set in build_pytorch.bat
|
||||
|
@ -119,11 +119,6 @@ goto cuda_common
|
||||
:: If you cannot find the CUDA version you want to build for here then please
|
||||
:: add it @ https://github.com/pytorch/test-infra/tree/main/aws/ami/windows
|
||||
if not exist "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" (
|
||||
if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" (
|
||||
curl -k -L https://ossci-windows.s3.us-east-1.amazonaws.com/builder/NvToolsExt.7z --output "%SRC_DIR%\temp_build\NvToolsExt.7z"
|
||||
if errorlevel 1 exit /b 1
|
||||
)
|
||||
|
||||
if not exist "%SRC_DIR%\temp_build\gpu_driver_dlls.zip" (
|
||||
curl -k -L "https://ossci-windows.s3.us-east-1.amazonaws.com/builder/additional_dlls.zip" --output "%SRC_DIR%\temp_build\gpu_driver_dlls.zip"
|
||||
if errorlevel 1 exit /b 1
|
||||
@ -150,15 +145,6 @@ if not exist "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_
|
||||
xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\MSBuild\Microsoft\VC\v170\BuildCustomizations"
|
||||
)
|
||||
|
||||
echo Installing NvToolsExt...
|
||||
7z x %SRC_DIR%\temp_build\NvToolsExt.7z -o"%SRC_DIR%\temp_build\NvToolsExt"
|
||||
mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64"
|
||||
mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include"
|
||||
mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64"
|
||||
xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\bin\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64"
|
||||
xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\include\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include"
|
||||
xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\lib\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64"
|
||||
|
||||
echo Installing cuDNN...
|
||||
7z x %CUDNN_SETUP_FILE% -o"%SRC_DIR%\temp_build\cudnn"
|
||||
xcopy /Y "%SRC_DIR%\temp_build\cudnn\%CUDNN_FOLDER%\bin\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin"
|
||||
@ -189,4 +175,3 @@ echo Setting up environment...
|
||||
set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\libnvvp;%PATH%"
|
||||
set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
|
||||
set "CUDA_PATH_V%CUDA_VER_MAJOR%_%CUDA_VER_MINOR%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
|
||||
set "NVTOOLSEXT_PATH=%ProgramFiles%\NVIDIA Corporation\NvToolsExt"
|
||||
|
1
.github/scripts/windows/build_magma.bat
vendored
1
.github/scripts/windows/build_magma.bat
vendored
@ -17,7 +17,6 @@ if errorlevel 1 exit /b 1
|
||||
|
||||
set "PATH=C:\Tools;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUVER%\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUVER%\libnvvp;%PATH%"
|
||||
set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUVER%
|
||||
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
||||
|
||||
mkdir magma_cuda%CUVER_NODOT%
|
||||
cd magma_cuda%CUVER_NODOT%
|
||||
|
@ -1616,11 +1616,7 @@ if(USE_CUDA)
|
||||
endif()
|
||||
target_link_libraries(torch_cuda INTERFACE torch::cudart)
|
||||
target_link_libraries(torch_cuda PUBLIC c10_cuda)
|
||||
if(TARGET torch::nvtx3)
|
||||
target_link_libraries(torch_cuda PRIVATE torch::nvtx3)
|
||||
else()
|
||||
target_link_libraries(torch_cuda PUBLIC torch::nvtoolsext)
|
||||
endif()
|
||||
target_link_libraries(torch_cuda PRIVATE CUDA::nvtx3)
|
||||
|
||||
target_include_directories(
|
||||
torch_cuda INTERFACE $<INSTALL_INTERFACE:include>)
|
||||
@ -1712,9 +1708,6 @@ if(BUILD_SHARED_LIBS)
|
||||
if(USE_CUDA)
|
||||
target_link_libraries(torch_global_deps ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
|
||||
target_link_libraries(torch_global_deps torch::cudart)
|
||||
if(TARGET torch::nvtoolsext)
|
||||
target_link_libraries(torch_global_deps torch::nvtoolsext)
|
||||
endif()
|
||||
endif()
|
||||
install(TARGETS torch_global_deps DESTINATION "${TORCH_INSTALL_LIB_DIR}")
|
||||
endif()
|
||||
|
@ -968,18 +968,17 @@ endif()
|
||||
# ---[ nvtx
|
||||
if(USE_SYSTEM_NVTX)
|
||||
find_path(nvtx3_dir NAMES nvtx3 PATHS ${CUDA_INCLUDE_DIRS})
|
||||
else()
|
||||
find_path(nvtx3_dir NAMES nvtx3 PATHS "${PROJECT_SOURCE_DIR}/third_party/NVTX/c/include" NO_DEFAULT_PATH)
|
||||
find_package_handle_standard_args(nvtx3 DEFAULT_MSG nvtx3_dir)
|
||||
if(NOT nvtx3_FOUND)
|
||||
message(WARNING "Cannot find system NVTX3, find shipped NVTX3 instead")
|
||||
endif()
|
||||
endif()
|
||||
find_package_handle_standard_args(nvtx3 DEFAULT_MSG nvtx3_dir)
|
||||
if(nvtx3_FOUND)
|
||||
add_library(torch::nvtx3 INTERFACE IMPORTED)
|
||||
target_include_directories(torch::nvtx3 INTERFACE "${nvtx3_dir}")
|
||||
target_compile_definitions(torch::nvtx3 INTERFACE TORCH_CUDA_USE_NVTX3)
|
||||
else()
|
||||
message(WARNING "Cannot find NVTX3, find old NVTX instead")
|
||||
add_library(torch::nvtoolsext INTERFACE IMPORTED)
|
||||
set_property(TARGET torch::nvtoolsext PROPERTY INTERFACE_LINK_LIBRARIES CUDA::nvToolsExt)
|
||||
if(NOT TARGET CUDA::nvtx3)
|
||||
add_library(CUDA::nvtx3 INTERFACE IMPORTED)
|
||||
endif()
|
||||
if(NOT nvtx3_dir)
|
||||
find_path(nvtx3_dir NAMES nvtx3 PATHS "${PROJECT_SOURCE_DIR}/third_party/NVTX/c/include" NO_DEFAULT_PATH)
|
||||
target_include_directories(CUDA::nvtx3 INTERFACE "${nvtx3_dir}")
|
||||
endif()
|
||||
|
||||
|
||||
|
@ -132,9 +132,6 @@ if(@USE_CUDA@)
|
||||
else()
|
||||
list(APPEND TORCH_CUDA_LIBRARIES torch::nvrtc)
|
||||
endif()
|
||||
if(TARGET torch::nvtoolsext)
|
||||
list(APPEND TORCH_CUDA_LIBRARIES torch::nvtoolsext)
|
||||
endif()
|
||||
|
||||
if(@BUILD_SHARED_LIBS@)
|
||||
find_library(C10_CUDA_LIBRARY c10_cuda PATHS "${TORCH_INSTALL_PREFIX}/lib")
|
||||
|
@ -146,13 +146,7 @@ if(USE_CUDA)
|
||||
list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_CUFILE)
|
||||
endif()
|
||||
|
||||
if(TARGET torch::nvtx3)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtx3)
|
||||
else()
|
||||
if(TARGET torch::nvtoolsext)
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES torch::nvtoolsext)
|
||||
endif()
|
||||
endif()
|
||||
list(APPEND TORCH_PYTHON_LINK_LIBRARIES CUDA::nvtx3)
|
||||
endif()
|
||||
|
||||
if(USE_ROCM)
|
||||
|
@ -206,20 +206,6 @@ if sys.platform == "win32":
|
||||
if os.path.exists(p)
|
||||
]
|
||||
|
||||
if not builtins.any(
|
||||
os.path.exists(os.path.join(p, "nvToolsExt64_1.dll")) for p in dll_paths
|
||||
):
|
||||
nvtoolsext_dll_path = os.path.join(
|
||||
os.getenv(
|
||||
"NVTOOLSEXT_PATH",
|
||||
os.path.join(pfiles_path, "NVIDIA Corporation", "NvToolsExt"),
|
||||
),
|
||||
"bin",
|
||||
"x64",
|
||||
)
|
||||
else:
|
||||
nvtoolsext_dll_path = ""
|
||||
|
||||
if cuda_version and builtins.all(
|
||||
not glob.glob(os.path.join(p, "cudart64*.dll")) for p in dll_paths
|
||||
):
|
||||
@ -232,9 +218,7 @@ if sys.platform == "win32":
|
||||
else:
|
||||
cuda_path = ""
|
||||
|
||||
dll_paths.extend(
|
||||
p for p in (nvtoolsext_dll_path, cuda_path) if os.path.exists(p)
|
||||
)
|
||||
dll_paths.extend(p for p in (cuda_path,) if os.path.exists(p))
|
||||
|
||||
kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True)
|
||||
with_load_library_flags = hasattr(kernel32, "AddDllDirectory")
|
||||
@ -371,7 +355,6 @@ def _load_global_deps() -> None:
|
||||
"cusparselt": "libcusparseLt.so.*[0-9]",
|
||||
"cusolver": "libcusolver.so.*[0-9]",
|
||||
"nccl": "libnccl.so.*[0-9]",
|
||||
"nvtx": "libnvToolsExt.so.*[0-9]",
|
||||
"nvshmem": "libnvshmem_host.so.*[0-9]",
|
||||
}
|
||||
# cufile is only available on cuda 12+
|
||||
|
@ -3,11 +3,7 @@
|
||||
#endif
|
||||
|
||||
#ifndef ROCM_ON_WINDOWS
|
||||
#ifdef TORCH_CUDA_USE_NVTX3
|
||||
#include <nvtx3/nvtx3.hpp>
|
||||
#else // TORCH_CUDA_USE_NVTX3
|
||||
#include <nvToolsExt.h>
|
||||
#endif // TORCH_CUDA_USE_NVTX3
|
||||
#else // ROCM_ON_WINDOWS
|
||||
#include <c10/util/Exception.h>
|
||||
#endif // ROCM_ON_WINDOWS
|
||||
@ -54,11 +50,7 @@ static void* device_nvtxRangeStart(const char* msg, std::intptr_t stream) {
|
||||
void initNvtxBindings(PyObject* module) {
|
||||
auto m = py::handle(module).cast<py::module>();
|
||||
|
||||
#ifdef TORCH_CUDA_USE_NVTX3
|
||||
auto nvtx = m.def_submodule("_nvtx", "nvtx3 bindings");
|
||||
#else
|
||||
auto nvtx = m.def_submodule("_nvtx", "libNvToolsExt.so bindings");
|
||||
#endif
|
||||
nvtx.def("rangePushA", nvtxRangePushA);
|
||||
nvtx.def("rangePop", nvtxRangePop);
|
||||
nvtx.def("rangeStartA", nvtxRangeStartA);
|
||||
|
@ -1,11 +1,7 @@
|
||||
#include <sstream>
|
||||
|
||||
#ifndef ROCM_ON_WINDOWS
|
||||
#ifdef TORCH_CUDA_USE_NVTX3
|
||||
#include <nvtx3/nvtx3.hpp>
|
||||
#else
|
||||
#include <nvToolsExt.h>
|
||||
#endif
|
||||
#else // ROCM_ON_WINDOWS
|
||||
#include <c10/util/Exception.h>
|
||||
#endif // ROCM_ON_WINDOWS
|
||||
|
@ -630,7 +630,6 @@ CUDA_INCLUDE_MAP = collections.OrderedDict(
|
||||
("cub/device/device_scan.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
|
||||
("cub/device/device_select.cuh", ("hipcub/hipcub.hpp", CONV_INCLUDE, API_BLAS)),
|
||||
("nvtx3/nvtx3.hpp", ("roctracer/roctx.h", CONV_INCLUDE, API_ROCTX)),
|
||||
("nvToolsExt.h", ("roctracer/roctx.h", CONV_INCLUDE, API_ROCTX)),
|
||||
("nvml.h", ("rocm_smi/rocm_smi.h", CONV_INCLUDE, API_ROCMSMI)),
|
||||
]
|
||||
)
|
||||
|
Reference in New Issue
Block a user