[Split Build] Add option to create libtorch wheel and use it to build pytorch as a separate wheel (#126328)

Creates an option to just build the libtorch portion of pytorch such that we have the necessary .so files. Then it builds a torch package using the libtorch wheel. These options are enabled using ` BUILD_LIBTORCH_WHL` and `BUILD_PYTHON_ONLY`. We run ``` BUILD_LIBTORCH_WHL=1 python setup.py install python setup.py clean BUILD_PYTHON_ONLY=1 python setup.py install ``` to produce ``` sahanp@devgpu086 ~/pytorch (detached HEAD|REBASE-i 3/5)> ls /home/sahanp/.conda/envs/pytorch-3.10/lib/python3.10/site-packages/torch/lib/ (pytorch-3.10) libshm.so* libtorch_global_deps.so* libtorch_python.so* sahanp@devgpu086 ~/pytorch (detached HEAD|REBASE-i 3/5)> ldd build/lib/libtorch_python.so (pytorch-3.10) linux-vdso.so.1 (0x00007ffdc2d37000) libtorch.so => /home/sahanp/.conda/envs/pytorch-3.10/lib/python3.10/site-packages/libtorch/lib/libtorch.so (0x00007f539fe99000) libshm.so => /home/sahanp/pytorch/build/lib/libshm.so (0x00007f539fe90000) libcudnn.so.8 => /usr/local/cuda-12.1/targets/x86_64-linux/lib/libcudnn.so.8 (0x00007f539e800000) libnvToolsExt.so.1 => /usr/local/cuda/lib64/libnvToolsExt.so.1 (0x00007f539e400000) libstdc++.so.6 => /lib64/libstdc++.so.6 (0x00007f539e000000) libm.so.6 => /lib64/libm.so.6 (0x00007f539fda5000) libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007f539ebe5000) libc.so.6 => /lib64/libc.so.6 (0x00007f539dc00000) /lib64/ld-linux-x86-64.so.2 (0x00007f539fea0000) libtorch_cpu.so => /home/sahanp/.conda/envs/pytorch-3.10/lib/python3.10/site-packages/libtorch/lib/libtorch_cpu.so (0x00007f5392400000) libtorch_cuda.so => /home/sahanp/.conda/envs/pytorch-3.10/lib/python3.10/site-packages/libtorch/lib/libtorch_cuda.so (0x00007f5380000000) librt.so.1 => /lib64/librt.so.1 (0x00007f539fd9e000) libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f539fd99000) libdl.so.2 => /lib64/libdl.so.2 (0x00007f539fd94000) libc10.so => /home/sahanp/.conda/envs/pytorch-3.10/lib/python3.10/site-packages/libtorch/lib/libc10.so (0x00007f539eb07000) libmkl_intel_lp64.so.2 => 
/home/sahanp/.conda/envs/pytorch-3.10/lib/libmkl_intel_lp64.so.2 (0x00007f537ec00000) libmkl_gnu_thread.so.2 => /home/sahanp/.conda/envs/pytorch-3.10/lib/libmkl_gnu_thread.so.2 (0x00007f537ce00000) libmkl_core.so.2 => /home/sahanp/.conda/envs/pytorch-3.10/lib/libmkl_core.so.2 (0x00007f5378800000) libomp.so => /home/sahanp/.conda/envs/pytorch-3.10/lib/libomp.so (0x00007f539e707000) libcupti.so.12 => /usr/local/cuda/lib64/libcupti.so.12 (0x00007f5377e00000) libcudart.so.12 => /usr/local/cuda/lib64/libcudart.so.12 (0x00007f5377a00000) libc10_cuda.so => /home/sahanp/.conda/envs/pytorch-3.10/lib/python3.10/site-packages/libtorch/lib/libc10_cuda.so (0x00007f539ea6a000) libcusparse.so.12 => /usr/local/cuda/lib64/libcusparse.so.12 (0x00007f5368400000) libcufft.so.11 => /usr/local/cuda/lib64/libcufft.so.11 (0x00007f535ee00000) libcusolver.so.11 => /usr/local/cuda/lib64/libcusolver.so.11 (0x00007f534c800000) libcurand.so.10 => /usr/local/cuda/lib64/libcurand.so.10 (0x00007f5346200000) libcublas.so.12 => /usr/local/cuda/lib64/libcublas.so.12 (0x00007f533f800000) libcublasLt.so.12 => /usr/local/cuda/lib64/libcublasLt.so.12 (0x00007f531e800000) libutil.so.1 => /lib64/libutil.so.1 (0x00007f539ea63000) libnvJitLink.so.12 => /usr/local/cuda/lib64/libnvJitLink.so.12 (0x00007f531b800000) sahanp@devgpu086 ~/pytorch (detached HEAD|REBASE-i 3/5)> ldd build/lib/libtorch_global_deps.so (pytorch-3.10) linux-vdso.so.1 (0x00007ffc265df000) libmkl_intel_lp64.so.2 => /home/sahanp/.conda/envs/pytorch-3.10/lib/libmkl_intel_lp64.so.2 (0x00007fa93fc00000) libmkl_gnu_thread.so.2 => /home/sahanp/.conda/envs/pytorch-3.10/lib/libmkl_gnu_thread.so.2 (0x00007fa93de00000) libmkl_core.so.2 => /home/sahanp/.conda/envs/pytorch-3.10/lib/libmkl_core.so.2 (0x00007fa939800000) libm.so.6 => /lib64/libm.so.6 (0x00007fa940f05000) libcudart.so.12 => /usr/local/cuda/lib64/libcudart.so.12 (0x00007fa939400000) libnvToolsExt.so.1 => /usr/local/cuda/lib64/libnvToolsExt.so.1 (0x00007fa939000000) libgomp.so.1 => 
/home/sahanp/.conda/envs/pytorch-3.10/lib/libgomp.so.1 (0x00007fa93fb07000) libc.so.6 => /lib64/libc.so.6 (0x00007fa938c00000) libdl.so.2 => /lib64/libdl.so.2 (0x00007fa940efe000) libpthread.so.0 => /lib64/libpthread.so.0 (0x00007fa940ef9000) /lib64/ld-linux-x86-64.so.2 (0x00007fa940ff5000) librt.so.1 => /lib64/librt.so.1 (0x00007fa940ef2000) libstdc++.so.6 => /home/sahanp/.conda/envs/pytorch-3.10/lib/libstdc++.so.6 (0x00007fa93921d000) libgcc_s.so.1 => /home/sahanp/.conda/envs/pytorch-3.10/lib/libgcc_s.so.1 (0x00007fa93faec000) ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/126328 Approved by: https://github.com/atalman
2025-10-20 21:14:14 +08:00 · 2024-05-28 15:33:53 -07:00
parent 8090145936
commit a25b28a753
14 changed files with 918 additions and 600 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
--- a/c10/CMakeLists.txt
+++ b/c10/CMakeLists.txt
@ -12,120 +12,128 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 # protobuf header files, because protobuf header files will transitively force
 # one to link against a specific protobuf version.

-# ---[ Configure macro file.
-set(C10_USE_GFLAGS ${USE_GFLAGS}) # used in cmake_macros.h.in
-set(C10_USE_GLOG ${USE_GLOG}) # used in cmake_macros.h.in
-set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in
-set(C10_USE_NUMA ${USE_NUMA})
-set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME})
-set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT})
-configure_file(
-    ${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in
-    ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h)
-
-# Note: if you want to add ANY dependency to the c10 library, make sure you
-# check with the core PyTorch developers as the dependency will be
-# transitively passed on to all libraries dependent on PyTorch.
-file(GLOB C10_SRCS
-        *.cpp
-        core/*.cpp
-        core/impl/*.cpp
-        mobile/*.cpp
-        macros/*.cpp
-        util/*.cpp
-      )
-file(GLOB C10_HEADERS
-        *.h
-        core/*.h
-        core/impl/*.h
-        mobile/*.h
-        macros/*.h
-        util/*.h
-      )
-add_library(c10 ${C10_SRCS} ${C10_HEADERS})
-target_compile_options_if_supported(c10 "-Wdeprecated")
-if(HAVE_SOVERSION)
-  set_target_properties(c10 PROPERTIES
-      VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
-endif()
-# If building shared library, set dllimport/dllexport proper.
-target_compile_options(c10 PRIVATE "-DC10_BUILD_MAIN_LIB")
-# Enable hidden visibility if compiler supports it.
-if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
-  target_compile_options(c10 PRIVATE "-fvisibility=hidden")
+if(BUILD_LIBTORCHLESS)
+  find_library(C10_LIB c10 PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
+else()
+  set(C10_LIB c10)
 endif()

-option(C10_USE_IWYU "Use include-what-you-use to clean up header inclusion" OFF)
-if(C10_USE_IWYU)
-  find_program(iwyu NAMES include-what-you-use)
-  if(iwyu)
-    set(iwyu_cmd
-        "include-what-you-use"
-        "-Xiwyu"
-        "--transitive_includes_only"
-        "-Xiwyu"
-        "--no_fwd_decls"
-        "-Xiwyu"
-        "--prefix_header_includes=keep"
-        "-Xiwyu"
-        "--mapping_file=${CMAKE_CURRENT_LIST_DIR}/../tools/iwyu/all.imp"
-      )
-    set_property(TARGET c10 PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${iwyu_cmd})
+  # ---[ Configure macro file.
+  set(C10_USE_GFLAGS ${USE_GFLAGS}) # used in cmake_macros.h.in
+  set(C10_USE_GLOG ${USE_GLOG}) # used in cmake_macros.h.in
+  set(C10_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in
+  set(C10_USE_NUMA ${USE_NUMA})
+  set(C10_USE_MSVC_STATIC_RUNTIME ${CAFFE2_USE_MSVC_STATIC_RUNTIME})
+  set(C10_USE_ROCM_KERNEL_ASSERT ${USE_ROCM_KERNEL_ASSERT})
+  configure_file(
+      ${CMAKE_CURRENT_LIST_DIR}/macros/cmake_macros.h.in
+      ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h)
+
+  # Note: if you want to add ANY dependency to the c10 library, make sure you
+  # check with the core PyTorch developers as the dependency will be
+  # transitively passed on to all libraries dependent on PyTorch.
+  file(GLOB C10_SRCS
+          *.cpp
+          core/*.cpp
+          core/impl/*.cpp
+          mobile/*.cpp
+          macros/*.cpp
+          util/*.cpp
+        )
+  file(GLOB C10_HEADERS
+          *.h
+          core/*.h
+          core/impl/*.h
+          mobile/*.h
+          macros/*.h
+          util/*.h
+        )
+if(NOT BUILD_LIBTORCHLESS)
+  add_library(c10 ${C10_SRCS} ${C10_HEADERS})
+  target_compile_options_if_supported(c10 "-Wdeprecated")
+  if(HAVE_SOVERSION)
+    set_target_properties(c10 PROPERTIES
+        VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
+  endif()
+  # If building shared library, set dllimport/dllexport proper.
+  target_compile_options(c10 PRIVATE "-DC10_BUILD_MAIN_LIB")
+  # Enable hidden visibility if compiler supports it.
+  if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
+    target_compile_options(c10 PRIVATE "-fvisibility=hidden")
  endif()
-endif()

-if(WERROR)
-  target_compile_options_if_supported(c10 PRIVATE "-Werror=sign-compare")
-  target_compile_options_if_supported(c10 PRIVATE "-Werror=shadow")
-endif()
+  option(C10_USE_IWYU "Use include-what-you-use to clean up header inclusion" OFF)
+  if(C10_USE_IWYU)
+    find_program(iwyu NAMES include-what-you-use)
+    if(iwyu)
+      set(iwyu_cmd
+          "include-what-you-use"
+          "-Xiwyu"
+          "--transitive_includes_only"
+          "-Xiwyu"
+          "--no_fwd_decls"
+          "-Xiwyu"
+          "--prefix_header_includes=keep"
+          "-Xiwyu"
+          "--mapping_file=${CMAKE_CURRENT_LIST_DIR}/../tools/iwyu/all.imp"
+        )
+      set_property(TARGET c10 PROPERTY CXX_INCLUDE_WHAT_YOU_USE ${iwyu_cmd})
+    endif()
+  endif()

-# ---[ Dependency of c10
-if(C10_USE_GFLAGS)
-  target_link_libraries(c10 PUBLIC gflags)
-endif()
+  if(WERROR)
+    target_compile_options_if_supported(c10 PRIVATE "-Werror=sign-compare")
+    target_compile_options_if_supported(c10 PRIVATE "-Werror=shadow")
+  endif()

-if(C10_USE_GLOG)
-  target_link_libraries(c10 PUBLIC glog::glog)
-endif()
-target_link_libraries(c10 PRIVATE fmt::fmt-header-only)
+  # ---[ Dependency of c10
+  if(C10_USE_GFLAGS)
+    target_link_libraries(c10 PUBLIC gflags)
+  endif()

-if(C10_USE_NUMA)
-  message(STATUS "NUMA paths:")
-  message(STATUS ${Numa_INCLUDE_DIR})
-  message(STATUS ${Numa_LIBRARIES})
-  target_include_directories(c10 PRIVATE ${Numa_INCLUDE_DIR})
-  target_link_libraries(c10 PRIVATE ${Numa_LIBRARIES})
-else()
-  message(STATUS "don't use NUMA")
-endif()
+  if(C10_USE_GLOG)
+    target_link_libraries(c10 PUBLIC glog::glog)
+  endif()
+  target_link_libraries(c10 PRIVATE fmt::fmt-header-only)

-if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "s390x" AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le")
-  target_link_libraries(c10 PRIVATE cpuinfo)
-endif()
+  if(C10_USE_NUMA)
+    message(STATUS "NUMA paths:")
+    message(STATUS ${Numa_INCLUDE_DIR})
+    message(STATUS ${Numa_LIBRARIES})
+    target_include_directories(c10 PRIVATE ${Numa_INCLUDE_DIR})
+    target_link_libraries(c10 PRIVATE ${Numa_LIBRARIES})
+  else()
+    message(STATUS "don't use NUMA")
+  endif()

-find_package(Backtrace)
-if(Backtrace_FOUND)
-  target_include_directories(c10 PRIVATE ${Backtrace_INCLUDE_DIRS})
-  target_link_libraries(c10 PRIVATE ${Backtrace_LIBRARIES})
-  target_compile_definitions(c10 PRIVATE SUPPORTS_BACKTRACE=1)
-else()
-  target_compile_definitions(c10 PRIVATE SUPPORTS_BACKTRACE=0)
-endif()
+  if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "s390x" AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le")
+    target_link_libraries(c10 PRIVATE cpuinfo)
+  endif()

-if(USE_MIMALLOC)
-  target_link_libraries(c10 PRIVATE "mimalloc-static")
-  add_dependencies(c10 mimalloc-static)
-endif()
+  find_package(Backtrace)
+  if(Backtrace_FOUND)
+    target_include_directories(c10 PRIVATE ${Backtrace_INCLUDE_DIRS})
+    target_link_libraries(c10 PRIVATE ${Backtrace_LIBRARIES})
+    target_compile_definitions(c10 PRIVATE SUPPORTS_BACKTRACE=1)
+  else()
+    target_compile_definitions(c10 PRIVATE SUPPORTS_BACKTRACE=0)
+  endif()

-if(ANDROID)
-  target_link_libraries(c10 PRIVATE log)
-endif()
+  if(USE_MIMALLOC)
+    target_link_libraries(c10 PRIVATE "mimalloc-static")
+    add_dependencies(c10 mimalloc-static)
+  endif()

-target_include_directories(
-    c10 PUBLIC
-    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../>
-    $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
-    $<INSTALL_INTERFACE:include>)
+  if(ANDROID)
+    target_link_libraries(c10 PRIVATE log)
+  endif()
+
+  target_include_directories(
+      c10 PUBLIC
+      $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../>
+      $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
+      $<INSTALL_INTERFACE:include>)
+endif()

 add_subdirectory(test)
 add_subdirectory(benchmark)
@ -144,11 +152,14 @@ if(USE_XPU)
  add_subdirectory(xpu)
 endif()

-# ---[ Installation
-# Note: for now, we will put all export path into one single Caffe2Targets group
-# to deal with the cmake deployment need. Inside the Caffe2Targets set, the
-# individual libraries like libc10.so and libcaffe2.so are still self-contained.
-install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib)
+if(NOT BUILD_LIBTORCHLESS)
+  # ---[ Installation
+  # Note: for now, we will put all export path into one single Caffe2Targets group
+  # to deal with the cmake deployment need. Inside the Caffe2Targets set, the
+  # individual libraries like libc10.so and libcaffe2.so are still self-contained.
+  install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib)
+endif()
+
 install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
        DESTINATION include
        FILES_MATCHING PATTERN "*.h")
--- a/c10/benchmark/CMakeLists.txt
+++ b/c10/benchmark/CMakeLists.txt
@ -6,7 +6,7 @@ if(BUILD_TEST)
    get_filename_component(bench_file_name ${bench_src} NAME_WE)
    set(bench_name "c10_${bench_file_name}")
    add_executable(${bench_name} "${bench_src}")
-    target_link_libraries(${bench_name} c10 benchmark)
+    target_link_libraries(${bench_name} ${C10_LIB} benchmark)
    if(INSTALL_TEST)
      install(TARGETS ${bench_name} DESTINATION test)
    endif()
--- a/c10/cuda/CMakeLists.txt
+++ b/c10/cuda/CMakeLists.txt
@ -12,6 +12,10 @@ configure_file(
    ${CMAKE_CURRENT_LIST_DIR}/impl/cuda_cmake_macros.h.in
    ${CMAKE_BINARY_DIR}/c10/cuda/impl/cuda_cmake_macros.h)

+if(BUILD_LIBTORCHLESS)
+  find_library(C10_CUDA_LIB c10_cuda PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
+endif()
+
 # Note: if you want to add ANY dependency to the c10 library, make sure you
 # check with the core PyTorch developers as the dependency will be
 # transitively passed on to all libraries dependent on PyTorch.
@ -47,36 +51,42 @@ set(C10_CUDA_HEADERS
    impl/CUDATest.h
 )
 set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
-torch_cuda_based_add_library(c10_cuda ${C10_CUDA_SRCS} ${C10_CUDA_HEADERS})
-set(CUDA_LINK_LIBRARIES_KEYWORD)
-# If building shared library, set dllimport/dllexport proper.
-target_compile_options(c10_cuda PRIVATE "-DC10_CUDA_BUILD_MAIN_LIB")
-# Enable hidden visibility if compiler supports it.
-if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
-  target_compile_options(c10_cuda PRIVATE "-fvisibility=hidden")
-endif()

-# ---[ Dependency of c10_cuda
-target_link_libraries(c10_cuda PUBLIC c10 torch::cudart)
+if(NOT BUILD_LIBTORCHLESS)
+  torch_cuda_based_add_library(c10_cuda ${C10_CUDA_SRCS} ${C10_CUDA_HEADERS})
+  set(CUDA_LINK_LIBRARIES_KEYWORD)
+  # If building shared library, set dllimport/dllexport proper.
+  target_compile_options(c10_cuda PRIVATE "-DC10_CUDA_BUILD_MAIN_LIB")
+  # Enable hidden visibility if compiler supports it.
+  if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
+    target_compile_options(c10_cuda PRIVATE "-fvisibility=hidden")
+  endif()

-if(NOT WIN32)
-target_link_libraries(c10_cuda PRIVATE dl)
-target_compile_options(c10_cuda PRIVATE "-DPYTORCH_C10_DRIVER_API_SUPPORTED")
-endif()
+  # ---[ Dependency of c10_cuda
+  target_link_libraries(c10_cuda PUBLIC ${C10_LIB} torch::cudart)

-target_include_directories(
-    c10_cuda PUBLIC
-    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
-    $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
-    $<INSTALL_INTERFACE:include>)
+  if(NOT WIN32)
+  target_link_libraries(c10_cuda PRIVATE dl)
+  target_compile_options(c10_cuda PRIVATE "-DPYTORCH_C10_DRIVER_API_SUPPORTED")
+  endif()

-add_subdirectory(test)
+  target_include_directories(
+      c10_cuda PUBLIC
+      $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
+      $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
+      $<INSTALL_INTERFACE:include>)
+  set(C10_CUDA_LIB c10_cuda)

 # ---[ Installation
 # Note: for now, we will put all export path into one single Caffe2Targets group
 # to deal with the cmake deployment need. Inside the Caffe2Targets set, the
 # individual libraries like libc10.so and libcaffe2.so are still self-contained.
 install(TARGETS c10_cuda EXPORT Caffe2Targets DESTINATION lib)
+
+endif()
+
+add_subdirectory(test)
+
 foreach(file ${C10_CUDA_HEADERS})
  get_filename_component( dir ${file} DIRECTORY )
  install( FILES ${file} DESTINATION include/c10/cuda/${dir} )
--- a/c10/cuda/test/CMakeLists.txt
+++ b/c10/cuda/test/CMakeLists.txt
@ -15,7 +15,7 @@ if(BUILD_TEST)
    get_filename_component(test_file_name ${test_src} NAME_WE)
    set(test_name "c10_cuda_${test_file_name}")
    add_executable(${test_name} "${test_src}")
-    target_link_libraries(${test_name} c10_cuda gtest_main)
+    target_link_libraries(${test_name} ${C10_CUDA_LIB} ${C10_LIB} gtest_main)
    add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
    if(INSTALL_TEST)
      install(TARGETS ${test_name} DESTINATION test)
--- a/c10/hip/CMakeLists.txt
+++ b/c10/hip/CMakeLists.txt
@ -5,6 +5,10 @@

 include(../../cmake/public/utils.cmake)

+if(BUILD_LIBTORCHLESS)
+  find_library(C10_HIP_LIB c10_hip PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
+endif()
+
 # ---[ Configure macro file.
 set(C10_HIP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) # used in cmake_macros.h.in
 configure_file(
@ -26,36 +30,40 @@ file(GLOB __c10_hip_srcs_cpp *.cc impl/*.cc)
 set_source_files_properties(${__c10_hip_srcs_cpp} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)

 file(GLOB_RECURSE C10_HIP_HEADERS *.h)
-hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS})

-# Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake
-target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS})
+if(NOT BUILD_LIBTORCHLESS)
+  hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS})

-# caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be
-# minimal.  I'm not sure if we need hip_hcc or not; for now leave it out
+  # Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake
+  target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS})

-# If building shared library, set dllimport/dllexport proper.
-target_compile_options(c10_hip PRIVATE "-DC10_HIP_BUILD_MAIN_LIB")
-# Enable hidden visibility if compiler supports it.
-if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
-  target_compile_options(c10_hip PRIVATE "-fvisibility=hidden")
+  # caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be
+  # minimal.  I'm not sure if we need hip_hcc or not; for now leave it out
+
+  # If building shared library, set dllimport/dllexport proper.
+  target_compile_options(c10_hip PRIVATE "-DC10_HIP_BUILD_MAIN_LIB")
+  # Enable hidden visibility if compiler supports it.
+  if(${COMPILER_SUPPORTS_HIDDEN_VISIBILITY})
+    target_compile_options(c10_hip PRIVATE "-fvisibility=hidden")
+  endif()
+
+  # ---[ Dependency of c10_hip
+  target_link_libraries(c10_hip PUBLIC c10)
+
+  target_link_libraries(c10_hip PUBLIC ${PYTORCH_HIP_LIBRARIES})
+
+  target_include_directories(
+      c10_hip PUBLIC
+      $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
+      $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
+      $<INSTALL_INTERFACE:include>)
+  install(TARGETS c10_hip EXPORT Caffe2Targets DESTINATION lib)
+  set(C10_HIP_LIB c10_hip)
 endif()

-# ---[ Dependency of c10_hip
-target_link_libraries(c10_hip PUBLIC c10)
-
-target_link_libraries(c10_hip PUBLIC ${PYTORCH_HIP_LIBRARIES})
-
-target_include_directories(
-    c10_hip PUBLIC
-    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../..>
-    $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
-    $<INSTALL_INTERFACE:include>)
-
 add_subdirectory(test)

 # ---[ Installation
-install(TARGETS c10_hip EXPORT Caffe2Targets DESTINATION lib)
 install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
        DESTINATION include
        FILES_MATCHING PATTERN "*.h")
--- a/c10/test/CMakeLists.txt
+++ b/c10/test/CMakeLists.txt
@ -9,7 +9,7 @@ if(BUILD_TEST)
    if(NOT MSVC)
      target_compile_options(${test_name} PRIVATE -Wno-unused-variable)
    endif()
-    target_link_libraries(${test_name} c10 gmock gtest gtest_main)
+    target_link_libraries(${test_name} ${C10_LIB} gmock gtest gtest_main)
    add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
    if(INSTALL_TEST)
      install(TARGETS ${test_name} DESTINATION test)
--- a/c10/xpu/CMakeLists.txt
+++ b/c10/xpu/CMakeLists.txt
@ -4,6 +4,10 @@

 include(../../cmake/public/xpu.cmake)

+if(BUILD_LIBTORCHLESS)  # c10_xpu is not built here: locate it under LIBTORCH_LIB_PATH
+  find_library(C10_XPU_LIB c10_xpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
+endif()
+
 set(C10_XPU_SRCS
    XPUCachingAllocator.cpp
    XPUFunctions.cpp
@ -19,7 +23,7 @@ set(C10_XPU_HEADERS
    XPUStream.h
    impl/XPUGuardImpl.h
 )
-
+if(NOT BUILD_LIBTORCHLESS)
 add_library(c10_xpu ${C10_XPU_SRCS} ${C10_XPU_HEADERS})
 target_compile_options(c10_xpu PRIVATE "-DC10_XPU_BUILD_MAIN_LIB")
 # Enable hidden visibility if compiler supports it.
@ -35,11 +39,13 @@ target_include_directories(
    $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
    $<INSTALL_INTERFACE:include>
    )
-
-add_subdirectory(test)
+  install(TARGETS c10_xpu EXPORT Caffe2Targets DESTINATION lib)
+  set(C10_XPU_LIB c10_xpu)
+  add_subdirectory(test)
+endif()

 # ---[ Installation
-install(TARGETS c10_xpu EXPORT Caffe2Targets DESTINATION lib)
+
 foreach(file ${C10_XPU_HEADERS})
  get_filename_component(dir ${file} DIRECTORY)
  install(FILES ${file} DESTINATION include/c10/xpu/${dir})
--- a/c10/xpu/test/CMakeLists.txt
+++ b/c10/xpu/test/CMakeLists.txt
@ -11,7 +11,7 @@ if(BUILD_TEST)
    get_filename_component(test_file_name ${test_src} NAME_WE)
    set(test_name "c10_xpu_${test_file_name}")
    add_executable(${test_name} "${test_src}")
-    target_link_libraries(${test_name} c10_xpu gtest_main)
+    target_link_libraries(${test_name} ${C10_XPU_LIB} gtest_main)
    add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
    if(INSTALL_TEST)
      install(TARGETS ${test_name} DESTINATION test)
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@ -785,6 +785,32 @@ set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS_NON_AVX} ${Caffe2_CPU_SRCS_AVX2} ${Caffe2_
 # END formerly-libtorch sources
 # ==========================================================

+if(BUILD_LIBTORCHLESS)
+  find_library(TORCH_LIB torch PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
+  find_library(TORCH_CPU_LIB torch_cpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
+
+  if(USE_CUDA)
+    find_library(TORCH_CUDA_LIB torch_cuda PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
+  endif()
+
+  if(USE_ROCM)
+    find_library(TORCH_HIP_LIB torch_hip PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
+  endif()
+
+  if(USE_XPU)
+    find_library(TORCH_XPU_LIB torch_xpu PATHS $ENV{LIBTORCH_LIB_PATH} NO_DEFAULT_PATH)
+  endif()
+  add_subdirectory(../torch torch)
+else()
+  set(TORCH_LIB torch)
+  set(TORCH_CPU_LIB torch_cpu)
+  set(TORCH_CUDA_LIB torch_cuda)
+  set(TORCH_HIP_LIB torch_hip)
+  set(TORCH_XPU_LIB torch_xpu)
+endif()
+
+
+if(NOT BUILD_LIBTORCHLESS)
 add_library(torch_cpu ${Caffe2_CPU_SRCS})
 if(HAVE_SOVERSION)
  set_target_properties(torch_cpu PROPERTIES
@ -1622,6 +1648,7 @@ endif()
 # ---[ XPU library.
 if(USE_XPU)
  target_link_libraries(torch_xpu INTERFACE torch::xpurt)
+
  target_link_libraries(torch_xpu PUBLIC c10_xpu)

  target_include_directories(
@ -1701,6 +1728,7 @@ if(USE_ROCM)

  # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added.
  target_compile_options(torch_hip PUBLIC ${HIP_CXX_FLAGS})  # experiment
+
  target_link_libraries(torch_hip PUBLIC c10_hip)

  if(NOT INTERN_BUILD_MOBILE)
@ -1921,6 +1949,7 @@ if(MSVC)
    set_source_files_properties(${tmp_path} PROPERTIES COMPILE_FLAGS "-Xcompiler /Zc:lambda")
  endforeach()
 endif()
+endif()

 # Note: we only install the caffe2 python files if BUILD_CAFFE2_OPS is ON
 # This is because the build rules here written in such a way that they always
--- a/functorch/CMakeLists.txt
+++ b/functorch/CMakeLists.txt
@ -16,7 +16,12 @@ target_compile_definitions(${PROJECT_NAME} PRIVATE TORCH_API_INCLUDE_EXTENSION_H
 target_compile_options(${PROJECT_NAME} PRIVATE ${TORCH_PYTHON_COMPILE_OPTIONS})
 target_compile_options_if_supported(${PROJECT_NAME} "-Wmissing-prototypes")
 target_compile_options_if_supported(${PROJECT_NAME} "-Werror=missing-prototypes")
-target_link_libraries(${PROJECT_NAME} PRIVATE torch torch_python)
+if(BUILD_LIBTORCHLESS)
+  target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIB} torch_python)
+else()
+  # functorch cannot use the alias to build on windows
+  target_link_libraries(${PROJECT_NAME} PRIVATE torch torch_python)
+endif()
 target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11)

 set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
--- a/setup.py
+++ b/setup.py
@ -1,5 +1,4 @@
 # Welcome to the PyTorch setup.py.
-#
 # Environment variables you are probably interested in:
 #
 #   DEBUG
@ -199,7 +198,15 @@
 #
 #   USE_PRIORITIZED_TEXT_FOR_LD
 #      Uses prioritized text form cmake/prioritized_text.txt for LD
+#
+#   BUILD_LIBTORCH_WHL
+#      Builds libtorch.so and its dependencies as a wheel
+#
+#   BUILD_PYTHON_ONLY
+#      Builds pytorch as a wheel using libtorch.so from a separate wheel

+import os
+import pkgutil
 import sys

 if sys.platform == "win32" and sys.maxsize.bit_length() == 31:
@ -210,6 +217,34 @@ if sys.platform == "win32" and sys.maxsize.bit_length() == 31:

 import platform

+
+def _get_package_path(package_name):
+    loader = pkgutil.find_loader(package_name)
+    if loader:
+        # The package might be a namespace package, so get_data may fail
+        try:
+            file_path = loader.get_filename()
+            return os.path.dirname(file_path)
+        except AttributeError:
+            pass
+    return None
+
+
+BUILD_LIBTORCH_WHL = os.getenv("BUILD_LIBTORCH_WHL", "0") == "1"
+BUILD_PYTHON_ONLY = os.getenv("BUILD_PYTHON_ONLY", "0") == "1"
+
+
+# set up appropriate env variables
+if BUILD_LIBTORCH_WHL:
+    # Set up environment variables for ONLY building libtorch.so and not libtorch_python.so
+    # functorch is not supported without python
+    os.environ["BUILD_FUNCTORCH"] = "OFF"
+
+
+if BUILD_PYTHON_ONLY:
+    os.environ["BUILD_LIBTORCHLESS"] = "ON"
+    os.environ["LIBTORCH_LIB_PATH"] = f"{_get_package_path('libtorch')}/lib"
+
 python_min_version = (3, 8, 0)
 python_min_version_str = ".".join(map(str, python_min_version))
 if sys.version_info < python_min_version:
@ -222,7 +257,6 @@ import filecmp
 import glob
 import importlib
 import json
-import os
 import shutil
 import subprocess
 import sysconfig
@ -314,7 +348,10 @@ cmake_python_include_dir = sysconfig.get_path("include")
 ################################################################################
 # Version, create_version_file, and package_name
 ################################################################################
-package_name = os.getenv("TORCH_PACKAGE_NAME", "torch")
+
+DEFAULT_PACKAGE_NAME = "libtorch" if BUILD_LIBTORCH_WHL else "torch"
+
+package_name = os.getenv("TORCH_PACKAGE_NAME", DEFAULT_PACKAGE_NAME)
 package_type = os.getenv("PACKAGE_TYPE", "wheel")
 version = get_torch_version()
 report(f"Building wheel {package_name}-{version}")
@ -437,11 +474,12 @@ def build_deps():

    check_submodules()
    check_pydep("yaml", "pyyaml")
+    build_python = not BUILD_LIBTORCH_WHL

    build_caffe2(
        version=version,
        cmake_python_library=cmake_python_library,
-        build_python=True,
+        build_python=build_python,
        rerun_cmake=RERUN_CMAKE,
        cmake_only=CMAKE_ONLY,
        cmake=cmake,
@ -698,6 +736,8 @@ class build_ext(setuptools.command.build_ext.build_ext):
            "caffe2.python.caffe2_pybind11_state_gpu",
            "caffe2.python.caffe2_pybind11_state_hip",
        ]
+        if BUILD_LIBTORCH_WHL:
+            caffe2_pybind_exts = []
        i = 0
        while i < len(self.extensions):
            ext = self.extensions[i]
@ -929,9 +969,14 @@ def configure_extension_build():

    main_compile_args = []
    main_libraries = ["torch_python"]
+
    main_link_args = []
    main_sources = ["torch/csrc/stub.c"]

+    if BUILD_LIBTORCH_WHL:
+        main_libraries = ["torch"]
+        main_sources = []
+
    if cmake_cache_vars["USE_CUDA"]:
        library_dirs.append(os.path.dirname(cmake_cache_vars["CUDA_CUDA_LIB"]))

@ -1053,7 +1098,6 @@ def configure_extension_build():
            "default = torch.distributed.elastic.multiprocessing:DefaultLogsSpecs",
        ],
    }
-
    return extensions, cmdclass, packages, entry_points, extra_install_requires


@ -1080,6 +1124,11 @@ def print_box(msg):


 def main():
+    if BUILD_LIBTORCH_WHL and BUILD_PYTHON_ONLY:
+        raise RuntimeError(
+            "Conflict: 'BUILD_LIBTORCH_WHL' and 'BUILD_PYTHON_ONLY' can't both be 1. Set one to 0 and rerun."
+        )
+
    # the list of runtime dependencies required by this built package
    install_requires = [
        "filelock",
@ -1091,6 +1140,9 @@ def main():
        'mkl>=2021.1.1,<=2021.4.0; platform_system == "Windows"',
    ]

+    if BUILD_PYTHON_ONLY:
+        install_requires.append("libtorch")
+
    use_prioritized_text = str(os.getenv("USE_PRIORITIZED_TEXT_FOR_LD", ""))
    if (
        use_prioritized_text == ""
@ -1166,10 +1218,6 @@ def main():
        "nn/parallel/*.pyi",
        "utils/data/*.pyi",
        "utils/data/datapipes/*.pyi",
-        "lib/*.so*",
-        "lib/*.dylib*",
-        "lib/*.dll",
-        "lib/*.lib",
        "lib/*.pdb",
        "lib/torch_shm_manager",
        "lib/*.h",
@ -1335,6 +1383,23 @@ def main():
        "utils/model_dump/*.mjs",
    ]

+    if BUILD_PYTHON_ONLY:
+        torch_package_data.extend(
+            [
+                "lib/libtorch_python*",
+                "lib/*shm*",
+                "lib/libtorch_global_deps*",
+            ]
+        )
+    else:
+        torch_package_data.extend(
+            [
+                "lib/*.so*",
+                "lib/*.dylib*",
+                "lib/*.dll",
+                "lib/*.lib",
+            ]
+        )
    if get_cmake_cache_vars()["BUILD_CAFFE2"]:
        torch_package_data.extend(
            [
@ -1377,6 +1442,29 @@ def main():
        "packaged/autograd/*",
        "packaged/autograd/templates/*",
    ]
+
+    if BUILD_LIBTORCH_WHL:
+        modified_packages = []
+        for package in packages:
+            parts = package.split(".")
+            if parts[0] == "torch":
+                modified_packages.append(DEFAULT_PACKAGE_NAME + package[len("torch") :])
+        packages = modified_packages
+        package_dir = {"libtorch": "torch"}
+        torch_package_dir_name = "libtorch"
+        package_data = {"libtorch": torch_package_data}
+        extensions = []
+    else:
+        torch_package_dir_name = "torch"
+        package_dir = {}
+        package_data = {
+            "torch": torch_package_data,
+            "torchgen": torchgen_package_data,
+            "caffe2": [
+                "python/serialized_test/data/operator_test/*.zip",
+            ],
+        }
+
    setup(
        name=package_name,
        version=version,
@ -1392,13 +1480,8 @@ def main():
        entry_points=entry_points,
        install_requires=install_requires,
        extras_require=extras_require,
-        package_data={
-            "torch": torch_package_data,
-            "torchgen": torchgen_package_data,
-            "caffe2": [
-                "python/serialized_test/data/operator_test/*.zip",
-            ],
-        },
+        package_data=package_data,
+        package_dir=package_dir,
        url="https://pytorch.org/",
        download_url="https://github.com/pytorch/pytorch/tags",
        author="PyTorch Team",
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@ -296,6 +296,8 @@ endif()


 add_library(torch_python SHARED ${TORCH_PYTHON_SRCS})
+add_dependencies(torch_python Caffe2_PROTO)
+add_dependencies(torch_python onnx_proto)
 # Avoid numpy for the DEPLOY build
 if(USE_NUMPY)
  target_link_libraries(torch_python PRIVATE numpy::numpy)
@ -344,7 +346,7 @@ endif()

 target_compile_definitions(torch_python PRIVATE "-DTHP_BUILD_MAIN_LIB")

-target_link_libraries(torch_python PRIVATE torch_library ${TORCH_PYTHON_LINK_LIBRARIES})
+target_link_libraries(torch_python PRIVATE ${TORCH_LIB} ${TORCH_PYTHON_LINK_LIBRARIES})

 target_compile_definitions(torch_python PRIVATE ${TORCH_PYTHON_COMPILE_DEFINITIONS})

@ -359,7 +361,9 @@ endif()

 if(BUILD_ONEDNN_GRAPH)
  target_compile_definitions(torch_python PRIVATE "-DBUILD_ONEDNN_GRAPH")
-  target_compile_definitions(torch_cpu PRIVATE "-DBUILD_ONEDNN_GRAPH")
+  if(NOT BUILD_LIBTORCHLESS)
+    target_compile_definitions(torch_cpu PRIVATE "-DBUILD_ONEDNN_GRAPH")
+  endif()
 endif()

 if(NOT TORCH_PYTHON_LINK_FLAGS STREQUAL "")
@ -401,7 +405,11 @@ if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
          ${TORCH_SRC_DIR}/csrc/jit/backends/nnapi/nnapi_backend_preprocess.cpp
          )
  # Pybind11 requires explicit linking of the torch_python library
-  target_link_libraries(nnapi_backend PRIVATE torch torch_python pybind::pybind11)
+  if(BUILD_LIBTORCHLESS)
+    target_link_libraries(nnapi_backend PRIVATE ${TORCH_LIB} torch_python pybind::pybind11)
+  else()
+    target_link_libraries(nnapi_backend PRIVATE torch torch_python pybind::pybind11)
+  endif()
 endif()

 set(TORCH_PYTHON_COMPILE_OPTIONS ${TORCH_PYTHON_COMPILE_OPTIONS} PARENT_SCOPE)
--- a/torch/lib/libshm/CMakeLists.txt
+++ b/torch/lib/libshm/CMakeLists.txt
@ -22,7 +22,7 @@ set_target_properties(shm PROPERTIES
  PREFIX "lib"
  IMPORT_PREFIX "lib"
  CXX_STANDARD 17)
-target_link_libraries(shm PRIVATE torch_cpu)
+target_link_libraries(shm PRIVATE ${TORCH_CPU_LIB})

 if(UNIX AND NOT APPLE)
  include(CheckLibraryExists)
@ -60,7 +60,12 @@ if(UNIX AND NOT APPLE)
 endif()

 add_executable(torch_shm_manager manager.cpp)
-target_link_libraries(torch_shm_manager PRIVATE shm c10)
+if(BUILD_LIBTORCHLESS)
+  target_link_libraries(torch_shm_manager PRIVATE shm ${C10_LIB})
+else()
+  # we need to link directly to c10 here otherwise we miss symbols
+  target_link_libraries(torch_shm_manager PRIVATE shm c10)
+endif()
 set_target_properties(torch_shm_manager PROPERTIES
  INSTALL_RPATH "${_rpath_portable_origin}/../lib")