mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Enable mimalloc on pytorch Windows (#102595)
This PR is an implementation of [#102534](https://github.com/pytorch/pytorch/issues/102534), option 2. Major changes: 1. Add mimalloc as a submodule. 2. Add build option "USE_MIMALLOC". 3. It is only enabled by default for Windows builds, where it improves PyTorch memory allocation performance. Additional Test: <img width="953" alt="image" src="https://github.com/pytorch/pytorch/assets/8433590/4b2ec2dc-16f1-4ad9-b457-cfeb37e489d3"> This PR also builds and statically links mimalloc on Linux successfully. Pull Request resolved: https://github.com/pytorch/pytorch/pull/102595 Approved by: https://github.com/jgong5, https://github.com/malfet
This commit is contained in:
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -154,3 +154,6 @@
|
||||
[submodule "third_party/cutlass"]
|
||||
path = third_party/cutlass
|
||||
url = https://github.com/NVIDIA/cutlass.git
|
||||
[submodule "third_party/mimalloc"]
|
||||
path = third_party/mimalloc
|
||||
url = https://github.com/microsoft/mimalloc.git
|
||||
|
@ -348,6 +348,11 @@ cmake_dependent_option(
|
||||
cmake_dependent_option(
|
||||
BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF)
|
||||
|
||||
# Enable the third-party mimalloc library to improve memory allocation
# performance. It defaults to ON for Windows builds and OFF elsewhere. The
# platform only selects the *default* value, so an explicit
# -DUSE_MIMALLOC=<ON|OFF> from the user is honored on every platform (a plain
# set(USE_MIMALLOC ON) here would shadow the cache entry and silently ignore
# the user's choice on Windows).
if(WIN32)
  set(USE_MIMALLOC_DEFAULT ON)
else()
  set(USE_MIMALLOC_DEFAULT OFF)
endif()
option(USE_MIMALLOC "Use mimalloc" ${USE_MIMALLOC_DEFAULT})
|
||||
|
||||
if(USE_CCACHE)
|
||||
find_program(CCACHE_PROGRAM ccache)
|
||||
@ -1025,6 +1030,12 @@ include_directories(BEFORE ${PROJECT_BINARY_DIR})
|
||||
include_directories(BEFORE ${PROJECT_SOURCE_DIR}/aten/src/)
|
||||
include_directories(BEFORE ${PROJECT_BINARY_DIR}/aten/src/)
|
||||
|
||||
# When USE_MIMALLOC is enabled: pass -DUSE_MIMALLOC to the compiler for the
# targets configured from this directory scope, add the bundled
# third_party/mimalloc project to the build, and put its public headers on
# the include path.
if(USE_MIMALLOC)
|
||||
add_definitions(-DUSE_MIMALLOC)
|
||||
add_subdirectory(third_party/mimalloc)
|
||||
include_directories(third_party/mimalloc/include)
|
||||
endif()
|
||||
|
||||
# ---[ Main build
|
||||
add_subdirectory(c10)
|
||||
add_subdirectory(caffe2)
|
||||
|
@ -108,6 +108,11 @@ else()
|
||||
message(STATUS "don't use NUMA")
|
||||
endif()
|
||||
|
||||
# Statically link mimalloc into c10 when USE_MIMALLOC is enabled.
# Linking against the in-tree `mimalloc-static` target already makes c10
# depend on it for build ordering, so no separate add_dependencies() call is
# needed.
if(USE_MIMALLOC)
  target_link_libraries(c10 PRIVATE mimalloc-static)
endif()
|
||||
|
||||
if(ANDROID)
|
||||
target_link_libraries(c10 PRIVATE log)
|
||||
endif()
|
||||
|
@ -6,6 +6,10 @@
|
||||
#include <c10/util/irange.h>
|
||||
#include <c10/util/numa.h>
|
||||
|
||||
#ifdef USE_MIMALLOC
|
||||
#include <mimalloc.h>
|
||||
#endif
|
||||
|
||||
// TODO: rename flags to C10
|
||||
C10_DEFINE_bool(
|
||||
caffe2_cpu_allocator_do_zero_fill,
|
||||
@ -64,7 +68,11 @@ void* alloc_cpu(size_t nbytes) {
|
||||
nbytes,
|
||||
" bytes.");
|
||||
#elif defined(_MSC_VER)
|
||||
#ifdef USE_MIMALLOC
|
||||
data = mi_malloc_aligned(nbytes, gAlignment);
|
||||
#else
|
||||
data = _aligned_malloc(nbytes, gAlignment);
|
||||
#endif
|
||||
CAFFE_ENFORCE(
|
||||
data,
|
||||
"DefaultCPUAllocator: not enough memory: you tried to allocate ",
|
||||
@ -100,7 +108,11 @@ void* alloc_cpu(size_t nbytes) {
|
||||
|
||||
void free_cpu(void* data) {
|
||||
#ifdef _MSC_VER
|
||||
#ifdef USE_MIMALLOC
|
||||
mi_free(data);
|
||||
#else
|
||||
_aligned_free(data);
|
||||
#endif
|
||||
#else
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
|
||||
free(data);
|
||||
|
@ -162,6 +162,7 @@ function(caffe2_print_configuration_summary)
|
||||
if(${USE_TBB})
|
||||
message(STATUS " USE_SYSTEM_TBB : ${USE_SYSTEM_TBB}")
|
||||
endif()
|
||||
message(STATUS " USE_MIMALLOC : ${USE_MIMALLOC}")
|
||||
message(STATUS " USE_VULKAN : ${USE_VULKAN}")
|
||||
if(${USE_VULKAN})
|
||||
message(STATUS " USE_VULKAN_FP16_INFERENCE : ${USE_VULKAN_FP16_INFERENCE}")
|
||||
|
4
setup.py
4
setup.py
@ -205,6 +205,10 @@
|
||||
# Use system-provided libraries to satisfy the build dependencies.
|
||||
# When turned on, the following cmake variables will be toggled as well:
|
||||
# USE_SYSTEM_CPUINFO=ON USE_SYSTEM_SLEEF=ON BUILD_CUSTOM_PROTOBUF=OFF
|
||||
#
|
||||
# USE_MIMALLOC
|
||||
# Statically link mimalloc into C10, and use mimalloc in alloc_cpu & free_cpu.
|
||||
# By default, it is only enabled on Windows.
|
||||
|
||||
import sys
|
||||
if sys.platform == 'win32' and sys.maxsize.bit_length() == 31:
|
||||
|
1
third_party/mimalloc
vendored
Submodule
1
third_party/mimalloc
vendored
Submodule
Submodule third_party/mimalloc added at b66e3214d8
Reference in New Issue
Block a user