Enable mimalloc on pytorch Windows (#102595)

This PR is an implementation of [#102534](https://github.com/pytorch/pytorch/issues/102534), option 2.
Major changes:
1. Add mimalloc to the submodule.
2. Add build option "USE_MIMALLOC".
3. It is only enabled on Windows builds, where it improves PyTorch memory allocation performance.

Additional Test:
<img width="953" alt="image" src="https://github.com/pytorch/pytorch/assets/8433590/4b2ec2dc-16f1-4ad9-b457-cfeb37e489d3">
This PR also builds and statically links mimalloc on Linux without problems.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/102595
Approved by: https://github.com/jgong5, https://github.com/malfet
This commit is contained in:
Xu Han
2023-06-27 08:53:23 +00:00
committed by PyTorch MergeBot
parent 803c14490b
commit 6c1ccccf21
7 changed files with 37 additions and 0 deletions

3
.gitmodules vendored
View File

@ -154,3 +154,6 @@
[submodule "third_party/cutlass"]
path = third_party/cutlass
url = https://github.com/NVIDIA/cutlass.git
[submodule "third_party/mimalloc"]
path = third_party/mimalloc
url = https://github.com/microsoft/mimalloc.git

View File

@ -348,6 +348,11 @@ cmake_dependent_option(
cmake_dependent_option(
BUILD_FUNCTORCH "Build Functorch" ON "BUILD_PYTHON" OFF)
# USE_MIMALLOC: build the third party mimalloc library and use it to improve
# memory allocation performance.
# The option defaults to ON for Windows builds and to OFF everywhere else.
# The platform only selects the *default*: an explicit -DUSE_MIMALLOC=<ON|OFF>
# on the command line is respected. A plain set(USE_MIMALLOC ON) inside
# if(WIN32) would create a normal variable that shadows the cache entry and
# silently discards the user's choice on Windows.
if(WIN32)
  set(USE_MIMALLOC_DEFAULT ON)
else()
  set(USE_MIMALLOC_DEFAULT OFF)
endif()
option(USE_MIMALLOC "Use mimalloc" ${USE_MIMALLOC_DEFAULT})
if(USE_CCACHE)
find_program(CCACHE_PROGRAM ccache)
@ -1025,6 +1030,12 @@ include_directories(BEFORE ${PROJECT_BINARY_DIR})
include_directories(BEFORE ${PROJECT_SOURCE_DIR}/aten/src/)
include_directories(BEFORE ${PROJECT_BINARY_DIR}/aten/src/)
# Optional mimalloc support: everything below is skipped unless USE_MIMALLOC is truthy.
if(USE_MIMALLOC)
# Define the USE_MIMALLOC preprocessor macro at directory scope.
# NOTE(review): this is issued before add_subdirectory(), so the definition
# presumably also reaches the mimalloc targets - keep the order unless verified.
add_definitions(-DUSE_MIMALLOC)
# Process the CMake project found in third_party/mimalloc as part of this build.
add_subdirectory(third_party/mimalloc)
# Put the mimalloc include directory on the header search path (directory scope).
include_directories(third_party/mimalloc/include)
endif()
# ---[ Main build
add_subdirectory(c10)
add_subdirectory(caffe2)

View File

@ -108,6 +108,11 @@ else()
message(STATUS "don't use NUMA")
endif()
# With mimalloc enabled, c10 consumes the mimalloc-static library as a PRIVATE
# link dependency. The explicit add_dependencies() call additionally orders
# the build so that mimalloc-static is built before c10.
if(USE_MIMALLOC)
  add_dependencies(c10 mimalloc-static)
  target_link_libraries(c10 PRIVATE mimalloc-static)
endif()
if(ANDROID)
target_link_libraries(c10 PRIVATE log)
endif()

View File

@ -6,6 +6,10 @@
#include <c10/util/irange.h>
#include <c10/util/numa.h>
#ifdef USE_MIMALLOC
#include <mimalloc.h>
#endif
// TODO: rename flags to C10
C10_DEFINE_bool(
caffe2_cpu_allocator_do_zero_fill,
@ -64,7 +68,11 @@ void* alloc_cpu(size_t nbytes) {
nbytes,
" bytes.");
#elif defined(_MSC_VER)
#ifdef USE_MIMALLOC
data = mi_malloc_aligned(nbytes, gAlignment);
#else
data = _aligned_malloc(nbytes, gAlignment);
#endif
CAFFE_ENFORCE(
data,
"DefaultCPUAllocator: not enough memory: you tried to allocate ",
@ -100,7 +108,11 @@ void* alloc_cpu(size_t nbytes) {
void free_cpu(void* data) {
#ifdef _MSC_VER
#ifdef USE_MIMALLOC
mi_free(data);
#else
_aligned_free(data);
#endif
#else
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
free(data);

View File

@ -162,6 +162,7 @@ function(caffe2_print_configuration_summary)
if(${USE_TBB})
message(STATUS " USE_SYSTEM_TBB : ${USE_SYSTEM_TBB}")
endif()
message(STATUS " USE_MIMALLOC : ${USE_MIMALLOC}")
message(STATUS " USE_VULKAN : ${USE_VULKAN}")
if(${USE_VULKAN})
message(STATUS " USE_VULKAN_FP16_INFERENCE : ${USE_VULKAN_FP16_INFERENCE}")

View File

@ -205,6 +205,10 @@
# Use system-provided libraries to satisfy the build dependencies.
# When turned on, the following cmake variables will be toggled as well:
# USE_SYSTEM_CPUINFO=ON USE_SYSTEM_SLEEF=ON BUILD_CUSTOM_PROTOBUF=OFF
#
# USE_MIMALLOC
# Statically link mimalloc into C10, and use mimalloc in alloc_cpu & free_cpu.
# By default, it is only enabled on Windows.
import sys
if sys.platform == 'win32' and sys.maxsize.bit_length() == 31:

1
third_party/mimalloc vendored Submodule

Submodule third_party/mimalloc added at b66e3214d8