mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Enable win-arm64
This patch enables building PyTorch from source with Ninja and the 'Visual Studio 16 2019' CMake generator on Windows on Arm.

Tests:
- Build from source: 'python setup.py develop'.
- Run simple PyTorch example: passed.
- python test\test_torch.py:
  -- same results as on x64
  -- Ran 1344 tests, failures=2

Pull Request resolved: https://github.com/pytorch/pytorch/pull/72424
This commit is contained in:
committed by
PyTorch MergeBot
parent
a1d5b5d2b3
commit
c4ff49f4c7
@ -76,7 +76,7 @@ struct bitset final {
|
||||
// (i.e. if the very first bit is set, this function returns '1'), and a
|
||||
// return of '0' means that there was no bit set.
|
||||
size_t find_first_set() const {
|
||||
#if defined(_MSC_VER) && defined(_M_X64)
|
||||
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
|
||||
unsigned long result;
|
||||
bool has_bits_set = (0 != _BitScanForward64(&result, bitset_));
|
||||
if (!has_bits_set) {
|
||||
|
@ -101,8 +101,11 @@ uint32_t crc32_16bytes_prefetch(const void* data, size_t length, uint32_t previo
|
||||
// Windows always little endian
|
||||
#define __BYTE_ORDER __LITTLE_ENDIAN
|
||||
|
||||
#if !defined(_M_ARM64)
|
||||
// intrinsics / prefetching
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __MINGW32__
|
||||
#define PREFETCH(location) __builtin_prefetch(location)
|
||||
#else
|
||||
|
@ -1,4 +1,5 @@
|
||||
import os
|
||||
import platform
|
||||
from glob import glob
|
||||
import shutil
|
||||
from typing import Dict, Optional
|
||||
@ -10,6 +11,22 @@ from setuptools import distutils # type: ignore[import]
|
||||
|
||||
def _overlay_windows_vcvars(env: Dict[str, str]) -> Dict[str, str]:
|
||||
vc_arch = 'x64' if IS_64BIT else 'x86'
|
||||
|
||||
if platform.machine() == 'ARM64':
|
||||
vc_arch = 'x64_arm64'
|
||||
|
||||
# First Win11 Windows on Arm build version that supports x64 emulation
|
||||
# is 10.0.22000.
|
||||
win11_1st_version = (10, 0, 22000)
|
||||
current_win_version = tuple(int(version_part) for version_part in
|
||||
platform.version().split('.'))
|
||||
if current_win_version < win11_1st_version:
|
||||
vc_arch = 'x86_arm64'
|
||||
print("Warning: 32-bit toolchain will be used, but 64-bit linker "
|
||||
"is recommended to avoid out-of-memory linker error!")
|
||||
print("Warning: Please consider upgrading to Win11, where x64 "
|
||||
"emulation is enabled!")
|
||||
|
||||
vc_env: Dict[str, str] = distutils._msvccompiler._get_vc_env(vc_arch)
|
||||
# Keys in `_get_vc_env` are always lowercase.
|
||||
# We turn them into uppercase before overlaying vcvars
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
from subprocess import check_call, check_output, CalledProcessError
|
||||
import sys
|
||||
@ -220,8 +221,11 @@ class CMake:
|
||||
'in the build steps carefully.')
|
||||
sys.exit(1)
|
||||
if IS_64BIT:
|
||||
args.append('-Ax64')
|
||||
toolset_dict['host'] = 'x64'
|
||||
if platform.machine() == 'ARM64':
|
||||
args.append('-A ARM64')
|
||||
else:
|
||||
args.append('-Ax64')
|
||||
toolset_dict['host'] = 'x64'
|
||||
if toolset_dict:
|
||||
toolset_expr = ','.join(["{}={}".format(k, v) for k, v in toolset_dict.items()])
|
||||
args.append('-T' + toolset_expr)
|
||||
|
@ -230,7 +230,7 @@ static CompilerConfig& getConfig() {
|
||||
// understand for AVX512. When we need better CPU performance this
|
||||
// optimization can be re-enabled by tracking down the platforms where
|
||||
// this error occurs and only selectively disabling it.
|
||||
#ifdef _MSC_VER
|
||||
#if (defined(_MSC_VER) && !defined(_M_ARM64))
|
||||
// According to https://stackoverflow.com/a/29178079, we are able to
|
||||
// detect which arch level is supported by the vectorizer using
|
||||
// the macro __isa_available. It is added during runtime.
|
||||
|
Reference in New Issue
Block a user