mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Re-enable some C++ warnings (#142332)
It enables some C++ warnings since the code base is fairly clean. Meanwhile, Wextra-semi is disabled on CUDA generated code since there is no way to fix them without the cooperation of CUDA team. Pull Request resolved: https://github.com/pytorch/pytorch/pull/142332 Approved by: https://github.com/albanD, https://github.com/eqy
This commit is contained in:
@ -997,8 +997,6 @@ if(NOT MSVC)
|
||||
append_cxx_flag_if_supported("-Wnarrowing" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Wno-missing-field-initializers"
|
||||
CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Wno-type-limits" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Wno-array-bounds" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Wno-unknown-pragmas" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Wno-unused-parameter" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Wno-strict-overflow" CMAKE_CXX_FLAGS)
|
||||
@ -1076,7 +1074,6 @@ if(NOT MSVC)
|
||||
set(WERROR FALSE)
|
||||
endif()
|
||||
endif()
|
||||
append_cxx_flag_if_supported("-Wno-unused-but-set-variable" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-fstandalone-debug" CMAKE_CXX_FLAGS_DEBUG)
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
|
||||
@ -1093,6 +1090,7 @@ if(NOT MSVC)
|
||||
append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13)
|
||||
append_cxx_flag_if_supported("-Wno-dangling-reference" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Wno-error=dangling-reference" CMAKE_CXX_FLAGS)
|
||||
append_cxx_flag_if_supported("-Wno-error=redundant-move" CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
|
@ -466,6 +466,6 @@ void CUDAHooks::deviceSynchronize(DeviceIndex device_index) const {
|
||||
using at::CUDAHooksRegistry;
|
||||
using at::RegistererCUDAHooksRegistry;
|
||||
|
||||
REGISTER_CUDA_HOOKS(CUDAHooks);
|
||||
REGISTER_CUDA_HOOKS(CUDAHooks)
|
||||
|
||||
} // namespace at::cuda::detail
|
||||
|
@ -127,8 +127,8 @@ RETTYPE NAME(ARG1 a1, ARG2 a2, ARG3 a3, ARG4 a4) {
|
||||
#define NVRTC_STUB2(NAME, A1, A2) _STUB_2(NVRTC, NAME, nvrtcResult, A1, A2)
|
||||
#define NVRTC_STUB3(NAME, A1, A2, A3) _STUB_3(NVRTC, NAME, nvrtcResult, A1, A2, A3)
|
||||
|
||||
NVRTC_STUB2(nvrtcVersion, int*, int*);
|
||||
NVRTC_STUB2(nvrtcAddNameExpression, nvrtcProgram, const char * const);
|
||||
NVRTC_STUB2(nvrtcVersion, int*, int*)
|
||||
NVRTC_STUB2(nvrtcAddNameExpression, nvrtcProgram, const char * const)
|
||||
|
||||
nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog,
|
||||
const char *src,
|
||||
@ -143,32 +143,32 @@ nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog,
|
||||
return fn(prog, src, name, numHeaders, headers, includeNames);
|
||||
}
|
||||
|
||||
NVRTC_STUB1(nvrtcDestroyProgram, nvrtcProgram *);
|
||||
NVRTC_STUB2(nvrtcGetPTXSize, nvrtcProgram, size_t *);
|
||||
NVRTC_STUB2(nvrtcGetPTX, nvrtcProgram, char *);
|
||||
NVRTC_STUB1(nvrtcDestroyProgram, nvrtcProgram *)
|
||||
NVRTC_STUB2(nvrtcGetPTXSize, nvrtcProgram, size_t *)
|
||||
NVRTC_STUB2(nvrtcGetPTX, nvrtcProgram, char *)
|
||||
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11010
|
||||
NVRTC_STUB2(nvrtcGetCUBINSize, nvrtcProgram, size_t *);
|
||||
NVRTC_STUB2(nvrtcGetCUBIN, nvrtcProgram, char *);
|
||||
NVRTC_STUB2(nvrtcGetCUBINSize, nvrtcProgram, size_t *)
|
||||
NVRTC_STUB2(nvrtcGetCUBIN, nvrtcProgram, char *)
|
||||
#endif
|
||||
NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char * const *);
|
||||
_STUB_1(NVRTC, nvrtcGetErrorString, const char *, nvrtcResult);
|
||||
NVRTC_STUB2(nvrtcGetProgramLogSize,nvrtcProgram, size_t*);
|
||||
NVRTC_STUB2(nvrtcGetProgramLog, nvrtcProgram, char *);
|
||||
NVRTC_STUB3(nvrtcGetLoweredName, nvrtcProgram, const char *, const char **);
|
||||
NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char * const *)
|
||||
_STUB_1(NVRTC, nvrtcGetErrorString, const char *, nvrtcResult)
|
||||
NVRTC_STUB2(nvrtcGetProgramLogSize,nvrtcProgram, size_t*)
|
||||
NVRTC_STUB2(nvrtcGetProgramLog, nvrtcProgram, char *)
|
||||
NVRTC_STUB3(nvrtcGetLoweredName, nvrtcProgram, const char *, const char **)
|
||||
|
||||
CUDA_STUB2(cuModuleLoadData, CUmodule *, const void *);
|
||||
CUDA_STUB3(cuModuleGetFunction, CUfunction *, CUmodule, const char *);
|
||||
CUDA_STUB4(cuOccupancyMaxActiveBlocksPerMultiprocessor, int *, CUfunction, int, size_t);
|
||||
CUDA_STUB2(cuGetErrorString, CUresult, const char **);
|
||||
CUDA_STUB1(cuCtxGetCurrent, CUcontext *);
|
||||
CUDA_STUB1(cuCtxSetCurrent, CUcontext);
|
||||
CUDA_STUB1(cuModuleUnload, CUmodule);
|
||||
CUDA_STUB3(cuDevicePrimaryCtxGetState, CUdevice, unsigned int *, int *);
|
||||
CUDA_STUB2(cuDevicePrimaryCtxRetain, CUcontext *, CUdevice);
|
||||
CUDA_STUB4(cuLinkCreate, unsigned int, CUjit_option *, void **, CUlinkState *);
|
||||
CUDA_STUB3(cuLinkComplete, CUlinkState, void **, size_t *);
|
||||
CUDA_STUB3(cuFuncSetAttribute, CUfunction, CUfunction_attribute, int);
|
||||
CUDA_STUB3(cuFuncGetAttribute, int*, CUfunction_attribute, CUfunction);
|
||||
CUDA_STUB2(cuModuleLoadData, CUmodule *, const void *)
|
||||
CUDA_STUB3(cuModuleGetFunction, CUfunction *, CUmodule, const char *)
|
||||
CUDA_STUB4(cuOccupancyMaxActiveBlocksPerMultiprocessor, int *, CUfunction, int, size_t)
|
||||
CUDA_STUB2(cuGetErrorString, CUresult, const char **)
|
||||
CUDA_STUB1(cuCtxGetCurrent, CUcontext *)
|
||||
CUDA_STUB1(cuCtxSetCurrent, CUcontext)
|
||||
CUDA_STUB1(cuModuleUnload, CUmodule)
|
||||
CUDA_STUB3(cuDevicePrimaryCtxGetState, CUdevice, unsigned int *, int *)
|
||||
CUDA_STUB2(cuDevicePrimaryCtxRetain, CUcontext *, CUdevice)
|
||||
CUDA_STUB4(cuLinkCreate, unsigned int, CUjit_option *, void **, CUlinkState *)
|
||||
CUDA_STUB3(cuLinkComplete, CUlinkState, void **, size_t *)
|
||||
CUDA_STUB3(cuFuncSetAttribute, CUfunction, CUfunction_attribute, int)
|
||||
CUDA_STUB3(cuFuncGetAttribute, int*, CUfunction_attribute, CUfunction)
|
||||
|
||||
#if defined(CUDA_VERSION) && CUDA_VERSION >= 12000
|
||||
CUresult CUDAAPI
|
||||
|
@ -561,8 +561,8 @@ namespace {
|
||||
bool expected = std::isnan(val);
|
||||
CACHE_ALIGN c10::Half actual_vals[vHalf::size()];
|
||||
vHalf(val).isnan().store(actual_vals);
|
||||
for (int jj = 0; jj < vHalf::size(); ++jj) {
|
||||
EXPECT_EQ(expected, c10::bit_cast<uint16_t>(actual_vals[jj]) != 0) << "fp16 isnan failure for bit pattern " << std::hex << ii << std::dec;
|
||||
for (auto actual_val : actual_vals) {
|
||||
EXPECT_EQ(expected, c10::bit_cast<uint16_t>(actual_val) != 0) << "fp16 isnan failure for bit pattern " << std::hex << ii << std::dec;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1046,7 +1046,7 @@ namespace {
|
||||
mask[idx] = (VT)0;
|
||||
}
|
||||
else {
|
||||
int64_t hex_mask = 0xFFFFFFFFFFFFFFFF;
|
||||
uint64_t hex_mask = 0xFFFFFFFFFFFFFFFF;
|
||||
std::memcpy(&mask[idx], &hex_mask, sizeof(VT));
|
||||
}
|
||||
if (!test_blendv<vec, VT, idx+1, N>(expected_val, a, b, mask)) return false;
|
||||
@ -1315,8 +1315,8 @@ namespace {
|
||||
ValueGen<float> generator_sc(1.f, 15.f, seed.add(2));
|
||||
for ([[maybe_unused]] const auto i : c10::irange(trials)) {
|
||||
float scale = generator_sc.get();
|
||||
int32_t zero_point_val = generator.get();
|
||||
float scale_zp_premul = -(scale * zero_point_val);
|
||||
auto zero_point_val = generator.get();
|
||||
float scale_zp_premul = -(scale * static_cast<float>(zero_point_val));
|
||||
vfloat vf_scale = vfloat{scale};
|
||||
vfloat vf_zp = vfloat{static_cast<float>(zero_point_val)};
|
||||
vfloat vf_scale_zp = vfloat{scale_zp_premul};
|
||||
@ -1657,18 +1657,16 @@ namespace {
|
||||
TEST(HalfConversionTest, HalfFloat) {
|
||||
float f32s[100];
|
||||
for (const auto i : c10::irange(100)) {
|
||||
f32s[i] = i + 0.3;
|
||||
f32s[i] = static_cast<float>(i + 0.3);
|
||||
}
|
||||
uint16_t u16;
|
||||
float x;
|
||||
for (const auto i : c10::irange(100)) {
|
||||
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
|
||||
!defined(__APPLE__)
|
||||
u16 = at::vec::float2half_scalar(f32s[i]);
|
||||
x = at::vec::half2float_scalar(u16);
|
||||
uint16_t u16 = at::vec::float2half_scalar(f32s[i]);
|
||||
float x = at::vec::half2float_scalar(u16);
|
||||
#else
|
||||
u16 = c10::detail::fp16_ieee_from_fp32_value(f32s[i]);
|
||||
x = c10::detail::fp16_ieee_to_fp32_value(u16);
|
||||
uint16_t u16 = c10::detail::fp16_ieee_from_fp32_value(f32s[i]);
|
||||
float x = c10::detail::fp16_ieee_to_fp32_value(u16);
|
||||
#endif
|
||||
|
||||
EXPECT_EQ(u16, c10::detail::fp16_ieee_from_fp32_value(f32s[i]))
|
||||
@ -1697,7 +1695,7 @@ namespace {
|
||||
VT v_pinf = static_cast<VT>(*(float *)&infBits);
|
||||
values[index] = v_pinf;
|
||||
auto vec_pinf = vec::loadu(values);
|
||||
int negInfBits = 0xFF800000;
|
||||
unsigned int negInfBits = 0xFF800000;
|
||||
VT v_ninf = static_cast<VT>(*(float *)&negInfBits);
|
||||
values[index] = v_ninf;
|
||||
auto vec_ninf = vec::loadu(values);
|
||||
@ -1779,8 +1777,8 @@ namespace {
|
||||
const auto expected = static_cast<float>(val);
|
||||
CACHE_ALIGN float actual_vals[vfloat::size()];
|
||||
at::vec::convert<float>(vBFloat16(val)).store(actual_vals);
|
||||
for (int jj = 0; jj < vfloat::size(); ++jj) {
|
||||
EXPECT_EQ(c10::bit_cast<uint32_t>(expected), c10::bit_cast<uint32_t>(actual_vals[jj]))
|
||||
for (auto actual_val : actual_vals) {
|
||||
EXPECT_EQ(c10::bit_cast<uint32_t>(expected), c10::bit_cast<uint32_t>(actual_val))
|
||||
<< "convert-to-float failure for bf16 bit pattern "
|
||||
<< std::hex << ii << std::dec;
|
||||
}
|
||||
@ -1794,20 +1792,20 @@ namespace {
|
||||
|
||||
#define TEST_MASK_LOAD(dst_t, mask_t, mask_n) \
|
||||
do { \
|
||||
CACHE_ALIGN dst_t x[mask_n * size]; \
|
||||
CACHE_ALIGN dst_t y[mask_n * size]; \
|
||||
CACHE_ALIGN dst_t ref[mask_n * size]; \
|
||||
auto seed = TestSeed(); \
|
||||
dst_t generator_min = std::numeric_limits<dst_t>::is_signed ? dst_t(-100) : dst_t(0); \
|
||||
ValueGen<dst_t> generator(generator_min, dst_t(100), seed); \
|
||||
for (const auto i : c10::irange(mask_n * size)) { \
|
||||
x[i] = generator.get(); \
|
||||
} \
|
||||
auto vec_mask = generate_vec_mask<mask_t, mask_n>(seed); \
|
||||
constexpr int dst_size = at::vec::Vectorized<dst_t>::size(); \
|
||||
constexpr int dst_n = mask_n * size / dst_size; \
|
||||
constexpr int rnd_n = (mask_n * size + dst_size - 1) / dst_size; \
|
||||
if constexpr(dst_n * dst_size >= mask_n * size) { \
|
||||
CACHE_ALIGN dst_t x[mask_n * size]; \
|
||||
CACHE_ALIGN dst_t y[mask_n * size]; \
|
||||
CACHE_ALIGN dst_t ref[mask_n * size]; \
|
||||
auto seed = TestSeed(); \
|
||||
dst_t generator_min = std::numeric_limits<dst_t>::is_signed ? dst_t(-100) : dst_t(0); \
|
||||
ValueGen<dst_t> generator(generator_min, dst_t(100), seed); \
|
||||
for (const auto i : c10::irange(mask_n * size)) { \
|
||||
x[i] = generator.get(); \
|
||||
} \
|
||||
auto vec_mask = generate_vec_mask<mask_t, mask_n>(seed); \
|
||||
constexpr int rnd_n = (mask_n * size + dst_size - 1) / dst_size;\
|
||||
auto x_vec = vec_mask.template loadu<dst_t, rnd_n>(x); \
|
||||
x_vec.store(y); \
|
||||
for (const auto i : c10::irange(mask_n * size)) { \
|
||||
|
@ -353,7 +353,7 @@ void testStaticRuntime(
|
||||
|
||||
size_t new_managed_bytes =
|
||||
memory_planner ? memory_planner->total_managed() : 0;
|
||||
if (check_resize && new_managed_bytes >= 0) {
|
||||
if (check_resize) {
|
||||
EXPECT_GE(new_managed_bytes, managed_bytes);
|
||||
}
|
||||
|
||||
|
@ -188,7 +188,6 @@ class BlockingCounter {
|
||||
// returns false.
|
||||
bool DecrementCount() {
|
||||
const auto count_value = count_.fetch_sub(1, std::memory_order_relaxed) - 1;
|
||||
TORCH_DCHECK_GE(count_value, 0);
|
||||
if (count_value == 0) {
|
||||
std::lock_guard<std::mutex> g(mutex_);
|
||||
cond_.notify_one();
|
||||
|
@ -414,6 +414,9 @@ function(torch_compile_options libname)
|
||||
$<$<COMPILE_LANGUAGE:CXX>:${private_compile_options}>)
|
||||
if(USE_CUDA)
|
||||
foreach(option IN LISTS private_compile_options)
|
||||
if("${option}" STREQUAL "-Wextra-semi")
|
||||
continue()
|
||||
endif()
|
||||
target_compile_options(${libname} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler ${option}>)
|
||||
endforeach()
|
||||
endif()
|
||||
|
@ -1888,7 +1888,6 @@ static PyObject* order(PyObject *_,
|
||||
}
|
||||
}
|
||||
|
||||
int ndim = 0;
|
||||
int insert_point = -1;
|
||||
Slice<DimEntry> new_levels;
|
||||
for (auto l : levels) {
|
||||
@ -1896,7 +1895,6 @@ static PyObject* order(PyObject *_,
|
||||
continue;
|
||||
}
|
||||
if (l.is_positional()) {
|
||||
ndim++;
|
||||
if (insert_point == -1) {
|
||||
insert_point = new_levels.size();
|
||||
new_levels.extend(A, flat_positional_dims);
|
||||
|
@ -69,7 +69,7 @@ std::string jsonStrEscape(const std::string& str) {
|
||||
ostream << "\\r";
|
||||
} else if (ch == '\t') {
|
||||
ostream << "\\t";
|
||||
} else if ('\x00' <= ch && ch <= '\x1f') {
|
||||
} else if (ch <= '\x1f') {
|
||||
ostream << "\\u" << std::hex << std::setw(4) << std::setfill('0')
|
||||
<< static_cast<int>(ch);
|
||||
} else {
|
||||
|
@ -43,13 +43,13 @@ std::optional<AttrTag> ParseAttrTag(
|
||||
}
|
||||
|
||||
std::string::size_type vpos = match[1].second - node_string.begin() + 1;
|
||||
char nested_open = -1;
|
||||
char nested_close = -1;
|
||||
std::optional<char> nested_open;
|
||||
std::optional<char> nested_close;
|
||||
size_t nest_count = 1;
|
||||
AttrTag tag;
|
||||
tag.name = match[1].str();
|
||||
for (pos = vpos; pos < node_string.size(); ++pos) {
|
||||
if (nested_open < 0) {
|
||||
if (!nested_open.has_value()) {
|
||||
if (SkipTagSeparator(node_string, pos) != pos) {
|
||||
break;
|
||||
}
|
||||
@ -72,7 +72,8 @@ std::optional<AttrTag> ParseAttrTag(
|
||||
--nest_count;
|
||||
if (nest_count == 0) {
|
||||
nest_count = 1;
|
||||
nested_open = nested_close = -1;
|
||||
nested_open.reset();
|
||||
nested_close.reset();
|
||||
}
|
||||
} else if (node_string[pos] == nested_open) {
|
||||
++nest_count;
|
||||
|
@ -669,7 +669,7 @@ static std::string json_str_escape(const std::string& str) {
|
||||
ostream << "\\r";
|
||||
} else if (ch == '\t') {
|
||||
ostream << "\\t";
|
||||
} else if ('\x00' <= ch && ch <= '\x1f') {
|
||||
} else if (ch <= '\x1f') {
|
||||
ostream << "\\u" << std::hex << std::setw(4) << std::setfill('0')
|
||||
<< static_cast<int>(ch);
|
||||
} else {
|
||||
|
Reference in New Issue
Block a user