Re-enable some C++ warnings (#142332)

This change re-enables several C++ warnings, since the code base is already fairly clean with respect to them. Meanwhile, `-Wextra-semi` is disabled for CUDA-generated code, since the warnings it raises there cannot be fixed without the cooperation of the CUDA team.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/142332
Approved by: https://github.com/albanD, https://github.com/eqy
This commit is contained in:
cyy
2024-12-12 04:02:12 +00:00
committed by PyTorch MergeBot
parent f892f9862a
commit 2903cf0ad8
11 changed files with 62 additions and 65 deletions

View File

@ -997,8 +997,6 @@ if(NOT MSVC)
append_cxx_flag_if_supported("-Wnarrowing" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-missing-field-initializers"
CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-type-limits" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-array-bounds" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-unknown-pragmas" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-unused-parameter" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-strict-overflow" CMAKE_CXX_FLAGS)
@ -1076,7 +1074,6 @@ if(NOT MSVC)
set(WERROR FALSE)
endif()
endif()
append_cxx_flag_if_supported("-Wno-unused-but-set-variable" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-fstandalone-debug" CMAKE_CXX_FLAGS_DEBUG)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
@ -1093,6 +1090,7 @@ if(NOT MSVC)
append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13)
append_cxx_flag_if_supported("-Wno-dangling-reference" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-error=dangling-reference" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-error=redundant-move" CMAKE_CXX_FLAGS)
endif()

View File

@ -466,6 +466,6 @@ void CUDAHooks::deviceSynchronize(DeviceIndex device_index) const {
using at::CUDAHooksRegistry;
using at::RegistererCUDAHooksRegistry;
REGISTER_CUDA_HOOKS(CUDAHooks);
REGISTER_CUDA_HOOKS(CUDAHooks)
} // namespace at::cuda::detail

View File

@ -127,8 +127,8 @@ RETTYPE NAME(ARG1 a1, ARG2 a2, ARG3 a3, ARG4 a4) {
#define NVRTC_STUB2(NAME, A1, A2) _STUB_2(NVRTC, NAME, nvrtcResult, A1, A2)
#define NVRTC_STUB3(NAME, A1, A2, A3) _STUB_3(NVRTC, NAME, nvrtcResult, A1, A2, A3)
NVRTC_STUB2(nvrtcVersion, int*, int*);
NVRTC_STUB2(nvrtcAddNameExpression, nvrtcProgram, const char * const);
NVRTC_STUB2(nvrtcVersion, int*, int*)
NVRTC_STUB2(nvrtcAddNameExpression, nvrtcProgram, const char * const)
nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog,
const char *src,
@ -143,32 +143,32 @@ nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog,
return fn(prog, src, name, numHeaders, headers, includeNames);
}
NVRTC_STUB1(nvrtcDestroyProgram, nvrtcProgram *);
NVRTC_STUB2(nvrtcGetPTXSize, nvrtcProgram, size_t *);
NVRTC_STUB2(nvrtcGetPTX, nvrtcProgram, char *);
NVRTC_STUB1(nvrtcDestroyProgram, nvrtcProgram *)
NVRTC_STUB2(nvrtcGetPTXSize, nvrtcProgram, size_t *)
NVRTC_STUB2(nvrtcGetPTX, nvrtcProgram, char *)
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11010
NVRTC_STUB2(nvrtcGetCUBINSize, nvrtcProgram, size_t *);
NVRTC_STUB2(nvrtcGetCUBIN, nvrtcProgram, char *);
NVRTC_STUB2(nvrtcGetCUBINSize, nvrtcProgram, size_t *)
NVRTC_STUB2(nvrtcGetCUBIN, nvrtcProgram, char *)
#endif
NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char * const *);
_STUB_1(NVRTC, nvrtcGetErrorString, const char *, nvrtcResult);
NVRTC_STUB2(nvrtcGetProgramLogSize,nvrtcProgram, size_t*);
NVRTC_STUB2(nvrtcGetProgramLog, nvrtcProgram, char *);
NVRTC_STUB3(nvrtcGetLoweredName, nvrtcProgram, const char *, const char **);
NVRTC_STUB3(nvrtcCompileProgram, nvrtcProgram, int, const char * const *)
_STUB_1(NVRTC, nvrtcGetErrorString, const char *, nvrtcResult)
NVRTC_STUB2(nvrtcGetProgramLogSize,nvrtcProgram, size_t*)
NVRTC_STUB2(nvrtcGetProgramLog, nvrtcProgram, char *)
NVRTC_STUB3(nvrtcGetLoweredName, nvrtcProgram, const char *, const char **)
CUDA_STUB2(cuModuleLoadData, CUmodule *, const void *);
CUDA_STUB3(cuModuleGetFunction, CUfunction *, CUmodule, const char *);
CUDA_STUB4(cuOccupancyMaxActiveBlocksPerMultiprocessor, int *, CUfunction, int, size_t);
CUDA_STUB2(cuGetErrorString, CUresult, const char **);
CUDA_STUB1(cuCtxGetCurrent, CUcontext *);
CUDA_STUB1(cuCtxSetCurrent, CUcontext);
CUDA_STUB1(cuModuleUnload, CUmodule);
CUDA_STUB3(cuDevicePrimaryCtxGetState, CUdevice, unsigned int *, int *);
CUDA_STUB2(cuDevicePrimaryCtxRetain, CUcontext *, CUdevice);
CUDA_STUB4(cuLinkCreate, unsigned int, CUjit_option *, void **, CUlinkState *);
CUDA_STUB3(cuLinkComplete, CUlinkState, void **, size_t *);
CUDA_STUB3(cuFuncSetAttribute, CUfunction, CUfunction_attribute, int);
CUDA_STUB3(cuFuncGetAttribute, int*, CUfunction_attribute, CUfunction);
CUDA_STUB2(cuModuleLoadData, CUmodule *, const void *)
CUDA_STUB3(cuModuleGetFunction, CUfunction *, CUmodule, const char *)
CUDA_STUB4(cuOccupancyMaxActiveBlocksPerMultiprocessor, int *, CUfunction, int, size_t)
CUDA_STUB2(cuGetErrorString, CUresult, const char **)
CUDA_STUB1(cuCtxGetCurrent, CUcontext *)
CUDA_STUB1(cuCtxSetCurrent, CUcontext)
CUDA_STUB1(cuModuleUnload, CUmodule)
CUDA_STUB3(cuDevicePrimaryCtxGetState, CUdevice, unsigned int *, int *)
CUDA_STUB2(cuDevicePrimaryCtxRetain, CUcontext *, CUdevice)
CUDA_STUB4(cuLinkCreate, unsigned int, CUjit_option *, void **, CUlinkState *)
CUDA_STUB3(cuLinkComplete, CUlinkState, void **, size_t *)
CUDA_STUB3(cuFuncSetAttribute, CUfunction, CUfunction_attribute, int)
CUDA_STUB3(cuFuncGetAttribute, int*, CUfunction_attribute, CUfunction)
#if defined(CUDA_VERSION) && CUDA_VERSION >= 12000
CUresult CUDAAPI

View File

@ -561,8 +561,8 @@ namespace {
bool expected = std::isnan(val);
CACHE_ALIGN c10::Half actual_vals[vHalf::size()];
vHalf(val).isnan().store(actual_vals);
for (int jj = 0; jj < vHalf::size(); ++jj) {
EXPECT_EQ(expected, c10::bit_cast<uint16_t>(actual_vals[jj]) != 0) << "fp16 isnan failure for bit pattern " << std::hex << ii << std::dec;
for (auto actual_val : actual_vals) {
EXPECT_EQ(expected, c10::bit_cast<uint16_t>(actual_val) != 0) << "fp16 isnan failure for bit pattern " << std::hex << ii << std::dec;
}
}
}
@ -1046,7 +1046,7 @@ namespace {
mask[idx] = (VT)0;
}
else {
int64_t hex_mask = 0xFFFFFFFFFFFFFFFF;
uint64_t hex_mask = 0xFFFFFFFFFFFFFFFF;
std::memcpy(&mask[idx], &hex_mask, sizeof(VT));
}
if (!test_blendv<vec, VT, idx+1, N>(expected_val, a, b, mask)) return false;
@ -1315,8 +1315,8 @@ namespace {
ValueGen<float> generator_sc(1.f, 15.f, seed.add(2));
for ([[maybe_unused]] const auto i : c10::irange(trials)) {
float scale = generator_sc.get();
int32_t zero_point_val = generator.get();
float scale_zp_premul = -(scale * zero_point_val);
auto zero_point_val = generator.get();
float scale_zp_premul = -(scale * static_cast<float>(zero_point_val));
vfloat vf_scale = vfloat{scale};
vfloat vf_zp = vfloat{static_cast<float>(zero_point_val)};
vfloat vf_scale_zp = vfloat{scale_zp_premul};
@ -1657,18 +1657,16 @@ namespace {
TEST(HalfConversionTest, HalfFloat) {
float f32s[100];
for (const auto i : c10::irange(100)) {
f32s[i] = i + 0.3;
f32s[i] = static_cast<float>(i + 0.3);
}
uint16_t u16;
float x;
for (const auto i : c10::irange(100)) {
#if (defined(CPU_CAPABILITY_AVX2) || defined(CPU_CAPABILITY_AVX512)) && \
!defined(__APPLE__)
u16 = at::vec::float2half_scalar(f32s[i]);
x = at::vec::half2float_scalar(u16);
uint16_t u16 = at::vec::float2half_scalar(f32s[i]);
float x = at::vec::half2float_scalar(u16);
#else
u16 = c10::detail::fp16_ieee_from_fp32_value(f32s[i]);
x = c10::detail::fp16_ieee_to_fp32_value(u16);
uint16_t u16 = c10::detail::fp16_ieee_from_fp32_value(f32s[i]);
float x = c10::detail::fp16_ieee_to_fp32_value(u16);
#endif
EXPECT_EQ(u16, c10::detail::fp16_ieee_from_fp32_value(f32s[i]))
@ -1697,7 +1695,7 @@ namespace {
VT v_pinf = static_cast<VT>(*(float *)&infBits);
values[index] = v_pinf;
auto vec_pinf = vec::loadu(values);
int negInfBits = 0xFF800000;
unsigned int negInfBits = 0xFF800000;
VT v_ninf = static_cast<VT>(*(float *)&negInfBits);
values[index] = v_ninf;
auto vec_ninf = vec::loadu(values);
@ -1779,8 +1777,8 @@ namespace {
const auto expected = static_cast<float>(val);
CACHE_ALIGN float actual_vals[vfloat::size()];
at::vec::convert<float>(vBFloat16(val)).store(actual_vals);
for (int jj = 0; jj < vfloat::size(); ++jj) {
EXPECT_EQ(c10::bit_cast<uint32_t>(expected), c10::bit_cast<uint32_t>(actual_vals[jj]))
for (auto actual_val : actual_vals) {
EXPECT_EQ(c10::bit_cast<uint32_t>(expected), c10::bit_cast<uint32_t>(actual_val))
<< "convert-to-float failure for bf16 bit pattern "
<< std::hex << ii << std::dec;
}
@ -1794,20 +1792,20 @@ namespace {
#define TEST_MASK_LOAD(dst_t, mask_t, mask_n) \
do { \
CACHE_ALIGN dst_t x[mask_n * size]; \
CACHE_ALIGN dst_t y[mask_n * size]; \
CACHE_ALIGN dst_t ref[mask_n * size]; \
auto seed = TestSeed(); \
dst_t generator_min = std::numeric_limits<dst_t>::is_signed ? dst_t(-100) : dst_t(0); \
ValueGen<dst_t> generator(generator_min, dst_t(100), seed); \
for (const auto i : c10::irange(mask_n * size)) { \
x[i] = generator.get(); \
} \
auto vec_mask = generate_vec_mask<mask_t, mask_n>(seed); \
constexpr int dst_size = at::vec::Vectorized<dst_t>::size(); \
constexpr int dst_n = mask_n * size / dst_size; \
constexpr int rnd_n = (mask_n * size + dst_size - 1) / dst_size; \
if constexpr(dst_n * dst_size >= mask_n * size) { \
CACHE_ALIGN dst_t x[mask_n * size]; \
CACHE_ALIGN dst_t y[mask_n * size]; \
CACHE_ALIGN dst_t ref[mask_n * size]; \
auto seed = TestSeed(); \
dst_t generator_min = std::numeric_limits<dst_t>::is_signed ? dst_t(-100) : dst_t(0); \
ValueGen<dst_t> generator(generator_min, dst_t(100), seed); \
for (const auto i : c10::irange(mask_n * size)) { \
x[i] = generator.get(); \
} \
auto vec_mask = generate_vec_mask<mask_t, mask_n>(seed); \
constexpr int rnd_n = (mask_n * size + dst_size - 1) / dst_size;\
auto x_vec = vec_mask.template loadu<dst_t, rnd_n>(x); \
x_vec.store(y); \
for (const auto i : c10::irange(mask_n * size)) { \

View File

@ -353,7 +353,7 @@ void testStaticRuntime(
size_t new_managed_bytes =
memory_planner ? memory_planner->total_managed() : 0;
if (check_resize && new_managed_bytes >= 0) {
if (check_resize) {
EXPECT_GE(new_managed_bytes, managed_bytes);
}

View File

@ -188,7 +188,6 @@ class BlockingCounter {
// returns false.
bool DecrementCount() {
const auto count_value = count_.fetch_sub(1, std::memory_order_relaxed) - 1;
TORCH_DCHECK_GE(count_value, 0);
if (count_value == 0) {
std::lock_guard<std::mutex> g(mutex_);
cond_.notify_one();

View File

@ -414,6 +414,9 @@ function(torch_compile_options libname)
$<$<COMPILE_LANGUAGE:CXX>:${private_compile_options}>)
if(USE_CUDA)
foreach(option IN LISTS private_compile_options)
if("${option}" STREQUAL "-Wextra-semi")
continue()
endif()
target_compile_options(${libname} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler ${option}>)
endforeach()
endif()

View File

@ -1888,7 +1888,6 @@ static PyObject* order(PyObject *_,
}
}
int ndim = 0;
int insert_point = -1;
Slice<DimEntry> new_levels;
for (auto l : levels) {
@ -1896,7 +1895,6 @@ static PyObject* order(PyObject *_,
continue;
}
if (l.is_positional()) {
ndim++;
if (insert_point == -1) {
insert_point = new_levels.size();
new_levels.extend(A, flat_positional_dims);

View File

@ -69,7 +69,7 @@ std::string jsonStrEscape(const std::string& str) {
ostream << "\\r";
} else if (ch == '\t') {
ostream << "\\t";
} else if ('\x00' <= ch && ch <= '\x1f') {
} else if (ch <= '\x1f') {
ostream << "\\u" << std::hex << std::setw(4) << std::setfill('0')
<< static_cast<int>(ch);
} else {

View File

@ -43,13 +43,13 @@ std::optional<AttrTag> ParseAttrTag(
}
std::string::size_type vpos = match[1].second - node_string.begin() + 1;
char nested_open = -1;
char nested_close = -1;
std::optional<char> nested_open;
std::optional<char> nested_close;
size_t nest_count = 1;
AttrTag tag;
tag.name = match[1].str();
for (pos = vpos; pos < node_string.size(); ++pos) {
if (nested_open < 0) {
if (!nested_open.has_value()) {
if (SkipTagSeparator(node_string, pos) != pos) {
break;
}
@ -72,7 +72,8 @@ std::optional<AttrTag> ParseAttrTag(
--nest_count;
if (nest_count == 0) {
nest_count = 1;
nested_open = nested_close = -1;
nested_open.reset();
nested_close.reset();
}
} else if (node_string[pos] == nested_open) {
++nest_count;

View File

@ -669,7 +669,7 @@ static std::string json_str_escape(const std::string& str) {
ostream << "\\r";
} else if (ch == '\t') {
ostream << "\\t";
} else if ('\x00' <= ch && ch <= '\x1f') {
} else if (ch <= '\x1f') {
ostream << "\\u" << std::hex << std::setw(4) << std::setfill('0')
<< static_cast<int>(ch);
} else {