Accept non-standard bools in more CUDA kernels

This fixes all remaining CUDA kernels, except those using `cub` or
`thrust`, to accept boolean tensors with values other than 1 or 0.

I do this by using `c10::load` in more places, and also adding a
`load_vector` helper into `MemoryAccess.cuh` that does the same thing
for vectorized loads.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78957

Approved by: https://github.com/mruberry
This commit is contained in:
Peter Bell
2022-06-08 01:12:33 +01:00
committed by PyTorch MergeBot
parent 4945c72151
commit cd9e158007
8 changed files with 116 additions and 118 deletions

View File

@ -16,6 +16,7 @@ template <>
struct LoadImpl<bool> {
C10_HOST_DEVICE static bool apply(const void* src) {
static_assert(sizeof(bool) == sizeof(char), "");
// NOTE: [Loading boolean values]
// Protect against invalid boolean values by loading as a byte
// first, then converting to bool (see gh-54789).
return *reinterpret_cast<const unsigned char*>(src);
@ -29,4 +30,9 @@ C10_HOST_DEVICE T load(const void* src) {
return c10::detail::LoadImpl<T>::apply(src);
}
template <typename scalar_t>
C10_HOST_DEVICE scalar_t load(const scalar_t* src) {
return c10::detail::LoadImpl<scalar_t>::apply(src);
}
} // namespace c10