"""Export torch work functions for binary ufuncs, rename/tweak to match numpy.
|
|
This listing is further exported to public symbols in the `torch._numpy/_ufuncs.py` module.
|
|
"""
|
|
|
|
import torch
|
|
|
|
from torch import ( # noqa: F401
|
|
add, # noqa: F401
|
|
arctan2, # noqa: F401
|
|
bitwise_and, # noqa: F401
|
|
bitwise_left_shift as left_shift, # noqa: F401
|
|
bitwise_or, # noqa: F401
|
|
bitwise_right_shift as right_shift, # noqa: F401
|
|
bitwise_xor, # noqa: F401
|
|
copysign, # noqa: F401
|
|
divide, # noqa: F401
|
|
eq as equal, # noqa: F401
|
|
float_power, # noqa: F401
|
|
floor_divide, # noqa: F401
|
|
fmax, # noqa: F401
|
|
fmin, # noqa: F401
|
|
fmod, # noqa: F401
|
|
gcd, # noqa: F401
|
|
greater, # noqa: F401
|
|
greater_equal, # noqa: F401
|
|
heaviside, # noqa: F401
|
|
hypot, # noqa: F401
|
|
lcm, # noqa: F401
|
|
ldexp, # noqa: F401
|
|
less, # noqa: F401
|
|
less_equal, # noqa: F401
|
|
logaddexp, # noqa: F401
|
|
logaddexp2, # noqa: F401
|
|
logical_and, # noqa: F401
|
|
logical_or, # noqa: F401
|
|
logical_xor, # noqa: F401
|
|
maximum, # noqa: F401
|
|
minimum, # noqa: F401
|
|
multiply, # noqa: F401
|
|
nextafter, # noqa: F401
|
|
not_equal, # noqa: F401
|
|
pow as power, # noqa: F401
|
|
remainder, # noqa: F401
|
|
remainder as mod, # noqa: F401
|
|
subtract, # noqa: F401
|
|
true_divide, # noqa: F401
|
|
)
|
|
|
|
from . import _dtypes_impl, _util
|
|
|
|
|
|
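
# The block above simply re-exports torch functions under their NumPy names
# (`pow` -> `power`, `eq` -> `equal`, etc.). A minimal illustration with made-up
# values, kept as a comment so importing this module stays side-effect free:
#
#     >>> power(torch.tensor(2), torch.tensor(3))
#     tensor(8)
#     >>> left_shift(torch.tensor(1), torch.tensor(4))
#     tensor(16)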


# work around torch limitations w.r.t. numpy
def matmul(x, y):
    # work around:
    # - RuntimeError: expected scalar type Int but found Double
    # - RuntimeError: "addmm_impl_cpu_" not implemented for 'Bool'
    # - RuntimeError: "addmm_impl_cpu_" not implemented for 'Half'
    dtype = _dtypes_impl.result_type_impl(x, y)
    is_bool = dtype == torch.bool
    is_half = (x.dtype == torch.float16 or y.dtype == torch.float16) and (
        x.is_cpu or y.is_cpu
    )

    work_dtype = dtype
    if is_bool:
        work_dtype = torch.uint8
    if is_half:
        work_dtype = torch.float32

    x = _util.cast_if_needed(x, work_dtype)
    y = _util.cast_if_needed(y, work_dtype)

    result = torch.matmul(x, y)

    if work_dtype != dtype:
        result = result.to(dtype)

    return result
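
# A minimal sketch of the upcast/compute/downcast path above for CPU
# half-precision inputs (illustrative values, kept as a comment):
#
#     >>> a = torch.ones(2, 3, dtype=torch.float16)  # CPU tensor
#     >>> b = torch.ones(3, 2, dtype=torch.float16)
#     >>> out = matmul(a, b)  # computed in float32, cast back on return
#     >>> out.dtype
#     torch.float16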


# a stub implementation of divmod, should be improved after
# https://github.com/pytorch/pytorch/issues/90820 is fixed in pytorch
def divmod(x, y):
    return x // y, x % y
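
# Note: in current PyTorch, `//` on tensors is torch.floor_divide and `%` is
# torch.remainder, so the stub follows Python's floor-division convention
# (the remainder takes the sign of the divisor). An illustrative example:
#
#     >>> divmod(torch.tensor(-7), torch.tensor(3))
#     (tensor(-3), tensor(2))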