[CUDA][cuBLAS] Add fp16 accumulate option to cuBLAS/cuBLASLt (#144441)
A test for `cublasGemmEx` has been added; still need to figure out the best way to exercise the other APIs...
Pull Request resolved: https://github.com/pytorch/pytorch/pull/144441
Approved by: https://github.com/Chillee
committed by PyTorch MergeBot
parent 6e53588789
commit de945d78da
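For context on what the new knob changes: with fp16 inputs, cuBLAS can accumulate either in fp32 (the existing default, more accurate) or in fp16 (faster on some hardware, less accurate). The sketch below shows what that choice looks like at the `cublasGemmEx` level. It is illustrative only, not code from this PR; the helper name `half_gemm` and the fixed no-transpose, alpha=1/beta=0 setup are assumptions. Note that `CUBLAS_COMPUTE_16F` expects `__half` scaling factors while `CUBLAS_COMPUTE_32F` expects `float` ones.

#include <cublas_v2.h>
#include <cuda_fp16.h>

// Illustrative sketch (not from this PR): C = A * B on fp16 data, column-major,
// no transpose, with the accumulation precision selected via the compute type.
cublasStatus_t half_gemm(cublasHandle_t handle, int m, int n, int k,
                         const __half* A, const __half* B, __half* C,
                         bool allow_fp16_accumulation) {
  if (allow_fp16_accumulation) {
    // fp16 accumulation: CUBLAS_COMPUTE_16F requires __half alpha/beta.
    const __half alpha = __float2half(1.0f);
    const __half beta = __float2half(0.0f);
    return cublasGemmEx(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k,
                        &alpha, A, CUDA_R_16F, m, B, CUDA_R_16F, k,
                        &beta, C, CUDA_R_16F, m,
                        CUBLAS_COMPUTE_16F, CUBLAS_GEMM_DEFAULT);
  }
  // fp32 accumulation (the default behavior): float alpha/beta.
  const float alpha = 1.0f;
  const float beta = 0.0f;
  return cublasGemmEx(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k,
                      &alpha, A, CUDA_R_16F, m, B, CUDA_R_16F, k,
                      &beta, C, CUDA_R_16F, m,
                      CUBLAS_COMPUTE_32F, CUBLAS_GEMM_DEFAULT);
}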
@@ -1133,6 +1133,29 @@ static PyObject* THPModule_allowBF16ReductionCuBLAS(
   Py_RETURN_FALSE;
 }
 
+static PyObject* THPModule_setAllowFP16AccumulationCuBLAS(
+    PyObject* _unused,
+    PyObject* arg) {
+  HANDLE_TH_ERRORS
+  TORCH_CHECK(
+      PyBool_Check(arg),
+      "set_allow_fp16_accumulation_cublas expects a bool, "
+      "but got ",
+      THPUtils_typename(arg));
+  at::globalContext().setAllowFP16AccumulationCuBLAS(arg == Py_True);
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject* THPModule_allowFP16AccumulationCuBLAS(
+    PyObject* _unused,
+    PyObject* noargs) {
+  if (at::globalContext().allowFP16AccumulationCuBLAS()) {
+    Py_RETURN_TRUE;
+  }
+  Py_RETURN_FALSE;
+}
+
 static PyObject* THPModule_setAllowFP16ReductionCPU(
     PyObject* _unused,
     PyObject* arg) {
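The two bindings added above delegate to a flag on the global ATen context. A rough sketch of the accessor pair they assume is shown here; the member name `allow_fp16_accumulation_cublas_` and the default of `false` are assumptions modeled on the existing BF16/FP16 reduction flags, not copied from the PR.

// Sketch of the assumed at::Context accessors (member name and default are guesses).
class Context {
 public:
  bool allowFP16AccumulationCuBLAS() const {
    return allow_fp16_accumulation_cublas_;
  }
  void setAllowFP16AccumulationCuBLAS(bool b) {
    allow_fp16_accumulation_cublas_ = b;
  }

 private:
  bool allow_fp16_accumulation_cublas_ = false;  // assumed to default to off
};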
@@ -1574,6 +1597,14 @@ static std::initializer_list<PyMethodDef> TorchMethods = {
      THPModule_setAllowBF16ReductionCuBLAS,
      METH_O,
      nullptr},
+    {"_get_cublas_allow_fp16_accumulation",
+     THPModule_allowFP16AccumulationCuBLAS,
+     METH_NOARGS,
+     nullptr},
+    {"_set_cublas_allow_fp16_accumulation",
+     THPModule_setAllowFP16AccumulationCuBLAS,
+     METH_O,
+     nullptr},
     {"_get_cpu_allow_fp16_reduced_precision_reduction",
      THPModule_allowFP16ReductionCPU,
      METH_NOARGS,
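Once registered in TorchMethods, these entries are expected to surface as torch._C._get_cublas_allow_fp16_accumulation() (METH_NOARGS, so it takes no arguments) and torch._C._set_cublas_allow_fp16_accumulation(bool) (METH_O, so it receives exactly one object, which the PyBool_Check above validates), mirroring the neighbouring getter/setter pair for the cuBLAS BF16 reduced-precision reduction flag.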