mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[caffe2] Fix alias analysis for quantization compression ops (#74169)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/74169 Alias DB was being way too conservative about the semantics of exported Caffe2 ops - it thought some pure functions were writing to their inputs, which caused `ReplaceWithMaybeCopy` to fail. This in turn led to a huge decrease in out variant coverage and regressions in many models. I've extended the export macro to let the user specify an `AliasAnalysisKind` and marked all of the quantization compression ops as pure functions. ghstack-source-id: 151394133 Reviewed By: hlu1 Differential Revision: D34733630 fbshipit-source-id: e968812e052f14261c10f9a280abe1d910de1f2f (cherry picked from commit 5e9de49b98caff57be13e8bd101144ae2475b6b5)
This commit is contained in:
committed by
PyTorch MergeBot
parent
ddb34e7b6a
commit
6bd4376c60
@ -4,12 +4,13 @@
|
||||
|
||||
#if defined(EXPOSE_C2_OPS) || \
|
||||
!defined(CAFFE2_IS_XPLAT_BUILD) && !defined(C10_MOBILE)
|
||||
#include <ATen/core/dispatch/OperatorOptions.h>
|
||||
#include <ATen/core/function_schema.h>
|
||||
#include <ATen/core/grad_mode.h>
|
||||
#include <ATen/core/op_registration/op_registration.h>
|
||||
#include <torch/csrc/jit/frontend/function_schema_parser.h>
|
||||
#include <c10/core/CompileTimeFunctionPointer.h>
|
||||
#include <c10/util/irange.h>
|
||||
#include <torch/csrc/jit/frontend/function_schema_parser.h>
|
||||
#include <torch/library.h>
|
||||
#include <vector>
|
||||
|
||||
@ -113,7 +114,9 @@ void call_caffe2_op_from_c10(
|
||||
_call_caffe2_op_from_c10(stack, Schema(), &_call_caffe2_op<Caffe2Operator>);
|
||||
}
|
||||
|
||||
inline FunctionSchema make_function_schema_for_c10(const char* schema_str) {
|
||||
inline FunctionSchema make_function_schema_for_c10(
|
||||
const char* schema_str,
|
||||
c10::optional<c10::AliasAnalysisKind> optional_alias_analysis_kind) {
|
||||
#if !defined(EXPOSE_C2_OPS) && \
|
||||
(defined(CAFFE2_IS_XPLAT_BUILD) || defined(C10_MOBILE))
|
||||
throw std::logic_error(
|
||||
@ -127,13 +130,17 @@ inline FunctionSchema make_function_schema_for_c10(const char* schema_str) {
|
||||
nullopt,
|
||||
IValue());
|
||||
|
||||
return FunctionSchema(
|
||||
auto schema = FunctionSchema(
|
||||
parsed_schema.name(),
|
||||
parsed_schema.overload_name(),
|
||||
std::move(arguments),
|
||||
parsed_schema.returns(),
|
||||
parsed_schema.is_vararg(),
|
||||
parsed_schema.is_varret());
|
||||
if (optional_alias_analysis_kind) {
|
||||
schema.setAliasAnalysis(*optional_alias_analysis_kind);
|
||||
}
|
||||
return schema;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -169,7 +176,7 @@ inline FunctionSchema make_function_schema_for_c10(const char* schema_str) {
|
||||
* caffe2.
|
||||
* - all operators must call C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10 and
|
||||
* C10_EXPORT_CAFFE2_OP_TO_C10_CPU .
|
||||
* - calling C10_EXPORT_CAFFE2_OP_TO_C10_CUDA is optional and can be omitted i f
|
||||
* - calling C10_EXPORT_CAFFE2_OP_TO_C10_CUDA is optional and can be omitted if
|
||||
* you don't want to expose the operator for CUDA operations.
|
||||
* - caffe2 arguments must come after caffe2 inputs, in other words, any tensor
|
||||
* inputs must precede any non-tensor inputs.
|
||||
@ -178,73 +185,85 @@ inline FunctionSchema make_function_schema_for_c10(const char* schema_str) {
|
||||
* - If your operator has a variable number of input tensors, make the first (!)
|
||||
* input an input of type TensorList. There must be no other tensor inputs.
|
||||
*/
|
||||
#define C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(OperatorName) \
|
||||
namespace caffe2 { \
|
||||
namespace _c10_ops { \
|
||||
#define C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(OperatorName) \
|
||||
namespace caffe2 { \
|
||||
namespace _c10_ops { \
|
||||
TORCH_API const FunctionSchema& schema_##OperatorName(); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
#define C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY(OperatorName, OperatorSchema) \
|
||||
/* Register the op schema with the c10 dispatcher */ \
|
||||
namespace caffe2 { \
|
||||
namespace _c10_ops { \
|
||||
C10_EXPORT const FunctionSchema& schema_##OperatorName() { \
|
||||
static const FunctionSchema schema = \
|
||||
::caffe2::detail::make_function_schema_for_c10(OperatorSchema); \
|
||||
return schema; \
|
||||
} \
|
||||
TORCH_LIBRARY_FRAGMENT(_caffe2, m) { \
|
||||
m.def(::caffe2::detail::make_function_schema_for_c10(OperatorSchema)); \
|
||||
} \
|
||||
} \
|
||||
#define C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY( \
|
||||
OperatorName, OperatorSchema, OptionalAliasAnalysisKind) \
|
||||
/* Register the op schema with the c10 dispatcher */ \
|
||||
namespace caffe2 { \
|
||||
namespace _c10_ops { \
|
||||
C10_EXPORT const FunctionSchema& schema_##OperatorName() { \
|
||||
static const FunctionSchema schema = \
|
||||
::caffe2::detail::make_function_schema_for_c10( \
|
||||
OperatorSchema, OptionalAliasAnalysisKind); \
|
||||
return schema; \
|
||||
} \
|
||||
TORCH_LIBRARY_FRAGMENT(_caffe2, m) { \
|
||||
m.def(::caffe2::detail::make_function_schema_for_c10( \
|
||||
OperatorSchema, OptionalAliasAnalysisKind)); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
#define C10_EXPORT_CAFFE2_OP_TO_C10_CPU_KERNEL_ONLY( \
|
||||
OperatorName, OperatorClass) \
|
||||
/* Register call_caffe2_op_from_c10 as a kernel with the c10 dispatcher */ \
|
||||
TORCH_LIBRARY_IMPL(_caffe2, CPU, m) { \
|
||||
m.impl("_caffe2::" #OperatorName, \
|
||||
torch::CppFunction::makeFromBoxedFunction< \
|
||||
::caffe2::detail::call_caffe2_op_from_c10< \
|
||||
::caffe2::_c10_ops::schema_##OperatorName, \
|
||||
OperatorClass>>()); \
|
||||
}
|
||||
TORCH_LIBRARY_IMPL(_caffe2, CPU, m) { \
|
||||
m.impl( \
|
||||
"_caffe2::" #OperatorName, \
|
||||
torch::CppFunction::makeFromBoxedFunction< \
|
||||
::caffe2::detail::call_caffe2_op_from_c10< \
|
||||
::caffe2::_c10_ops::schema_##OperatorName, \
|
||||
OperatorClass>>()); \
|
||||
}
|
||||
|
||||
#define C10_EXPORT_CAFFE2_OP_TO_C10_CPU( \
|
||||
OperatorName, OperatorSchema, OperatorClass) \
|
||||
C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY(OperatorName, OperatorSchema) \
|
||||
#define C10_EXPORT_CAFFE2_OP_TO_C10_CPU( \
|
||||
OperatorName, OperatorSchema, OperatorClass) \
|
||||
C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY( \
|
||||
OperatorName, OperatorSchema, c10::nullopt) \
|
||||
C10_EXPORT_CAFFE2_OP_TO_C10_CPU_KERNEL_ONLY(OperatorName, OperatorClass)
|
||||
|
||||
#define C10_EXPORT_CAFFE2_OP_TO_C10_CPU_WITH_ALIAS_ANALYSIS( \
|
||||
OperatorName, OperatorSchema, OperatorClass, OptionalAliasAnalysisKind) \
|
||||
C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY( \
|
||||
OperatorName, OperatorSchema, OptionalAliasAnalysisKind) \
|
||||
C10_EXPORT_CAFFE2_OP_TO_C10_CPU_KERNEL_ONLY(OperatorName, OperatorClass)
|
||||
|
||||
#define C10_EXPORT_CAFFE2_OP_TO_C10_CUDA(OperatorName, OperatorClass) \
|
||||
/* Register call_caffe2_op_from_c10 as a kernel with the c10 dispatcher */ \
|
||||
TORCH_LIBRARY_IMPL(_caffe2, CUDA, m) { \
|
||||
m.impl("_caffe2::" #OperatorName, \
|
||||
torch::CppFunction::makeFromBoxedFunction< \
|
||||
::caffe2::detail::call_caffe2_op_from_c10< \
|
||||
::caffe2::_c10_ops::schema_##OperatorName, \
|
||||
OperatorClass>>()); \
|
||||
}
|
||||
|
||||
TORCH_LIBRARY_IMPL(_caffe2, CUDA, m) { \
|
||||
m.impl( \
|
||||
"_caffe2::" #OperatorName, \
|
||||
torch::CppFunction::makeFromBoxedFunction< \
|
||||
::caffe2::detail::call_caffe2_op_from_c10< \
|
||||
::caffe2::_c10_ops::schema_##OperatorName, \
|
||||
OperatorClass>>()); \
|
||||
}
|
||||
|
||||
// You should never manually call the C10_EXPORT_CAFFE2_OP_TO_C10_HIP macro.
|
||||
// The C10_EXPORT_CAFFE2_OP_TO_C10_CUDA macro from above will be automatically
|
||||
// rewritten to C10_EXPORT_CAFFE2_OP_TO_C10_HIP by hipify.
|
||||
#define C10_EXPORT_CAFFE2_OP_TO_C10_HIP(OperatorName, OperatorClass) \
|
||||
/* Register call_caffe2_op_from_c10 as a kernel with the c10 dispatcher */ \
|
||||
TORCH_LIBRARY_IMPL(_caffe2, HIP, m) { \
|
||||
m.impl("_caffe2::" #OperatorName, \
|
||||
torch::CppFunction::makeFromBoxedFunction< \
|
||||
::caffe2::detail::call_caffe2_op_from_c10< \
|
||||
::caffe2::_c10_ops::schema_##OperatorName, \
|
||||
OperatorClass>>()); \
|
||||
}
|
||||
|
||||
TORCH_LIBRARY_IMPL(_caffe2, HIP, m) { \
|
||||
m.impl( \
|
||||
"_caffe2::" #OperatorName, \
|
||||
torch::CppFunction::makeFromBoxedFunction< \
|
||||
::caffe2::detail::call_caffe2_op_from_c10< \
|
||||
::caffe2::_c10_ops::schema_##OperatorName, \
|
||||
OperatorClass>>()); \
|
||||
}
|
||||
|
||||
#else
|
||||
// Don't use c10 dispatcher on mobile because of binary size
|
||||
#define C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(OperatorName)
|
||||
#define C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY(OperatorName, OperatorSchema)
|
||||
#define C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY( \
|
||||
OperatorName, OperatorSchema, OptionalAliasAnalysisKind)
|
||||
#define C10_EXPORT_CAFFE2_OP_TO_C10_CPU_KERNEL_ONLY(OperatorName, OperatorClass)
|
||||
#define C10_EXPORT_CAFFE2_OP_TO_C10_CPU( \
|
||||
OperatorName, OperatorSchema, OperatorClass)
|
||||
|
@ -200,8 +200,10 @@ REGISTER_GRADIENT(CopyCPUToGPU, GetCPUToGPUGradient);
|
||||
|
||||
C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY(
|
||||
CopyGPUToCPU,
|
||||
"_caffe2::CopyGPUToCPU(Tensor input) -> Tensor");
|
||||
"_caffe2::CopyGPUToCPU(Tensor input) -> Tensor",
|
||||
/*optional_alias_analysis_kind=*/c10::nullopt);
|
||||
|
||||
C10_EXPORT_CAFFE2_OP_TO_C10_SCHEMA_ONLY(
|
||||
CopyCPUToGPU,
|
||||
"_caffe2::CopyCPUToGPU(Tensor input) -> Tensor");
|
||||
"_caffe2::CopyCPUToGPU(Tensor input) -> Tensor",
|
||||
/*optional_alias_analysis_kind=*/c10::nullopt);
|
||||
|
Reference in New Issue
Block a user