Mirror of https://github.com/pytorch/pytorch.git
Enable loading int8 prepacked models in PredictorContainer
Summary: To test the int8 ads models on CPU and accelerators with the ads replayer, we need to load the PREPACKING_INIT_NET_TYPE in the int8 model to initialize the int8 w_packed blobs.

Test Plan: Ads replayer test. P74811059

Reviewed By: zrphercule

Differential Revision: D16518888

fbshipit-source-id: cee212710ad37d9e491c970b25b2fe484373e5e4
Commit d95763b4dc (parent cc4211069e), committed by Facebook Github Bot.
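For context, a minimal sketch (not the PredictorContainer code, which is internal and not part of this diff) of what loading the prepacking init net amounts to: run it once in the predictor's workspace so that the packed int8 weight blobs (e.g. w_packed) exist before the predict net runs. The helper name and file path are hypothetical.

#include <string>

#include "caffe2/core/logging.h"
#include "caffe2/core/workspace.h"
#include "caffe2/proto/caffe2_pb.h"
#include "caffe2/utils/proto_utils.h"

// Hypothetical helper: materialize the prepacked int8 weight blobs by running
// the prepacking init net (the net stored under PREPACKING_INIT_NET_TYPE)
// once in the given workspace.
bool RunPrepackingInitNet(caffe2::Workspace* ws, const std::string& path) {
  caffe2::NetDef prepacking_net;
  CAFFE_ENFORCE(
      caffe2::ReadProtoFromFile(path, &prepacking_net),
      "Failed to read prepacking init net from ",
      path);
  // Each op in this net is a weight-prepacking op whose output is a packed
  // weight blob (e.g. "w_packed") consumed by the int8 predict net.
  return ws->RunNetOnce(prepacking_net);
}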
@@ -53,7 +53,7 @@ void BoundShapeInferencer::InferOps(
     InferSparseLengthsSum(op);
   } else if (
       op.type() == "FC" || op.type() == "FCTransposed" ||
-      op.type() == "FbFCPacked") {
+      op.type() == "FbFCPacked" || op.type() == "Int8FC") {
     InferFC(op);
   } else if (op.type() == "Concat") {
     InferConcat(op);
@@ -424,12 +424,13 @@ void BoundShapeInferencer::InferFC(const OperatorDef& op) {
   const ShapeInfo& w_shape_info = w_it->second;
   const auto b_it = shape_info_.find(op.input(2));
   CAFFE_ENFORCE(
-      w_it != shape_info_.end(),
+      b_it != shape_info_.end(),
       "Shape of BIAS input of FC ",
       op.input(2),
       " needs to be presented");
   const ShapeInfo& b_shape_info = b_it->second;
   bool fp16 = (op.type() == "FbFCPacked");
+  bool int8_fc = (op.type() == "Int8FC" || op.engine() == "DNNLOWP");
   auto x_it = shape_info_.find(op.input(0));
   if (x_it == shape_info_.end()) {
     // We don't have a hint at the x input we try to deduce it from weight
@@ -451,13 +452,21 @@ void BoundShapeInferencer::InferFC(const OperatorDef& op) {
     dims.push_back(K);
     current_dim_type_ = ShapeInfo::DimType::BATCH;
     current_max_batch_size_ = spec_.max_batch_size;
+    TensorProto::DataType w_data_type;
+    if (fp16) {
+      w_data_type = TensorProto_DataType_FLOAT;
+    } else if (int8_fc) {
+      w_data_type = TensorProto_DataType_UINT8;
+    } else {
+      w_data_type = w_shape.data_type();
+    }
     // Note: for FbFCPacked, weight is fp16 but activations are in fp32
     CheckAndSetTensorShapeAndType(
         op.input(0),
         ShapeInfo::DimType::BATCH,
         dims,
-        fp16 ? TensorProto_DataType_FLOAT : w_shape.data_type(),
-        false);
+        w_data_type,
+        int8_fc ? true : false);
   } else {
     ShapeInfo& x_shape_info = x_it->second;
     if (x_shape_info.dim_type != ShapeInfo::DimType::BATCH) {
@@ -472,12 +481,20 @@ void BoundShapeInferencer::InferFC(const OperatorDef& op) {
       shape_info_[op.input(0)].shape, w_shape_info.shape, b_shape_info.shape};
   std::vector<TensorShape> output_shapes = InferOutput(op, input_shapes);
   CAFFE_ENFORCE_EQ(output_shapes.size(), 1);
+  TensorProto::DataType output_data_type;
+  if (fp16) {
+    output_data_type = TensorProto_DataType_FLOAT;
+  } else if (int8_fc) {
+    output_data_type = TensorProto_DataType_UINT8;
+  } else {
+    output_data_type = output_shapes[0].data_type();
+  }
   CheckAndSetTensorShapeAndType(
       op.output(0),
       ShapeInfo::DimType::BATCH,
       ConvertToVec(output_shapes[0].dims()),
-      fp16 ? TensorProto_DataType_FLOAT : output_shapes[0].data_type(),
-      false);
+      output_data_type,
+      int8_fc ? true : false);
 }

 void BoundShapeInferencer::InferCommonOp(const OperatorDef& op) {
@@ -511,7 +528,8 @@ void BoundShapeInferencer::InferCommonOp(const OperatorDef& op) {
       {"Int8AveragePool", 0},
       {"Int8FC", 1},
       {"Int8Conv", 1},
-      {"Int8SumRelu", 0}};
+      {"Int8SumRelu", 0},
+      {"Int8Relu", 0}};
   CAFFE_ENFORCE(
       type_info_from_input.find(op.type()) != type_info_from_input.end(),
       "Undefined quantized output data type, add it into type_info_from_input");
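The bound shape inferencer changes above route "Int8FC" (or any FC-style op using the "DNNLOWP" engine) through InferFC and mark its activations and output as quantized UINT8. A minimal sketch of an operator definition that would now take this path; the blob names are hypothetical and the snippet only builds the OperatorDef, it does not run shape inference.

#include "caffe2/proto/caffe2_pb.h"

// Hypothetical Int8FC op using prepacked weights; either the "Int8FC" type
// or the "DNNLOWP" engine makes the new int8_fc branch apply.
caffe2::OperatorDef MakeInt8FCOp() {
  caffe2::OperatorDef op;
  op.set_type("Int8FC");
  op.set_engine("DNNLOWP");
  op.add_input("X");         // activations, inferred as UINT8 by InferFC
  op.add_input("w_packed");  // prepacked int8 weights from the prepacking init net
  op.add_input("b");         // bias
  op.add_output("Y");        // output, inferred as UINT8 and marked quantized
  return op;
}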
@@ -125,7 +125,11 @@ void onnxifi(
     if (kv.size() == 2) {
       auto dims = caffe2::split(',', kv.back());
       TensorShape input;
-      input.set_data_type(TensorProto_DataType_FLOAT);
+      if (kv.front().find("int8") != std::string::npos) {
+        input.set_data_type(TensorProto_DataType_UINT8);
+      } else {
+        input.set_data_type(TensorProto_DataType_FLOAT);
+      }
       bool valid = true;
       for (const auto& d : dims) {
         try {
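The change above types an input shape hint as UINT8 whenever its name contains "int8", and leaves it as FLOAT otherwise. A small self-contained sketch of that rule; the function name is hypothetical and the snippet only shows the typing decision, not the full hint parsing.

#include <string>

#include "caffe2/proto/caffe2_pb.h"

// Sketch of the typing rule for shape hints: names containing "int8" get a
// UINT8 hint, everything else stays FLOAT.
caffe2::TensorShape MakeShapeHint(const std::string& name) {
  caffe2::TensorShape hint;
  if (name.find("int8") != std::string::npos) {
    hint.set_data_type(caffe2::TensorProto_DataType_UINT8);
  } else {
    hint.set_data_type(caffe2::TensorProto_DataType_FLOAT);
  }
  return hint;
}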
@@ -229,7 +229,9 @@ FullyConnectedDNNLowPPackWeightOp::FullyConnectedDNNLowPPackWeightOp(
     : DNNLowPOp<uint8_t, FCFp32Op>(operator_def, ws),
       axis_w_(this->GetSingleArgument<int32_t>("axis_w", 1)),
       quantize_channelwise_(
-          this->GetSingleArgument<bool>("quantize_channelwise", false)) {
+          this->GetSingleArgument<bool>("quantize_channelwise", false)),
+      save_unpacked_weights_(
+          this->GetSingleArgument<bool>("save_unpacked_weights", false)) {
   if (this->debug_def().engine() == "DNNLOWP_ROWWISE") {
     quantize_channelwise_ = true;
   }
@@ -258,6 +260,13 @@ bool FullyConnectedDNNLowPPackWeightOp::RunOnDevice() {
   QuantizeWeight<uint8_t>(
       InputBlob(0), K, N, Y->qparams, W_quantized, qfactory_.get());

+  if (save_unpacked_weights_) {
+    ReinitializeTensor(
+        &Y->original_tensor, filter.sizes(), at::dtype<int8_t>().device(CPU));
+    auto* buffer = Y->original_tensor.template mutable_data<int8_t>();
+    CAFFE_ENFORCE_EQ(Y->original_tensor.numel(), W_quantized.size());
+    memcpy(buffer, W_quantized.data(), W_quantized.size() * sizeof(int8_t));
+  }
   if (this->InputIsType<int8::Int8TensorCPU>(0) && quantize_channelwise_) {
     static int log_occurences = 0;
     if (log_occurences < 32) {
@@ -24,6 +24,7 @@ class FullyConnectedDNNLowPPackWeightOp final
   int axis_w_;
   bool quantize_channelwise_;
   int nbits_in_non_outlier_; // only for DNNLOWP_ACC16
+  bool save_unpacked_weights_;

   INPUT_TAGS(FILTER, BIAS);
 };
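The pack-weight changes above add a save_unpacked_weights argument; when it is set, the op also keeps a copy of the unpacked quantized weights in Y->original_tensor. A hedged sketch of how such an op might be declared when building a prepacking init net; the op type "Int8FCPackWeight" and the blob names are assumptions here, not taken from this diff.

#include "caffe2/proto/caffe2_pb.h"

// Sketch: a weight-prepacking op (assumed to be registered as
// "Int8FCPackWeight" under the DNNLOWP engine) that also retains the
// unpacked quantized weights via the new save_unpacked_weights argument.
caffe2::OperatorDef MakePackWeightOp() {
  caffe2::OperatorDef op;
  op.set_type("Int8FCPackWeight");
  op.set_engine("DNNLOWP");
  op.add_input("W");          // original weight blob (hypothetical name)
  op.add_input("b");          // optional bias
  op.add_output("w_packed");  // packed weight blob consumed by Int8FC
  auto* arg = op.add_arg();
  arg->set_name("save_unpacked_weights");
  arg->set_i(1);              // bool args are read via GetSingleArgument<bool>
  return op;
}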