Enable loading int8 prepacked models in PredictorContainer

Summary: To test the int8 ads models on CPU and on accelerators with the ads replayer, we need to load and run the PREPACKING_INIT_NET_TYPE net stored in the int8 model so that the int8 w_packed blobs are initialized.
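
For context on how the prepacking init net is used: it is run once in the target workspace so that the packed int8 weight blobs (e.g. w_packed) exist before the predict net executes. Below is a minimal standalone sketch of that flow, not the PredictorContainer code path itself; the file names are placeholders.

#include "caffe2/core/workspace.h"
#include "caffe2/proto/caffe2_pb.h"
#include "caffe2/utils/proto_utils.h"

int main() {
  caffe2::NetDef prepack_init_net, predict_net;
  // Placeholder paths; in the real flow these nets come out of the int8
  // model's meta net def, with the prepacking net stored under
  // PREPACKING_INIT_NET_TYPE.
  CAFFE_ENFORCE(
      caffe2::ReadProtoFromFile("prepack_init_net.pb", &prepack_init_net));
  CAFFE_ENFORCE(caffe2::ReadProtoFromFile("predict_net.pb", &predict_net));

  caffe2::Workspace ws;
  // Running the prepacking init net creates the int8 w_packed blobs.
  CAFFE_ENFORCE(ws.RunNetOnce(prepack_init_net));
  // The predict net can now find its packed weights in the workspace.
  CAFFE_ENFORCE(ws.RunNetOnce(predict_net));
  return 0;
}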

Test Plan:
Ads replayer test.

P74811059

Reviewed By: zrphercule

Differential Revision: D16518888

fbshipit-source-id: cee212710ad37d9e491c970b25b2fe484373e5e4
Author: Summer Deng
Date: 2019-09-06 02:51:32 -07:00
Committed by: Facebook Github Bot
Parent: cc4211069e
Commit: d95763b4dc
4 changed files with 41 additions and 9 deletions

@@ -53,7 +53,7 @@ void BoundShapeInferencer::InferOps(
     InferSparseLengthsSum(op);
   } else if (
       op.type() == "FC" || op.type() == "FCTransposed" ||
-      op.type() == "FbFCPacked") {
+      op.type() == "FbFCPacked" || op.type() == "Int8FC") {
     InferFC(op);
   } else if (op.type() == "Concat") {
     InferConcat(op);
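
With the hunk above, Int8FC ops are routed through the same shape-inference path as FC and FbFCPacked. For illustration only, roughly what such an operator looks like in a net; the blob names are made up and this is not taken from the patch.

#include "caffe2/proto/caffe2_pb.h"

caffe2::OperatorDef MakeInt8FCExample() {
  caffe2::OperatorDef op;
  op.set_type("Int8FC");
  op.set_engine("DNNLOWP");
  op.add_input("x");         // activations, op.input(0)
  op.add_input("w_packed");  // quantized (possibly prepacked) weights
  op.add_input("b");         // bias, op.input(2), checked by InferFC
  op.add_output("y");
  return op;
}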
@@ -424,12 +424,13 @@ void BoundShapeInferencer::InferFC(const OperatorDef& op) {
   const ShapeInfo& w_shape_info = w_it->second;
   const auto b_it = shape_info_.find(op.input(2));
   CAFFE_ENFORCE(
-      w_it != shape_info_.end(),
+      b_it != shape_info_.end(),
       "Shape of BIAS input of FC ",
       op.input(2),
       " needs to be presented");
   const ShapeInfo& b_shape_info = b_it->second;
   bool fp16 = (op.type() == "FbFCPacked");
+  bool int8_fc = (op.type() == "Int8FC" || op.engine() == "DNNLOWP");
   auto x_it = shape_info_.find(op.input(0));
   if (x_it == shape_info_.end()) {
     // We don't have a hint at the x input we try to deduce it from weight
@@ -451,13 +452,21 @@ void BoundShapeInferencer::InferFC(const OperatorDef& op) {
     dims.push_back(K);
     current_dim_type_ = ShapeInfo::DimType::BATCH;
     current_max_batch_size_ = spec_.max_batch_size;
+    TensorProto::DataType w_data_type;
+    if (fp16) {
+      w_data_type = TensorProto_DataType_FLOAT;
+    } else if (int8_fc) {
+      w_data_type = TensorProto_DataType_UINT8;
+    } else {
+      w_data_type = w_shape.data_type();
+    }
     // Note: for FbFCPacked, weight is fp16 but actications are in fp32
     CheckAndSetTensorShapeAndType(
         op.input(0),
         ShapeInfo::DimType::BATCH,
         dims,
-        fp16 ? TensorProto_DataType_FLOAT : w_shape.data_type(),
-        false);
+        w_data_type,
+        int8_fc ? true : false);
   } else {
     ShapeInfo& x_shape_info = x_it->second;
     if (x_shape_info.dim_type != ShapeInfo::DimType::BATCH) {
@@ -472,12 +481,20 @@ void BoundShapeInferencer::InferFC(const OperatorDef& op) {
       shape_info_[op.input(0)].shape, w_shape_info.shape, b_shape_info.shape};
   std::vector<TensorShape> output_shapes = InferOutput(op, input_shapes);
   CAFFE_ENFORCE_EQ(output_shapes.size(), 1);
+  TensorProto::DataType output_data_type;
+  if (fp16) {
+    output_data_type = TensorProto_DataType_FLOAT;
+  } else if (int8_fc) {
+    output_data_type = TensorProto_DataType_UINT8;
+  } else {
+    output_data_type = output_shapes[0].data_type();
+  }
   CheckAndSetTensorShapeAndType(
       op.output(0),
       ShapeInfo::DimType::BATCH,
       ConvertToVec(output_shapes[0].dims()),
-      fp16 ? TensorProto_DataType_FLOAT : output_shapes[0].data_type(),
-      false);
+      output_data_type,
+      int8_fc ? true : false);
 }
 
 void BoundShapeInferencer::InferCommonOp(const OperatorDef& op) {
@@ -511,7 +528,8 @@ void BoundShapeInferencer::InferCommonOp(const OperatorDef& op) {
         {"Int8AveragePool", 0},
         {"Int8FC", 1},
         {"Int8Conv", 1},
-        {"Int8SumRelu", 0}};
+        {"Int8SumRelu", 0},
+        {"Int8Relu", 0}};
     CAFFE_ENFORCE(
         type_info_from_input.find(op.type()) != type_info_from_input.end(),
         "Undefined quantized output data type, add it into type_info_from_input");

@@ -125,7 +125,11 @@ void onnxifi(
     if (kv.size() == 2) {
       auto dims = caffe2::split(',', kv.back());
       TensorShape input;
-      input.set_data_type(TensorProto_DataType_FLOAT);
+      if (kv.front().find("int8") != std::string::npos) {
+        input.set_data_type(TensorProto_DataType_UINT8);
+      } else {
+        input.set_data_type(TensorProto_DataType_FLOAT);
+      }
       bool valid = true;
       for (const auto& d : dims) {
         try {
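
This benchmark-side change keys the hinted input type off the blob name: names containing "int8" get UINT8 shape hints, everything else stays FLOAT. A self-contained sketch of that convention follows, assuming the "name:d1,d2,..." hint format implied by the surrounding code; the helper is illustrative, not the actual onnxifi() code.

#include <sstream>
#include <string>
#include <vector>

// Parse a "name:d1,d2,..." shape hint. Names containing "int8" are treated
// as quantized (uint8) inputs, mirroring the check added above.
struct ShapeHint {
  std::string name;
  std::vector<int> dims;
  bool is_uint8 = false;
};

ShapeHint ParseShapeHint(const std::string& spec) {
  ShapeHint hint;
  const auto colon = spec.find(':');
  hint.name = spec.substr(0, colon);
  hint.is_uint8 = hint.name.find("int8") != std::string::npos;
  if (colon == std::string::npos) {
    return hint;  // no dimensions given
  }
  std::stringstream dim_stream(spec.substr(colon + 1));
  std::string d;
  while (std::getline(dim_stream, d, ',')) {
    hint.dims.push_back(std::stoi(d));
  }
  return hint;
}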

@@ -229,7 +229,9 @@ FullyConnectedDNNLowPPackWeightOp::FullyConnectedDNNLowPPackWeightOp(
     : DNNLowPOp<uint8_t, FCFp32Op>(operator_def, ws),
       axis_w_(this->GetSingleArgument<int32_t>("axis_w", 1)),
       quantize_channelwise_(
-          this->GetSingleArgument<bool>("quantize_channelwise", false)) {
+          this->GetSingleArgument<bool>("quantize_channelwise", false)),
+      save_unpacked_weights_(
+          this->GetSingleArgument<bool>("save_unpacked_weights", false)) {
   if (this->debug_def().engine() == "DNNLOWP_ROWWISE") {
     quantize_channelwise_ = true;
   }
@@ -258,6 +260,13 @@ bool FullyConnectedDNNLowPPackWeightOp::RunOnDevice() {
   QuantizeWeight<uint8_t>(
       InputBlob(0), K, N, Y->qparams, W_quantized, qfactory_.get());
+  if (save_unpacked_weights_) {
+    ReinitializeTensor(
+        &Y->original_tensor, filter.sizes(), at::dtype<int8_t>().device(CPU));
+    auto* buffer = Y->original_tensor.template mutable_data<int8_t>();
+    CAFFE_ENFORCE_EQ(Y->original_tensor.numel(), W_quantized.size());
+    memcpy(buffer, W_quantized.data(), W_quantized.size() * sizeof(int8_t));
+  }
 
   if (this->InputIsType<int8::Int8TensorCPU>(0) && quantize_channelwise_) {
     static int log_occurences = 0;
     if (log_occurences < 32) {
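
save_unpacked_weights is read with GetSingleArgument, so it is just an operator argument on the weight-prepacking op. A hedged example of enabling it when building a prepacking init net; the op type Int8FCPackWeight and the blob names are assumptions, not shown in this diff.

#include "caffe2/proto/caffe2_pb.h"

// Build a weight-prepacking operator with save_unpacked_weights enabled so
// the quantized-but-unpacked weights are kept alongside the packed buffer.
caffe2::OperatorDef MakePackOpExample() {
  caffe2::OperatorDef op;
  op.set_type("Int8FCPackWeight");  // assumed DNNLOWP pack-op name
  op.set_engine("DNNLOWP");
  op.add_input("w");                // FILTER
  op.add_input("b");                // BIAS
  op.add_output("w_packed");
  auto* arg = op.add_arg();
  arg->set_name("save_unpacked_weights");
  arg->set_i(1);                    // read via GetSingleArgument<bool>(...)
  return op;
}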

@@ -24,6 +24,7 @@ class FullyConnectedDNNLowPPackWeightOp final
   int axis_w_;
   bool quantize_channelwise_;
   int nbits_in_non_outlier_; // only for DNNLOWP_ACC16
+  bool save_unpacked_weights_;
 
   INPUT_TAGS(FILTER, BIAS);
 };