From 060eb0de353ccabebe2f5ade27f72854787c7142 Mon Sep 17 00:00:00 2001 From: Man Wang Date: Tue, 13 Nov 2018 10:38:18 -0800 Subject: [PATCH] [Quantization] Load quantized resnet50 model --- include/glow/Graph/Graph.h | 5 + lib/Graph/Graph.cpp | 12 ++ lib/Importer/Caffe2ModelLoader.cpp | 257 +++++++++++++++++++++++++---- tests/images/run.sh | 2 + utils/download_caffe2_models.sh | 1 + 5 files changed, 249 insertions(+), 28 deletions(-) diff --git a/include/glow/Graph/Graph.h b/include/glow/Graph/Graph.h index fbd0e1f660..ed7678c17a 100644 --- a/include/glow/Graph/Graph.h +++ b/include/glow/Graph/Graph.h @@ -253,6 +253,11 @@ class Function final : public Named { llvm::ArrayRef<unsigned_t> strides, llvm::ArrayRef<unsigned_t> pads); + AvgPoolNode *createAvgPool(llvm::StringRef name, NodeValue input, + TypeRef outTy, llvm::ArrayRef<unsigned_t> kernels, + llvm::ArrayRef<unsigned_t> strides, + llvm::ArrayRef<unsigned_t> pads); + AvgPoolNode *createAvgPool(llvm::StringRef name, NodeValue input, unsigned_t kernel, unsigned_t stride, unsigned_t pad); diff --git a/lib/Graph/Graph.cpp b/lib/Graph/Graph.cpp index 1515306bdf..b7b81fcd60 100644 --- a/lib/Graph/Graph.cpp +++ b/lib/Graph/Graph.cpp @@ -521,6 +521,18 @@ AvgPoolNode *Function::createAvgPool(llvm::StringRef name, NodeValue input, return addNode(new AvgPoolNode(name, OT, input, kernels, strides, pads)); } +AvgPoolNode *Function::createAvgPool(llvm::StringRef name, NodeValue input, + TypeRef outTy, + llvm::ArrayRef<unsigned_t> kernels, + llvm::ArrayRef<unsigned_t> strides, + llvm::ArrayRef<unsigned_t> pads) { + ShapeNHWC idim = ShapeNHWC(input.dims()); + ShapeHW kdim(kernels); + (void)kdim; + checkKernelSize(idim, kernels, pads); + return addNode(new AvgPoolNode(name, outTy, input, kernels, strides, pads)); + } + AvgPoolNode *Function::createAvgPool(llvm::StringRef name, NodeValue input, unsigned_t kernel, unsigned_t stride, unsigned_t pad) { diff --git a/lib/Importer/Caffe2ModelLoader.cpp b/lib/Importer/Caffe2ModelLoader.cpp index 26a03ce65d..4a45c53b28 100644 --- a/lib/Importer/Caffe2ModelLoader.cpp +++ 
b/lib/Importer/Caffe2ModelLoader.cpp @@ -39,6 +39,15 @@ using llvm::cast; using ArgumentDictionaryTy = std::unordered_map<std::string, const caffe2::Argument *>; +/// For the quantized Caffe2 ops, the activations are quantized to uint8_t. +/// In Glow, the activations are quantized to int8_t. Therefore, for the offset +/// read from the quantized Caffe2 model, we need to subtract 128 (i.e. -INT8_MIN) to +/// make the activations become int8_t. +/// For Glow: -128 <= orig_fp32/scale_1 + offset_1 < 128 +/// For Caffe2: 0 <= orig_fp32/scale_2 + offset_2 < 256 +/// Therefore, we can make scale_1 == scale_2, and offset_1 = offset_2 - 128 +const int32_t OFFSETSHIFT = 128; + /// Translates the protocol buffer node \p op into a random access map. static ArgumentDictionaryTy loadArgumentMap(const caffe2::OperatorDef &op) { ArgumentDictionaryTy dict; @@ -147,7 +156,8 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) { const std::string &opName = loadOperatorName(op); - if (typeName == "Conv") { + if (typeName == "Conv" || typeName == "Int8Conv" || + typeName == "Int8ConvRelu") { // Load the inputs: std::vector<unsigned_t> strides = getSizeHW(dict, "stride", 1); std::vector<unsigned_t> pads = getPads(dict); @@ -159,34 +169,22 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) { Tensor *w = getTensorByName(op.input(1)); // Transpose the weights to the right format. Glow expects to read the - // weights in the format CRSK. Caffe2 stores the operators as KCRS. + // weights in the format CRSK. // C - output_depth, R - filter_height, S - filter_width, K - input_depth. + // Caffe2 "Conv" op always stores the weight as CKRS, while for "Int8Conv", + // and "Int8ConvRelu", the weights always follow the "order" arg. Tensor wtag; - w->transpose(&wtag, NCHW2NHWC); + if (typeName != "Conv" && order == "NHWC") { + wtag.assign(w); + } else { + w->transpose(&wtag, NCHW2NHWC); + } // The structure of the conv weights is: NHWC. We take the C, which is the // number of filters. 
We use this value to calculate the size of the bias // if it is not specified. size_t depth = wtag.dims()[0]; - // Construct the Filter field. - auto *filter = G_.getParent()->createConstant("conv.filter", wtag); - - // Construct the Bias field. - Tensor biasTensor(ElemKind::FloatTy, {depth}); - biasTensor.zero(); - - // Check if we have a serialized bias vector. - if (op.input_size() > 2) { - auto &biasTensorName = op.input(2); - if (tensors_.count(biasTensorName)) { - // Load the serialized bias vector. - Tensor *b = getTensorByName(biasTensorName); - biasTensor.assign(b); - } - } - auto *bias = G_.getParent()->createConstant("conv.bias", biasTensor); - // We expect the input to be NHWC. Node *tr; if (order == "NCHW") { @@ -201,7 +199,60 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) { calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides, pads); std::array<size_t, 4> outDims = { {idim.n, outSz.first, outSz.second, depth}}; - auto outTy = G_.getParent()->uniqueType(ElemKind::FloatTy, outDims); + + TypeRef outTy; + Constant *filter; + Constant *bias; + if (typeName == "Conv") { + // Construct the Bias field. + Tensor biasTensor(ElemKind::FloatTy, {depth}); + biasTensor.zero(); + + // Check if we have a serialized bias vector. + if (op.input_size() > 2) { + const auto &biasTensorName = op.input(2); + if (tensors_.count(biasTensorName)) { + // Load the serialized bias vector. + Tensor *b = getTensorByName(biasTensorName); + biasTensor.assign(b); + } + } + outTy = G_.getParent()->uniqueType(ElemKind::FloatTy, outDims); + filter = G_.getParent()->createConstant("conv.filter", wtag); + bias = G_.getParent()->createConstant("conv.bias", biasTensor); + } else { + assert(dict.count("Y_zero_point") && + "missing zero point for quantized output type"); + assert(dict.count("Y_scale") && + "missing Y_scale for quantized output type"); + // Construct the Bias field. 
+ Tensor biasTensor(ElemKind::Int32QTy, {depth}, 1.0, 0); + biasTensor.zero(); + // Check if we have a serialized bias vector. + if (op.input_size() > 2) { + const auto &biasTensorName = op.input(2); + if (tensors_.count(biasTensorName)) { + // Load the serialized bias vector. + Tensor *b = getTensorByName(biasTensorName); + biasTensor.assign(b); + } + } + float scale = loadFloat(dict["Y_scale"]); + int32_t offset = loadInt(dict["Y_zero_point"]); + outTy = G_.getParent()->uniqueType(ElemKind::Int8QTy, outDims, scale, + offset - OFFSETSHIFT); + + // Construct the quantized Filter and bias field. + filter = G_.getParent()->createConstant( + ElemKind::Int8QTy, wtag.dims(), wtag.getType().getScale(), + wtag.getType().getOffset(), "conv.filter"); + filter->assign(&wtag); + bias = G_.getParent()->createConstant( + ElemKind::Int32QTy, biasTensor.dims(), + biasTensor.getType().getScale(), biasTensor.getType().getOffset(), + "conv.bias"); + bias->assign(&biasTensor); + } Node *node = G_.createConv(opName, tr, filter, bias, outTy, kernels, strides, pads, group); @@ -214,7 +265,47 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) { return; } - if (typeName == "MaxPool" || typeName == "AveragePool") { + if (typeName == "Int8SumRelu") { + assert(op.input_size() == 2 && "Only Sum of 2 inputs is supported."); + assert(dict.count("Y_zero_point") && + "missing zero point for quantized output type"); + assert(dict.count("Y_scale") && + "missing Y_scale for quantized output type"); + auto in0 = getNodeValueOrCreateConstantByName(op.input(0)); + auto in1 = getNodeValueOrCreateConstantByName(op.input(1)); + auto outDims = in0.getType()->dims(); + auto outTy = G_.getParent()->uniqueType( + ElemKind::Int8QTy, outDims, loadFloat(dict["Y_scale"]), + loadInt(dict["Y_zero_point"]) - OFFSETSHIFT); + auto *node = G_.createAdd(opName, outTy, in0, in1); + addNodeAsOutput(op, node); + return; + } + + if (typeName == "Int8Quantize") { + assert(dict.count("Y_zero_point") && + 
"missing zero point for quantized output type"); + assert(dict.count("Y_scale") && + "missing Y_scale for quantized output type"); + auto in = getNodeValueOrCreateConstantByName(op.input(0)); + auto outDims = in.getType()->dims(); + auto outTy = G_.getParent()->uniqueType( + ElemKind::Int8QTy, outDims, loadFloat(dict["Y_scale"]), + loadInt(dict["Y_zero_point"]) - OFFSETSHIFT); + Node *N = G_.createQuantize(opName, in, outTy); + addNodeAsOutput(op, N); + return; + } + + if (typeName == "Int8Dequantize") { + auto in = getNodeValueOrCreateConstantByName(op.input(0)); + auto *node = G_.createDequantize(opName, in); + addNodeAsOutput(op, node); + return; + } + + if (typeName == "MaxPool" || typeName == "AveragePool" || + typeName == "Int8MaxPool" || typeName == "Int8AveragePool") { // Load the inputs: auto in = getNodeValueOrCreateConstantByName(op.input(0)); std::vector<unsigned_t> strides = getSizeHW(dict, "stride", 1); @@ -238,7 +329,29 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) { } Node *node = nullptr; - if (typeName == "MaxPool") { + + if (typeName == "Int8MaxPool" || typeName == "Int8AveragePool") { + // Create the node with quantized type. + assert(dict.count("Y_zero_point") && + "missing zero point for quantized output type"); + assert(dict.count("Y_scale") && + "missing Y_scale for quantized output type"); + ShapeNHWC idim = ShapeNHWC(tr->getType(0)->dims()); + auto outSz = + calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides, pads); + std::array<size_t, 4> outDims = { + {idim.n, outSz.first, outSz.second, idim.c}}; + if (typeName == "Int8MaxPool") { + // Int8MaxPool output quantization should be the same as the input, so just + // ignore the given params. 
+ node = G_.createMaxPool(opName, tr, kernels, strides, pads); + } else { + auto outTy = G_.getParent()->uniqueType( + ElemKind::Int8QTy, outDims, loadFloat(dict["Y_scale"]), + loadInt(dict["Y_zero_point"]) - OFFSETSHIFT); + node = G_.createAvgPool(opName, tr, outTy, kernels, strides, pads); + } + } else if (typeName == "MaxPool") { node = G_.createMaxPool(opName, tr, kernels, strides, pads); } else { node = G_.createAvgPool(opName, tr, kernels, strides, pads); @@ -309,7 +422,7 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) { return; } - if (typeName == "FC" || typeName == "FCTransposed") { + if (typeName == "FC" || typeName == "FCTransposed" || typeName == "Int8FC") { // Load the inputs: auto in = getNodeValueOrCreateConstantByName(op.input(0)); if (in.getType()->dims().size() > 2) { @@ -327,12 +440,18 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) { Tensor tmp; if (w->dims().size() > 2) { auto wDims = flattenCdr(w->dims(), axis_w); - tmp.reset(ElemKind::FloatTy, {wDims.first, wDims.second}); + if (typeName == "FC" || typeName == "FCTransposed") { + tmp.reset(ElemKind::FloatTy, {wDims.first, wDims.second}); + } else { + tmp.reset(ElemKind::Int8QTy, {wDims.first, wDims.second}, + w->getType().getScale(), w->getType().getOffset()); + } tmp.copyRawFrom(w); w = &tmp; } + Tensor wtag; - if (typeName == "FC") { + if (typeName == "FC" || typeName == "Int8FC") { w->transpose(&wtag, {1, 0}); } else { wtag.assign(w); @@ -341,7 +460,22 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) { auto W = G_.getParent()->addConstant(new Constant("weights", std::move(wtag))); auto B = G_.getParent()->addConstant(new Constant("biases", std::move(*b))); - auto *node = G_.createFullyConnected(opName, in, W, B); + + Node *node = nullptr; + if (typeName == "Int8FC") { + // Create the node with quantized type. 
+ assert(dict.count("Y_zero_point") && + "missing zero point for quantized output type"); + assert(dict.count("Y_scale") && + "missing Y_scale for quantized output type"); + auto outTy = G_.getParent()->uniqueType( + ElemKind::Int8QTy, {in.getType()->dims()[0], B->getType()->dims()[0]}, + loadFloat(dict["Y_scale"]), + loadInt(dict["Y_zero_point"]) - OFFSETSHIFT); + node = G_.createFullyConnected(opName, in, W, B, outTy); + } else { + node = G_.createFullyConnected(opName, in, W, B); + } // Save the outputs: addNodeAsOutput(op, node); @@ -602,6 +736,73 @@ void Caffe2ModelLoader::loadWeight(const caffe2::OperatorDef &op) { return; } + // Load quantized tensors: + if (typeName == "Int8GivenTensorFill" || + typeName == "Int8GivenIntTensorFill") { + /* + output: "conv1_w" + name: "" + type: "Int8GivenTensorFill" + arg { + name: "shape" + ints: 96 + ints: 3 + ints: 11 + ints: 11 + } + arg { + name: "values" + s: "\x7f\x80\x80\x7" + } + arg { + name: "Y_scale" + f: 0.00044428 + } + arg { + name: "Y_zero_point" + i: 127 + } + */ + auto *T = new Tensor(); + for (auto &o : op.output()) { + if (tensors_.count(o)) + continue; + tensors_[o] = T; + } + + auto dim = getShape(dict["shape"]); + + assert(dict.count("Y_zero_point") && + "missing zero point for quantized output type"); + assert(dict.count("Y_scale") && + "missing Y_scale for quantized output type"); + + float scale = loadFloat(dict["Y_scale"]); + int32_t offset = loadInt(dict["Y_zero_point"]); + size_t i = 0; + if (typeName == "Int8GivenTensorFill") { + // Although in the Caffe2 quantized model the weights are int8 quantized, + // they are stored in uint8_t format because Caffe2 requires the + // types of input and weights to be the same. Therefore, we need to + // convert them to int8 by subtracting 128. 
+ T->reset(ElemKind::Int8QTy, dim, scale, offset - OFFSETSHIFT); + auto TH = T->getHandle<int8_t>(); + std::string str = dict["values"]->s(); + for (; i < str.size(); i++) { + TH.raw(i) = ((uint8_t)(str.c_str()[i]) - OFFSETSHIFT); + } + } else { + T->reset(ElemKind::Int32QTy, dim, scale, offset); + auto TH = T->getHandle<int32_t>(); + for (auto num : dict["values"]->ints()) { + TH.raw(i++) = num; + } + } + assert(i == T->size() && "The number of serialized values does not " + "match the size of the tensor."); + return; + } + // Load tensors with constant fill: if (typeName == "ConstantFill") { /* diff --git a/tests/images/run.sh b/tests/images/run.sh index 67e9adbdd6..d7faaae237 100755 --- a/tests/images/run.sh +++ b/tests/images/run.sh @@ -41,3 +41,5 @@ done for png_filename in tests/images/imagenet_299/*.png; do ./bin/image-classifier "$png_filename" -image-mode=0to1 -m=googlenet_v4_slim/googlenet_v4_slim.onnx -model-input-name=input:0 -image-layout=NHWC -label-offset=1 "$@" done +# Quantized Resnet50 Caffe2 model test +./bin/image-classifier tests/images/imagenet/*.png -image-mode=0to1 -m=quant_resnet50 -model-input-name=gpu_0/data_0 -use-imagenet-normalization "$@" diff --git a/utils/download_caffe2_models.sh b/utils/download_caffe2_models.sh index 1148775cf8..440d34cc32 100755 --- a/utils/download_caffe2_models.sh +++ b/utils/download_caffe2_models.sh @@ -26,6 +26,7 @@ vgg19 zfnet512 bvlc_alexnet en2gr +quant_resnet50 EOF )