Skip to content

Commit 9f706f3

Browse files
Man Wangbeicy
Man Wang
authored and committed
[Quantization] Load quantized resnet50 model
1 parent 72603d9 commit 9f706f3

File tree

5 files changed

+249
-28
lines changed

5 files changed

+249
-28
lines changed

include/glow/Graph/Graph.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,11 @@ class Function final : public Named {
253253
llvm::ArrayRef<unsigned_t> strides,
254254
llvm::ArrayRef<unsigned_t> pads);
255255

256+
AvgPoolNode *createAvgPool(llvm::StringRef name, NodeValue input,
257+
TypeRef outTy, llvm::ArrayRef<unsigned_t> kernels,
258+
llvm::ArrayRef<unsigned_t> strides,
259+
llvm::ArrayRef<unsigned_t> pads);
260+
256261
AvgPoolNode *createAvgPool(llvm::StringRef name, NodeValue input,
257262
unsigned_t kernel, unsigned_t stride,
258263
unsigned_t pad);

lib/Graph/Graph.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,18 @@ AvgPoolNode *Function::createAvgPool(llvm::StringRef name, NodeValue input,
521521
return addNode(new AvgPoolNode(name, OT, input, kernels, strides, pads));
522522
}
523523

524+
AvgPoolNode *Function::createAvgPool(llvm::StringRef name, NodeValue input,
525+
TypeRef outTy,
526+
llvm::ArrayRef<unsigned_t> kernels,
527+
llvm::ArrayRef<unsigned_t> strides,
528+
llvm::ArrayRef<unsigned_t> pads) {
529+
ShapeNHWC idim = ShapeNHWC(input.dims());
530+
ShapeHW kdim(kernels);
531+
(void)kdim;
532+
checkKernelSize(idim, kernels, pads);
533+
return addNode(new AvgPoolNode(name, outTy, input, kernels, strides, pads));
534+
}
535+
524536
AvgPoolNode *Function::createAvgPool(llvm::StringRef name, NodeValue input,
525537
unsigned_t kernel, unsigned_t stride,
526538
unsigned_t pad) {

lib/Importer/Caffe2ModelLoader.cpp

Lines changed: 229 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,15 @@ using llvm::cast;
3939
using ArgumentDictionaryTy =
4040
std::unordered_map<std::string, const caffe2::Argument *>;
4141

42+
/// For the quantized Caffe2 ops, the activations are quantized to uint_8.
43+
/// In Glow, the activations are quantized to int_8. Therefore, for the offset
44+
/// read from the quantized caffe2 model, we need to subtract 128 (i.e. add INT8_MIN) to
45+
/// make the activations become int8_t.
46+
/// For Glow: -127 <= orig_fp32/scale_1 + offset_1 < 128
47+
/// For Caffe2: 0 <= orig_fp32/scale_2 + offset_2 < 255
48+
/// Therefore, we can make scale_1 == scale_2, and offset_1 = offset_2 - 128
49+
const int32_t OFFSETSHIFT = 128;
50+
4251
/// Translates the protocol buffer node \p op into a random access map.
4352
static ArgumentDictionaryTy loadArgumentMap(const caffe2::OperatorDef &op) {
4453
ArgumentDictionaryTy dict;
@@ -147,7 +156,8 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
147156

148157
const std::string &opName = loadOperatorName(op);
149158

150-
if (typeName == "Conv") {
159+
if (typeName == "Conv" || typeName == "Int8Conv" ||
160+
typeName == "Int8ConvRelu") {
151161
// Load the inputs:
152162
std::vector<unsigned_t> strides = getSizeHW(dict, "stride", 1);
153163
std::vector<unsigned_t> pads = getPads(dict);
@@ -159,34 +169,22 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
159169
Tensor *w = getTensorByName(op.input(1));
160170

161171
// Transpose the weights to the right format. Glow expects to read the
162-
// weights in the format CRSK. Caffe2 stores the operators as KCRS.
172+
// weights in the format CRSK.
163173
// C - output_depth, R - filter_height, S - filter_width, K - input_depth.
174+
// Caffe2 "Conv" op always stores the weight as CKRS, while for "Int8Conv",
175+
// and "Int8ConvRelu", the weights always follows the "order" arg.
164176
Tensor wtag;
165-
w->transpose(&wtag, NCHW2NHWC);
177+
if (typeName != "Conv" && order == "NHWC") {
178+
wtag.assign(w);
179+
} else {
180+
w->transpose(&wtag, NCHW2NHWC);
181+
}
166182

167183
// The structure of the conv weigts is: NHWC. We take the C, which is the
168184
// number of filters. We use this value to calculate the size of the bias
169185
// if it is not specified.
170186
size_t depth = wtag.dims()[0];
171187

172-
// Construct the Filter field.
173-
auto *filter = G_.getParent()->createConstant("conv.filter", wtag);
174-
175-
// Construct the Bias field.
176-
Tensor biasTensor(ElemKind::FloatTy, {depth});
177-
biasTensor.zero();
178-
179-
// Check if we have a serialized bias vector.
180-
if (op.input_size() > 2) {
181-
auto &biasTensorName = op.input(2);
182-
if (tensors_.count(biasTensorName)) {
183-
// Load the serialized bias vector.
184-
Tensor *b = getTensorByName(biasTensorName);
185-
biasTensor.assign(b);
186-
}
187-
}
188-
auto *bias = G_.getParent()->createConstant("conv.bias", biasTensor);
189-
190188
// We expect the input to be NHWC.
191189
Node *tr;
192190
if (order == "NCHW") {
@@ -201,7 +199,60 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
201199
calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides, pads);
202200
std::array<size_t, 4> outDims = {
203201
{idim.n, outSz.first, outSz.second, depth}};
204-
auto outTy = G_.getParent()->uniqueType(ElemKind::FloatTy, outDims);
202+
203+
TypeRef outTy;
204+
Constant *filter;
205+
Constant *bias;
206+
if (typeName == "Conv") {
207+
// Construct the Bias field.
208+
Tensor biasTensor(ElemKind::FloatTy, {depth});
209+
biasTensor.zero();
210+
211+
// Check if we have a serialized bias vector.
212+
if (op.input_size() > 2) {
213+
const auto &biasTensorName = op.input(2);
214+
if (tensors_.count(biasTensorName)) {
215+
// Load the serialized bias vector.
216+
Tensor *b = getTensorByName(biasTensorName);
217+
biasTensor.assign(b);
218+
}
219+
}
220+
outTy = G_.getParent()->uniqueType(ElemKind::FloatTy, outDims);
221+
filter = G_.getParent()->createConstant("conv.filter", wtag);
222+
bias = G_.getParent()->createConstant("conv.bias", biasTensor);
223+
} else {
224+
assert(dict.count("Y_zero_point") &&
225+
"missing zero point for quantized output type");
226+
assert(dict.count("Y_scale") &&
227+
"missing Y_scale for quantized output type");
228+
// Construct the Bias field.
229+
Tensor biasTensor(ElemKind::Int32QTy, {depth}, 1.0, 0);
230+
biasTensor.zero();
231+
// Check if we have a serialized bias vector.
232+
if (op.input_size() > 2) {
233+
const auto &biasTensorName = op.input(2);
234+
if (tensors_.count(biasTensorName)) {
235+
// Load the serialized bias vector.
236+
Tensor *b = getTensorByName(biasTensorName);
237+
biasTensor.assign(b);
238+
}
239+
}
240+
float scale = loadFloat(dict["Y_scale"]);
241+
int32_t offset = loadInt(dict["Y_zero_point"]);
242+
outTy = G_.getParent()->uniqueType(ElemKind::Int8QTy, outDims, scale,
243+
offset - OFFSETSHIFT);
244+
245+
// Construct the quantized Filter and bias field.
246+
filter = G_.getParent()->createConstant(
247+
ElemKind::Int8QTy, wtag.dims(), wtag.getType().getScale(),
248+
wtag.getType().getOffset(), "conv.filter");
249+
filter->assign(&wtag);
250+
bias = G_.getParent()->createConstant(
251+
ElemKind::Int32QTy, biasTensor.dims(),
252+
biasTensor.getType().getScale(), biasTensor.getType().getOffset(),
253+
"conv.bias");
254+
bias->assign(&biasTensor);
255+
}
205256

206257
Node *node = G_.createConv(opName, tr, filter, bias, outTy, kernels,
207258
strides, pads, group);
@@ -214,7 +265,47 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
214265
return;
215266
}
216267

217-
if (typeName == "MaxPool" || typeName == "AveragePool") {
268+
if (typeName == "Int8SumRelu") {
269+
assert(op.input_size() == 2 && "Only Sum of 2 inputs is supported.");
270+
assert(dict.count("Y_zero_point") &&
271+
"missing zero point for quantized output type");
272+
assert(dict.count("Y_scale") &&
273+
"missing Y_scale for quantized output type");
274+
auto in0 = getNodeValueOrCreateConstantByName(op.input(0));
275+
auto in1 = getNodeValueOrCreateConstantByName(op.input(1));
276+
auto outDims = in0.getType()->dims();
277+
auto outTy = G_.getParent()->uniqueType(
278+
ElemKind::Int8QTy, outDims, loadFloat(dict["Y_scale"]),
279+
loadInt(dict["Y_zero_point"]) - OFFSETSHIFT);
280+
auto *node = G_.createAdd(opName, outTy, in0, in1);
281+
addNodeAsOutput(op, node);
282+
return;
283+
}
284+
285+
if (typeName == "Int8Quantize") {
286+
assert(dict.count("Y_zero_point") &&
287+
"missing zero point for quantized output type");
288+
assert(dict.count("Y_scale") &&
289+
"missing Y_scale for quantized output type");
290+
auto in = getNodeValueOrCreateConstantByName(op.input(0));
291+
auto outDims = in.getType()->dims();
292+
auto outTy = G_.getParent()->uniqueType(
293+
ElemKind::Int8QTy, outDims, loadFloat(dict["Y_scale"]),
294+
loadInt(dict["Y_zero_point"]) - OFFSETSHIFT);
295+
Node *N = G_.createQuantize(opName, in, outTy);
296+
addNodeAsOutput(op, N);
297+
return;
298+
}
299+
300+
if (typeName == "Int8Dequantize") {
301+
auto in = getNodeValueOrCreateConstantByName(op.input(0));
302+
auto *node = G_.createDequantize(opName, in);
303+
addNodeAsOutput(op, node);
304+
return;
305+
}
306+
307+
if (typeName == "MaxPool" || typeName == "AveragePool" ||
308+
typeName == "Int8MaxPool" || typeName == "Int8AveragePool") {
218309
// Load the inputs:
219310
auto in = getNodeValueOrCreateConstantByName(op.input(0));
220311
std::vector<unsigned_t> strides = getSizeHW(dict, "stride", 1);
@@ -238,7 +329,29 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
238329
}
239330

240331
Node *node = nullptr;
241-
if (typeName == "MaxPool") {
332+
333+
if (typeName == "Int8MaxPool" || typeName == "Int8AveragePool") {
334+
// Create the node with quantized type.
335+
assert(dict.count("Y_zero_point") &&
336+
"missing zero point for quantized output type");
337+
assert(dict.count("Y_scale") &&
338+
"missing Y_scale for quantized output type");
339+
ShapeNHWC idim = ShapeNHWC(tr->getType(0)->dims());
340+
auto outSz =
341+
calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides, pads);
342+
std::array<size_t, 4> outDims = {
343+
{idim.n, outSz.first, outSz.second, idim.c}};
344+
if (typeName == "Int8MaxPool") {
345+
// Int8Maxpool output quantization should be same as the input, so just
346+
// ignore the given params.
347+
node = G_.createMaxPool(opName, tr, kernels, strides, pads);
348+
} else {
349+
auto outTy = G_.getParent()->uniqueType(
350+
ElemKind::Int8QTy, outDims, loadFloat(dict["Y_scale"]),
351+
loadInt(dict["Y_zero_point"]) - OFFSETSHIFT);
352+
node = G_.createAvgPool(opName, tr, outTy, kernels, strides, pads);
353+
}
354+
} else if (typeName == "MaxPool") {
242355
node = G_.createMaxPool(opName, tr, kernels, strides, pads);
243356
} else {
244357
node = G_.createAvgPool(opName, tr, kernels, strides, pads);
@@ -309,7 +422,7 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
309422
return;
310423
}
311424

312-
if (typeName == "FC" || typeName == "FCTransposed") {
425+
if (typeName == "FC" || typeName == "FCTransposed" || typeName == "Int8FC") {
313426
// Load the inputs:
314427
auto in = getNodeValueOrCreateConstantByName(op.input(0));
315428
if (in.getType()->dims().size() > 2) {
@@ -327,12 +440,18 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
327440
Tensor tmp;
328441
if (w->dims().size() > 2) {
329442
auto wDims = flattenCdr(w->dims(), axis_w);
330-
tmp.reset(ElemKind::FloatTy, {wDims.first, wDims.second});
443+
if (typeName == "FC" || typeName == "FCTransposed") {
444+
tmp.reset(ElemKind::FloatTy, {wDims.first, wDims.second});
445+
} else {
446+
tmp.reset(ElemKind::Int8QTy, {wDims.first, wDims.second},
447+
w->getType().getScale(), w->getType().getOffset());
448+
}
331449
tmp.copyRawFrom(w);
332450
w = &tmp;
333451
}
452+
334453
Tensor wtag;
335-
if (typeName == "FC") {
454+
if (typeName == "FC" || typeName == "Int8FC") {
336455
w->transpose(&wtag, {1, 0});
337456
} else {
338457
wtag.assign(w);
@@ -341,7 +460,22 @@ void Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
341460
auto W =
342461
G_.getParent()->addConstant(new Constant("weights", std::move(wtag)));
343462
auto B = G_.getParent()->addConstant(new Constant("biases", std::move(*b)));
344-
auto *node = G_.createFullyConnected(opName, in, W, B);
463+
464+
Node *node = nullptr;
465+
if (typeName == "Int8FC") {
466+
// Create the node with quantized type.
467+
assert(dict.count("Y_zero_point") &&
468+
"missing zero point for quantized output type");
469+
assert(dict.count("Y_scale") &&
470+
"missing Y_scale for quantized output type");
471+
auto outTy = G_.getParent()->uniqueType(
472+
ElemKind::Int8QTy, {in.getType()->dims()[0], B->getType()->dims()[0]},
473+
loadFloat(dict["Y_scale"]),
474+
loadInt(dict["Y_zero_point"]) - OFFSETSHIFT);
475+
node = G_.createFullyConnected(opName, in, W, B, outTy);
476+
} else {
477+
node = G_.createFullyConnected(opName, in, W, B);
478+
}
345479

346480
// Save the outputs:
347481
addNodeAsOutput(op, node);
@@ -602,6 +736,73 @@ void Caffe2ModelLoader::loadWeight(const caffe2::OperatorDef &op) {
602736
return;
603737
}
604738

739+
// Load quantized tensors:
740+
if (typeName == "Int8GivenTensorFill" ||
741+
typeName == "Int8GivenIntTensorFill") {
742+
/*
743+
output: "conv1_w"
744+
name: ""
745+
type: "Int8GivenTensorFill"
746+
arg {
747+
name: "shape"
748+
ints: 96
749+
ints: 3
750+
ints: 11
751+
ints: 11
752+
}
753+
arg {
754+
name: "values"
755+
s: "\x7f\x80\x80\x7"
756+
}
757+
arg {
758+
name: "Y_scale"
759+
f: 0.00044428
760+
}
761+
arg {
762+
name: "Y_zero_point"
763+
i: 127
764+
}
765+
*/
766+
auto *T = new Tensor();
767+
for (auto &o : op.output()) {
768+
if (tensors_.count(o))
769+
continue;
770+
tensors_[o] = T;
771+
}
772+
773+
auto dim = getShape(dict["shape"]);
774+
775+
assert(dict.count("Y_zero_point") &&
776+
"missing zero point for quantized output type");
777+
assert(dict.count("Y_scale") &&
778+
"missing Y_scale for quantized output type");
779+
780+
float scale = loadFloat(dict["Y_scale"]);
781+
int32_t offset = loadInt(dict["Y_zero_point"]);
782+
size_t i = 0;
783+
if (typeName == "Int8GivenTensorFill") {
784+
// Although in Caffe2 quantized model, the weights is int8 quantized,
785+
// the weights is stored in uint8_t format due to that Caffe2 requires the
786+
// type of input and weights must be the same. Therefore, we need to
787+
// convert it to int8 by subtracting 128.
788+
T->reset(ElemKind::Int8QTy, dim, scale, offset - OFFSETSHIFT);
789+
auto TH = T->getHandle<int8_t>();
790+
std::string str = dict["values"]->s();
791+
for (; i < str.size(); i++) {
792+
TH.raw(i) = ((uint8_t)(str.c_str()[i]) - OFFSETSHIFT);
793+
}
794+
} else {
795+
T->reset(ElemKind::Int32QTy, dim, scale, offset);
796+
auto TH = T->getHandle<int32_t>();
797+
for (auto num : dict["values"]->ints()) {
798+
TH.raw(i++) = num;
799+
}
800+
}
801+
assert(i == T->size() && "The number of serialized values does not "
802+
"match the size of the tensor.");
803+
return;
804+
}
805+
605806
// Load tensors with constant fill:
606807
if (typeName == "ConstantFill") {
607808
/*

tests/images/run.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,5 @@ done
4141
for png_filename in tests/images/imagenet_299/*.png; do
4242
./bin/image-classifier "$png_filename" -image-mode=0to1 -m=googlenet_v4_slim/googlenet_v4_slim.onnx -model-input-name=input:0 -image-layout=NHWC -label-offset=1 "$@"
4343
done
44+
#Quantized Resnet50 Caffe2 model test
45+
./bin/image-classifier tests/images/imagenet/*.png -image-mode=0to1 -m=quant_resnet50 -model-input-name=gpu_0/data_0 -use-imagenet-normalization "$@"

utils/download_caffe2_models.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ vgg19
2626
zfnet512
2727
bvlc_alexnet
2828
en2gr
29+
quant_resnet50
2930
EOF
3031
)
3132

0 commit comments

Comments
 (0)