[Importer] Add C2 importer support for RWQ SLWS/SLS

jfix71 · jfix71 · commit c30905af3a2c · 2019-02-07T20:21:54.000-08:00
diff --git a/include/glow/Graph/Graph.h b/include/glow/Graph/Graph.h
@@ -580,12 +580,26 @@ class Function final : public Named {
   /// Result[0], next Lengths[1] slices are aggregated to Result[1],
   /// etc. I.e. sum(Lengths) must be equal to len(Indices).
   RowwiseQuantizedSparseLengthsWeightedSumNode *
+  createRowwiseQuantizedSparseLengthsSum(llvm::StringRef name, Constant *data,
+                                         Constant *scales, Constant *offsets,
+                                         NodeValue indices, NodeValue lengths);
+
+  /// Same as \ref createRowwiseQuantizedSparseLengthsSum(), but expects
+  /// float input \p data, which is rowwise-quantized internally.
+  RowwiseQuantizedSparseLengthsWeightedSumNode *
   createRowwiseQuantizedSparseLengthsSum(llvm::StringRef name, Tensor &data,
                                          NodeValue indices, NodeValue lengths);
 
   /// Same as \ref createRowwiseQuantizedSparseLengthsSum(), but i-th slice is
   /// multiplied by weights[i]. len(weights) must be equal to len(indices).
   RowwiseQuantizedSparseLengthsWeightedSumNode *
+  createRowwiseQuantizedSparseLengthsWeightedSum(
+      llvm::StringRef name, Constant *data, Constant *scales, Constant *offsets,
+      NodeValue weights, NodeValue indices, NodeValue lengths);
+
+  /// Same as \ref createRowwiseQuantizedSparseLengthsWeightedSum(), but expects
+  /// float input \p data, which is rowwise-quantized internally.
+  RowwiseQuantizedSparseLengthsWeightedSumNode *
   createRowwiseQuantizedSparseLengthsWeightedSum(llvm::StringRef name,
                                                  Tensor &data,
                                                  NodeValue weights,
diff --git a/lib/Graph/Graph.cpp b/lib/Graph/Graph.cpp
@@ -1376,6 +1376,28 @@ Function::createSparseLengthsWeightedSum(llvm::StringRef name, TypeRef outTy,
                                                   indices, lengths));
 }
 
+RowwiseQuantizedSparseLengthsWeightedSumNode *
+Function::createRowwiseQuantizedSparseLengthsWeightedSum(
+    llvm::StringRef name, Constant *data, Constant *scales, Constant *offsets,
+    NodeValue weights, NodeValue indices, NodeValue lengths) {
+  // The result has the shape of \p data with dim 0 replaced by len(lengths).
+  ShapeVector resultDims(data->dims().begin(), data->dims().end());
+  resultDims[0] = lengths.dims()[0];
+  TypeRef resultTy = getParent()->uniqueType(ElemKind::FloatTy, resultDims);
+  return addNode(new RowwiseQuantizedSparseLengthsWeightedSumNode(
+      name, resultTy, data, scales, offsets, weights, indices, lengths));
+}
+
+RowwiseQuantizedSparseLengthsWeightedSumNode *
+Function::createRowwiseQuantizedSparseLengthsSum(
+    llvm::StringRef name, Constant *data, Constant *scales, Constant *offsets,
+    NodeValue indices, NodeValue lengths) {
+  auto onesTy = getParent()->uniqueType(ElemKind::FloatTy, {indices.dims()[0]});
+  auto *unitWeights = createSplat(name.str() + ".ones", onesTy, 1.0); // all 1s
+  return createRowwiseQuantizedSparseLengthsWeightedSum(
+      name, data, scales, offsets, unitWeights, indices, lengths);
+}
+
 /// Helper to create a RowwiseQuantizedSparseLengthsWeightedSumNode in the
 /// Function \p F with \p name, using \ data, \p weights, \p indices, and \p
 /// lengths as inputs. The provided float data in \p Tensor is rowwise
@@ -1386,9 +1408,6 @@ quantizeDataAndCreateRowwiseQuantizedSparseLengthsWeightedSum(
     Function *F, llvm::StringRef name, Tensor &data, NodeValue weights,
     NodeValue indices, NodeValue lengths) {
   auto inDims = data.dims();
-  ShapeVector outDims(inDims.begin(), inDims.end());
-  outDims[0] = lengths.dims()[0];
-  auto outTy = F->getParent()->uniqueType(ElemKind::FloatTy, outDims);
 
   // Note: In rwqData, we are using a quantized type, however the scale/offset
   // are set to dummy values 0.0/0. This is because the actually used
@@ -1403,10 +1422,8 @@ quantizeDataAndCreateRowwiseQuantizedSparseLengthsWeightedSum(
   quantization::tensorRowwiseQuantization(data, rwqData->getPayload(),
                                           dataScales->getPayload(),
                                           dataOffsets->getPayload());
-
-  return F->addNode(new RowwiseQuantizedSparseLengthsWeightedSumNode(
-      name, outTy, rwqData, dataScales, dataOffsets, weights, indices,
-      lengths));
+  return F->createRowwiseQuantizedSparseLengthsWeightedSum(
+      name, rwqData, dataScales, dataOffsets, weights, indices, lengths);
 }
 
 RowwiseQuantizedSparseLengthsWeightedSumNode *
diff --git a/lib/Importer/Caffe2ModelLoader.cpp b/lib/Importer/Caffe2ModelLoader.cpp
@@ -870,6 +870,80 @@ llvm::Error Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
     return llvm::Error::success();
   }
 
+  if (typeName == "SparseLengthsWeightedSum8BitsRowwise" ||
+      typeName == "SparseLengthsSum8BitsRowwise") {
+    // Inputs are (data, [weights,] indices, lengths, scales_biases): only the
+    // weighted op has weights, which shifts every later input by one slot.
+    const bool isWeighted = typeName == "SparseLengthsWeightedSum8BitsRowwise";
+    const size_t indicesIdx = isWeighted ? 2 : 1;
+    const size_t lengthsIdx = indicesIdx + 1;
+    const size_t scalesBiasesIdx = lengthsIdx + 1;
+    RETURN_ERR_IF_NOT(static_cast<size_t>(op.input_size()) > scalesBiasesIdx,
+                      "Not enough inputs for rowwise quantized SLS/SLWS.");
+
+    NodeValue data;
+    ASSIGN_VALUE_OR_RETURN_ERR(data,
+                               getNodeValueOrCreateConstantByName(op.input(0)));
+    NodeValue indices;
+    ASSIGN_VALUE_OR_RETURN_ERR(
+        indices, getNodeValueOrCreateConstantByName(op.input(indicesIdx)));
+    NodeValue lengths;
+    ASSIGN_VALUE_OR_RETURN_ERR(
+        lengths, getNodeValueOrCreateConstantByName(op.input(lengthsIdx)));
+    NodeValue scalesBiases;
+    ASSIGN_VALUE_OR_RETURN_ERR(scalesBiases, getNodeValueOrCreateConstantByName(
+                                                 op.input(scalesBiasesIdx)));
+
+    // The node creators take data/scales/offsets as Constant*, and dyn_cast
+    // returns nullptr for anything else, so check before dereferencing.
+    Constant *scalesBiasesC = llvm::dyn_cast<Constant>(scalesBiases);
+    RETURN_ERR_IF_NOT(scalesBiasesC, "scales_biases must be Constant.");
+    Constant *dataC = llvm::dyn_cast<Constant>(data);
+    RETURN_ERR_IF_NOT(dataC, "Data must be Constant.");
+    RETURN_ERR_IF_NOT(dataC->getElementType() == ElemKind::Int8QTy,
+                      "Data must be Int8QTy.");
+
+    // Make sure all the shapes make sense.
+    const size_t numRows = data.dims()[0];
+    RETURN_ERR_IF_NOT(lengths.dims().size() == 1, "lengths must be a vector.");
+    RETURN_ERR_IF_NOT(indices.dims().size() == 1, "indices must be a vector.");
+    RETURN_ERR_IF_NOT(scalesBiases.dims().size() == 2,
+                      "scale_bias has to be a matrix.");
+    RETURN_ERR_IF_NOT(scalesBiases.dims()[0] == numRows,
+                      "scale_bias must have the same number of rows as data.");
+    RETURN_ERR_IF_NOT(scalesBiases.dims()[1] == 2,
+                      "Second dim of scale_bias has to be equal to 2.");
+
+    // Now strip out the scales and biases into their own tensors.
+    Constant *dataScales = G_.getParent()->createConstant(
+        ElemKind::FloatTy, {numRows}, "dataScales");
+    Constant *dataOffsets = G_.getParent()->createConstant(
+        ElemKind::Int32ITy, {numRows}, "dataOffsets");
+
+    auto dataScalesH = dataScales->getHandle<float>();
+    auto dataOffsetsH = dataOffsets->getHandle<int32_t>();
+    auto scalesBiasesH = scalesBiasesC->getHandle<float>();
+    for (size_t i = 0; i < numRows; ++i) {
+      dataScalesH.at({i}) = scalesBiasesH.at({i, 0});
+      // Caffe2 represents offsets (bias) using float, while Glow uses int32_t.
+      dataOffsetsH.at({i}) = static_cast<int32_t>(scalesBiasesH.at({i, 1}));
+    }
+
+    Node *node;
+    if (isWeighted) {
+      NodeValue weights;
+      ASSIGN_VALUE_OR_RETURN_ERR(
+          weights, getNodeValueOrCreateConstantByName(op.input(1)));
+      node = G_.createRowwiseQuantizedSparseLengthsWeightedSum(
+          opName, dataC, dataScales, dataOffsets, weights, indices, lengths);
+    } else {
+      node = G_.createRowwiseQuantizedSparseLengthsSum(
+          opName, dataC, dataScales, dataOffsets, indices, lengths);
+    }
+    RETURN_IF_ERR(addNodeAsOutput(op, node));
+    return llvm::Error::success();
+  }
+
   RETURN_ERR(unexpectedNodeErrorMessage(op, "Unsupported operator."));
 }
 
diff --git a/tests/models/caffe2Models/rowwise_quantized_sparse_lengths_sum_init_net.pbtxt b/tests/models/caffe2Models/rowwise_quantized_sparse_lengths_sum_init_net.pbtxt
@@ -0,0 +1,40 @@
+name: "rowwise_quantized_sparse_lengths_sum_init_net_test"
+op {
+  output: "data"
+  type: "Int8GivenTensorFill"
+  arg {
+    name: "shape"
+    ints: 3
+    ints: 2
+  }
+  arg {
+    name: "values"
+    s: "\324\377\254\377\311\377"
+  }
+  arg {
+    name: "Y_zero_point"
+    i: 0
+  }
+  arg {
+    name: "Y_scale"
+    f: 0.0
+  }
+}
+op {
+  output: "scales_bias"
+  type: "GivenTensorFill"
+  arg {
+    name: "shape"
+    ints: 3
+    ints: 2
+  }
+  arg {
+    name: "values"
+    floats: 0.004706
+    floats: -128.0
+    floats: 0.013333
+    floats: -128.0
+    floats: 0.022353
+    floats: -128.0
+  }
+}
diff --git a/tests/models/caffe2Models/rowwise_quantized_sparse_lengths_sum_predict_net.pbtxt b/tests/models/caffe2Models/rowwise_quantized_sparse_lengths_sum_predict_net.pbtxt
@@ -0,0 +1,13 @@
+name: "rowwise_quantized_sparse_lengths_sum_predict_net_test"
+op {
+  input: "data"
+  input: "indices"
+  input: "lengths"
+  input: "scales_bias"
+  output: "result"
+  name: ""
+  type: "SparseLengthsSum8BitsRowwise"
+}
+external_input: "indices"
+external_input: "lengths"
+external_output: "result"
diff --git a/tests/models/caffe2Models/rowwise_quantized_sparse_lengths_weighted_sum_init_net.pbtxt b/tests/models/caffe2Models/rowwise_quantized_sparse_lengths_weighted_sum_init_net.pbtxt
@@ -0,0 +1,58 @@
+name: "rowwise_quantized_sparse_lengths_weighted_sum_init_net_test"
+op {
+  output: "data"
+  type: "Int8GivenTensorFill"
+  arg {
+    name: "shape"
+    ints: 3
+  }
+  arg {
+    name: "values"
+    s: "\377\000\377"
+  }
+  arg {
+    name: "Y_zero_point"
+    i: 0
+  }
+  arg {
+    name: "Y_scale"
+    f: 0.0
+  }
+}
+op {
+  output: "weights"
+  type: "GivenTensorFill"
+  arg {
+    name: "shape"
+    ints: 8
+  }
+  arg {
+    name: "values"
+    floats: 3.0
+    floats: 1.0
+    floats: 0.0
+    floats: 0.0
+    floats: 0.0
+    floats: 0.0
+    floats: 2.0
+    floats: -0.5
+  }
+}
+op {
+  output: "scales_bias"
+  type: "GivenTensorFill"
+  arg {
+    name: "shape"
+    ints: 3
+    ints: 2
+  }
+  arg {
+    name: "values"
+    floats: 0.007843
+    floats: -128.0
+    floats: 0.001961
+    floats: 127.0
+    floats: 0.050980
+    floats: -128.0
+  }
+}
diff --git a/tests/models/caffe2Models/rowwise_quantized_sparse_lengths_weighted_sum_predict_net.pbtxt b/tests/models/caffe2Models/rowwise_quantized_sparse_lengths_weighted_sum_predict_net.pbtxt
@@ -0,0 +1,14 @@
+name: "rowwise_quantized_sparse_lengths_weighted_sum_predict_net_test"
+op {
+  input: "data"
+  input: "weights"
+  input: "indices"
+  input: "lengths"
+  input: "scales_bias"
+  output: "result"
+  name: ""
+  type: "SparseLengthsWeightedSum8BitsRowwise"
+}
+external_input: "indices"
+external_input: "lengths"
+external_output: "result"
diff --git a/tests/unittests/Caffe2ImporterTest.cpp b/tests/unittests/Caffe2ImporterTest.cpp