-
Notifications
You must be signed in to change notification settings - Fork 699
[New Operator] FusedRowwiseQuantizedSparseLengthsWeightedSumNode #2368
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8c6229f
914d10f
7c40989
c00113f
23182ed
013791e
a7e795f
3f0740f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -212,3 +212,16 @@ Row-wise quantized SparseLengthsWeightedSum is also supported. Similar to the | |||||||||||||||||||||
above, we compute scales and offsets per row, to be used with the `Data` input | ||||||||||||||||||||||
for the `RowwiseQuantizedSparseLengthsSumNode`. Scales and Offsets are inputs to | ||||||||||||||||||||||
the node. Output of this node is float, matching the Caffe2 implementation. | ||||||||||||||||||||||
|
||||||||||||||||||||||
### Fused Row-wise Quantization | ||||||||||||||||||||||
|
||||||||||||||||||||||
For some backends it may be beneficial to keep each row's scales and offsets | ||||||||||||||||||||||
fused inline with the data. Caffe2 implements nodes with fused storage, such as | ||||||||||||||||||||||
[SparseLengthsWeightedSum](https://caffe2.ai/docs/operators-catalogue.html#sparselengthsweightedsumfused8bitrowwise). Glow | ||||||||||||||||||||||
supports such fused Nodes/Instructions, for example | ||||||||||||||||||||||
`FusedRowwiseQuantizedSparseLengthsWeightedSum`. The `ElemKind` of fused tensors | ||||||||||||||||||||||
is `Int8FusedQTy`. Tensors with `Int8FusedQTy` are 2-dimensional, and have an | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should be uint8? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We subtract glow/lib/Importer/Caffe2ModelLoader.cpp Lines 1118 to 1127 in a64d64f
|
||||||||||||||||||||||
extra 8 columns for each row. The first extra 4 bytes are the float scale of the | ||||||||||||||||||||||
row, and the second extra 4 bytes are the int32_t offset. Note that similar to | ||||||||||||||||||||||
normal row-wise quantized tensors, they use a dummy scale and offset in the | ||||||||||||||||||||||
Type. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -185,13 +185,14 @@ inline bool operator==(const ShapeNCHW &LHS, const ShapeNCHW &RHS) { | |
/// An enum representing the type used by the elements of a tensor. The types of | ||
/// Handles for these tensors should match the element kind. | ||
enum class ElemKind : unsigned char { | ||
FloatTy, // 32-bit float type (float) | ||
Float16Ty, // 16-bit float type (half, fp16) | ||
Int8QTy, // 8-bit quantized type (int8_t) | ||
Int16QTy, // 16-bit quantized type (int16_t) | ||
Int32QTy, // 32-bit quantized type (int32_t) | ||
Int32ITy, // 32-bit index type (int32_t) | ||
Int64ITy, // 64-bit index type (int64_t) | ||
FloatTy, // 32-bit float type (float) | ||
Float16Ty, // 16-bit float type (half, fp16) | ||
Int8QTy, // 8-bit quantized type (int8_t) | ||
Int16QTy, // 16-bit quantized type (int16_t) | ||
Int32QTy, // 32-bit quantized type (int32_t) | ||
Int32ITy, // 32-bit index type (int32_t) | ||
Int64ITy, // 64-bit index type (int64_t) | ||
Int8FusedQTy, // 8-bit quantized type with fused scale/offset (int8_t) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we have such a type instead of plain There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The biggest reason I wanted to explicitly differentiate between normal |
||
}; | ||
|
||
/// A class that represents a type of a tensor. | ||
|
@@ -360,6 +361,8 @@ struct Type final { | |
return std::is_same<ElemTy, int32_t>::value; | ||
case ElemKind::Int64ITy: | ||
return std::is_same<ElemTy, int64_t>::value; | ||
case ElemKind::Int8FusedQTy: | ||
return std::is_same<ElemTy, int8_t>::value; | ||
} | ||
GLOW_UNREACHABLE("Invalid type."); | ||
} | ||
|
@@ -368,7 +371,8 @@ struct Type final { | |
bool isQuantizedType() const { | ||
return elementType_ == ElemKind::Int8QTy || | ||
elementType_ == ElemKind::Int16QTy || | ||
elementType_ == ElemKind::Int32QTy; | ||
elementType_ == ElemKind::Int32QTy || | ||
elementType_ == ElemKind::Int8FusedQTy; | ||
} | ||
|
||
/// \returns true if the type of this Tensor is one of the floating point | ||
|
@@ -401,6 +405,8 @@ struct Type final { | |
return sizeof(int32_t); | ||
case ElemKind::Int64ITy: | ||
return sizeof(int64_t); | ||
case ElemKind::Int8FusedQTy: | ||
return sizeof(int8_t); | ||
} | ||
GLOW_UNREACHABLE("Invalid type."); | ||
} | ||
|
@@ -413,7 +419,7 @@ struct Type final { | |
/// \return the textual name of the element \p Ty. | ||
static llvm::StringRef getElementName(ElemKind Ty) { | ||
static const char *names[] = { | ||
"float", "float16", "i8", "i16", "i32", "index32", "index64", | ||
"float", "float16", "i8", "i16", "i32", "index32", "index64", "i8fused", | ||
}; | ||
return names[(int)Ty]; | ||
} | ||
|
Uh oh!
There was an error while loading. Please reload this page.