Commit c5cd64a

Quentin Colombet authored and committed

[Loader] Add an option to run a model in fp16
*Description*

This patch adds an option, `-convert-to-fp16`, that converts all fp32 operators into fp16 ones. We may want to expose more conversion options in the future and change the `convert-to` option into something that takes an enum, like fp32-to-fp16. As part of the conversion mechanism, it is possible to choose which nodes may be converted (`-keep-original-precision-for-nodes=<listOfNodes>`). The conversion process does not alter the inputs and outputs of the network; thus, the converted graph will have ConvertTo nodes at least at its start and end.

*Testing*

Able to convert and run resnet50 in fp16 with the interpreter.

*Documentation*

None so far; we want the interpreter to fully support fp16 before people play with it.

Related to #1329
1 parent: e170d02 · commit: c5cd64a
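To make the mechanism concrete, here is a minimal standalone C++ sketch of the policy the description outlines: fp32 node kinds are rewritten to fp16 unless they appear in a keep-set, and explicit conversions bridge the untouched fp32 boundary. Every type and function below is a toy stand-in, not Glow's actual TypeAToTypeBFunctionConverter.

// Toy stand-in for the fp32 -> fp16 conversion described above; none of
// these types mirror Glow's real classes.
#include <iostream>
#include <set>
#include <string>
#include <vector>

enum class Elem { Float32, Float16 };

struct Node {
  std::string kind; // e.g. "Conv", "Add", "SoftMax", "ConvertTo"
  Elem out;         // element type the node produces
};

// Rewrite every fp32 node whose kind is not in `keep` to fp16. The network
// boundary stays fp32, so a ConvertTo is inserted after the input and before
// the output ("ConvertTo at least at its start and end"). A real converter
// would also bridge fp16/fp32 seams between interior nodes.
std::vector<Node> convertToFP16(const std::vector<Node> &body,
                                const std::set<std::string> &keep) {
  std::vector<Node> result;
  result.push_back({"ConvertTo", Elem::Float16}); // fp32 input -> fp16
  for (const Node &n : body) {
    bool keepFP32 = keep.count(n.kind) != 0;
    result.push_back({n.kind, keepFP32 ? Elem::Float32 : Elem::Float16});
  }
  result.push_back({"ConvertTo", Elem::Float32}); // fp16 result -> fp32 output
  return result;
}

int main() {
  std::vector<Node> body = {{"Conv", Elem::Float32},
                            {"Add", Elem::Float32},
                            {"SoftMax", Elem::Float32}};
  // Toy equivalent of -keep-original-precision-for-nodes=SoftMax.
  for (const Node &n : convertToFP16(body, {"SoftMax"}))
    std::cout << n.kind << " -> "
              << (n.out == Elem::Float16 ? "fp16" : "fp32") << "\n";
}

Running it prints Conv and Add as fp16 while SoftMax stays fp32, with a ConvertTo step at each end of the graph.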

File tree

2 files changed: 33 additions & 16 deletions


tools/loader/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ add_executable(image-classifier
 target_link_libraries(image-classifier
   PRIVATE
     Base
+    Converter
     Importer
     ExecutionEngine
     Quantization)

tools/loader/Loader.cpp

Lines changed: 32 additions & 16 deletions
@@ -17,6 +17,7 @@
 #include "Loader.h"
 
 #include "glow/Base/Tensor.h"
+#include "glow/Converter/TypeAToTypeBFunctionConverter.h"
 #include "glow/ExecutionEngine/ExecutionEngine.h"
 #include "glow/IR/IR.h"
 #include "glow/Quantization/Serialization.h"
@@ -89,13 +90,14 @@ llvm::cl::opt<std::string> loadProfileFileOpt(
     llvm::cl::value_desc("profile.yaml"), llvm::cl::Optional,
     llvm::cl::cat(loaderCat));
 
-llvm::cl::list<std::string> doNotQuantizeNodesOpt(
-    "do_not_quantize_nodes",
+llvm::cl::list<std::string> keepOriginalPrecisionForNodesOpt(
+    "keep-original-precision-for-nodes",
     llvm::cl::desc(
         "Use to specify the name of nodes (e.g. Add, Div, etc.) that should "
-        "not be quantized. All nodes of the listed kinds would not be "
-        "quantized; e.g. if Add is specififed and there are multiple Add nodes "
-        "in the input loaded model, none would be quantized."),
+        "be kept as is when conversion/quantization is requested. "
+        "All nodes of the listed kinds will be kept as is; "
+        "e.g. if Add is specified and there are multiple Add nodes "
+        "in the input loaded model, none would be quantized/converted."),
     llvm::cl::value_desc("NodeNames (e.g. Add,Div)"), llvm::cl::ZeroOrMore,
     llvm::cl::CommaSeparated, llvm::cl::cat(loaderCat));
 
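A note on the LLVM plumbing used above: combining llvm::cl::list with llvm::cl::CommaSeparated is what lets a single flag carry several node kinds. Below is a minimal standalone sketch; the option name matches the patch, but the main harness and the demo argv are illustrative only.

// Demonstrates the comma-separated parsing; build against LLVM's Support lib.
#include "llvm/Support/CommandLine.h"
#include <iostream>
#include <string>

llvm::cl::list<std::string> keepNodesOpt(
    "keep-original-precision-for-nodes",
    llvm::cl::desc("Node kinds to keep in their original precision."),
    llvm::cl::value_desc("NodeNames (e.g. Add,Div)"), llvm::cl::ZeroOrMore,
    llvm::cl::CommaSeparated);

int main() {
  const char *argv[] = {"demo", "-keep-original-precision-for-nodes=Add,Div"};
  llvm::cl::ParseCommandLineOptions(2, argv);
  for (const std::string &kind : keepNodesOpt)
    std::cout << kind << "\n"; // prints "Add", then "Div"
  return 0;
}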
@@ -123,6 +125,11 @@ llvm::cl::opt<bool> dumpGraphOpt("dumpGraph",
                                  llvm::cl::desc("Prints Graph to stdout"),
                                  llvm::cl::cat(modelExportCat));
 
+llvm::cl::opt<bool>
+    convertToFP16("convert-to-fp16",
+                  llvm::cl::desc("Run all floating-point computation in fp16."),
+                  llvm::cl::init(false), llvm::cl::cat(loaderCat));
+
 /// Emit a bundle into the specified output directory.
 llvm::cl::opt<std::string>
     emitBundle("emit-bundle",
@@ -217,6 +224,16 @@ void Loader::compile(Context &ctx) {
     F_ = ::profileQuantization(ctx, F_);
   }
 
+  // By default, when converting models, all nodes that can be
+  // converted are converted. However, some models may need to
+  // keep higher precision for some nodes to prevent high accuracy loss.
+  // Those nodes are gathered via the keepOriginalPrecisionForNodesOpt
+  // option and passed to the related conversion function.
+  KindSet keepOriginalPrecisionForNodes;
+  for (llvm::StringRef kindName : keepOriginalPrecisionForNodesOpt) {
+    keepOriginalPrecisionForNodes.insert(getKindFromNodeName(kindName));
+  }
+
   // Load the quantization profile and transform the graph.
   if (!loadProfileFileOpt.empty()) {
     // The profiled graph was optimized before it was instrumentated. In this
@@ -233,25 +250,24 @@ void Loader::compile(Context &ctx) {
     std::string oldName = F_->getName();
     F_->setName("old");
 
-    // By default, when quantizing loaded models, all nodes that can be
-    // quantized are quantized. However, some models that are loaded may need to
-    // keep higher precision for some nodes to prevent high accuracy loss. This
-    // set is passed into quantizeFunction() to prevent quantization.
-    KindSet doNotQuantizeKinds;
-    for (llvm::StringRef kindName : doNotQuantizeNodesOpt) {
-      doNotQuantizeKinds.insert(getKindFromNodeName(kindName));
-    }
-
     // Quantize the graph based on the captured profile.
-    auto *Q = quantization::quantizeFunction(EE_, quantizationInfos, F_,
-                                             oldName, doNotQuantizeKinds);
+    auto *Q = quantization::quantizeFunction(
+        EE_, quantizationInfos, F_, oldName, keepOriginalPrecisionForNodes);
 
     // Erase the original function so that the redundant variables that are only
     // referenced by the original function will be removed.
     Q->getParent()->eraseFunction(F_);
     F_ = Q;
   }
 
+  if (convertToFP16) {
+    TypeAToTypeBFunctionConverter converter(*F_, ElemKind::FloatTy,
+                                            ElemKind::Float16Ty,
+                                            &keepOriginalPrecisionForNodes);
+    converter.convert();
+    ::optimize(F_, glow::CompilationMode::Infer);
+  }
+
   if (emittingBundle()) {
     // Emit IR for the graph, compile it and save as a bundle.
     EE_.save(CompilationMode::Infer, F_, emitBundle, networkName);
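Why a model may need to keep some nodes in fp32: fp16 has a maximum finite value of 65504 and roughly three decimal digits of precision, so overflow-prone or precision-sensitive operators can lose accuracy when converted. The small demonstration below shows the rounding involved; it assumes a compiler with the _Float16 extension (recent Clang/GCC) and is not part of the patch.

// Shows the rounding/overflow an fp32 -> fp16 round trip can introduce.
// Assumes the _Float16 extension (recent Clang/GCC); not from the patch.
#include <cstdio>

int main() {
  float vals[] = {70000.0f,    // above fp16 max (65504): becomes inf
                  3.14159265f, // representable range: rounds to 3.140625
                  1.0e-6f};    // fp16 subnormal range: coarse rounding
  for (float v : vals)
    std::printf("fp32 %.8g -> fp16 %.8g\n", v, (float)(_Float16)v);
  return 0;
}

This is the kind of loss that -keep-original-precision-for-nodes lets users avoid for selected node kinds.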
