[Loader] Add an option to run a model in fp16

Quentin Colombet · qcolombet · commit c5cd64aa6457 · 2018-10-15T20:52:33.000-07:00
*Description* This patch adds an option `-convert-to-fp16` to convert all fp32 operators into fp16 ones. We may want to expose more conversion options in the future and change the `convert-to` option to something taking an enum like fp32-to-fp16. As part of the conversion mechanism it is possible to choose which nodes are okay to convert (`-keep-original-precision-for-nodes=<listOfNodes>`). The conversion process does not alter the inputs and outputs of the network. Thus, the converted graph will have ConvertToNode at least at its start and end. *Testing* Able to convert and run resnet50 in fp16 with the interpreter. *Documentation* None so far, we want the interpreter to fully support fp16 before people play with it. Related to #1329
diff --git a/tools/loader/CMakeLists.txt b/tools/loader/CMakeLists.txt
@@ -5,6 +5,7 @@ add_executable(image-classifier
 target_link_libraries(image-classifier
                       PRIVATE
                         Base
+                        Converter
                         Importer
                         ExecutionEngine
                         Quantization)
diff --git a/tools/loader/Loader.cpp b/tools/loader/Loader.cpp
@@ -17,6 +17,7 @@
 #include "Loader.h"
 
 #include "glow/Base/Tensor.h"
+#include "glow/Converter/TypeAToTypeBFunctionConverter.h"
 #include "glow/ExecutionEngine/ExecutionEngine.h"
 #include "glow/IR/IR.h"
 #include "glow/Quantization/Serialization.h"
@@ -89,13 +90,14 @@ llvm::cl::opt<std::string> loadProfileFileOpt(
     llvm::cl::value_desc("profile.yaml"), llvm::cl::Optional,
     llvm::cl::cat(loaderCat));
 
-llvm::cl::list<std::string> doNotQuantizeNodesOpt(
-    "do_not_quantize_nodes",
+llvm::cl::list<std::string> keepOriginalPrecisionForNodesOpt(
+    "keep-original-precision-for-nodes",
     llvm::cl::desc(
         "Use to specify the name of nodes (e.g. Add, Div, etc.) that should "
-        "not be quantized. All nodes of the listed kinds would not be "
-        "quantized; e.g. if Add is specififed and there are multiple Add nodes "
-        "in the input loaded model, none would be quantized."),
+        "be kept as is when conversion/quantization is requested. "
+        "All nodes of the listed kinds will be kept as is; "
+        "e.g. if Add is specified and there are multiple Add nodes "
+        "in the input loaded model, none would be quantized/converted."),
     llvm::cl::value_desc("NodeNames (e.g. Add,Div)"), llvm::cl::ZeroOrMore,
     llvm::cl::CommaSeparated, llvm::cl::cat(loaderCat));
 
@@ -123,6 +125,11 @@ llvm::cl::opt<bool> dumpGraphOpt("dumpGraph",
                                  llvm::cl::desc("Prints Graph to stdout"),
                                  llvm::cl::cat(modelExportCat));
 
+llvm::cl::opt<bool>
+    convertToFP16("convert-to-fp16",
+                  llvm::cl::desc("Run all floating-point computation in fp16."),
+                  llvm::cl::init(false), llvm::cl::cat(loaderCat));
+
 /// Emit a bundle into the specified output directory.
 llvm::cl::opt<std::string>
     emitBundle("emit-bundle",
@@ -217,6 +224,16 @@ void Loader::compile(Context &ctx) {
     F_ = ::profileQuantization(ctx, F_);
   }
 
+  // By default, when converting models, all nodes that can be
+  // converted are converted. However, some models may need to
+  // keep higher precision for some nodes to prevent high accuracy loss.
+  // Those nodes are gathered via the keepOriginalPrecisionForNodesOpt
+  // option and passed to the related conversion function.
+  KindSet keepOriginalPrecisionForNodes;
+  for (llvm::StringRef kindName : keepOriginalPrecisionForNodesOpt) {
+    keepOriginalPrecisionForNodes.insert(getKindFromNodeName(kindName));
+  }
+
   // Load the quantization profile and transform the graph.
   if (!loadProfileFileOpt.empty()) {
     // The profiled graph was optimized before it was instrumentated. In this
@@ -233,25 +250,24 @@ void Loader::compile(Context &ctx) {
     std::string oldName = F_->getName();
     F_->setName("old");
 
-    // By default, when quantizing loaded models, all nodes that can be
-    // quantized are quantized. However, some models that are loaded may need to
-    // keep higher precision for some nodes to prevent high accuracy loss. This
-    // set is passed into quantizeFunction() to prevent quantization.
-    KindSet doNotQuantizeKinds;
-    for (llvm::StringRef kindName : doNotQuantizeNodesOpt) {
-      doNotQuantizeKinds.insert(getKindFromNodeName(kindName));
-    }
-
     // Quantize the graph based on the captured profile.
-    auto *Q = quantization::quantizeFunction(EE_, quantizationInfos, F_,
-                                             oldName, doNotQuantizeKinds);
+    auto *Q = quantization::quantizeFunction(
+        EE_, quantizationInfos, F_, oldName, keepOriginalPrecisionForNodes);
 
     // Erase the original function so that the redundant variables that are only
     // referenced by the original function will be removed.
     Q->getParent()->eraseFunction(F_);
     F_ = Q;
   }
 
+  if (convertToFP16) {
+    TypeAToTypeBFunctionConverter converter(*F_, ElemKind::FloatTy,
+                                            ElemKind::Float16Ty,
+                                            &keepOriginalPrecisionForNodes);
+    converter.convert();
+    ::optimize(F_, glow::CompilationMode::Infer);
+  }
+
   if (emittingBundle()) {
     // Emit IR for the graph, compile it and save as a bundle.
     EE_.save(CompilationMode::Infer, F_, emitBundle, networkName);