Commit d2f8a59

refactor(//cpp/api)!: Refactor ptq to use includes but keep it separate from the core header

BREAKING CHANGE: To use ptq you now need to include trtorch/ptq.h in addition to trtorch/trtorch.h; similarly, for logging commands you need to include trtorch/logging.h.

Signed-off-by: Naren Dasan <[email protected]>
Signed-off-by: Naren Dasan <[email protected]>
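In practice, the breaking change means consumer code pulls in each header explicitly; a minimal sketch of the new include pattern (the header names come from the commit message above, the comments are editorial):

#include "trtorch/trtorch.h" // core compilation API
#include "trtorch/ptq.h"     // now required for the trtorch::ptq calibrator factories
#include "trtorch/logging.h" // now required for logging commands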
1 parent 842a567 commit d2f8a59

File tree: 5 files changed (+71, -53 lines)


cpp/api/include/trtorch/logging.h

Lines changed: 7 additions & 0 deletions
@@ -1,3 +1,10 @@
+/*
+ * Copyright (c) NVIDIA Corporation.
+ * All rights reserved.
+ *
+ * This library is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
 #pragma once
 
 #include <string>

cpp/api/include/trtorch/macros.h

Lines changed: 7 additions & 0 deletions
@@ -1,3 +1,10 @@
+/*
+ * Copyright (c) NVIDIA Corporation.
+ * All rights reserved.
+ *
+ * This library is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
 #pragma once
 
 #if defined(__GNUC__)

cpp/api/include/trtorch/ptq.h

Lines changed: 55 additions & 4 deletions
@@ -1,23 +1,30 @@
+/*
+ * Copyright (c) NVIDIA Corporation.
+ * All rights reserved.
+ *
+ * This library is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
 #pragma once
 
 #include <string>
 #include <vector>
 #include <memory>
 #include <iostream>
+#include <fstream>
+#include <iterator>
 #include <sstream>
 
+#include "torch/torch.h"
 #include "trtorch/logging.h"
+#include "NvInfer.h"
 
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
 namespace nvinfer1 {
 class IInt8Calibrator;
 class IInt8EntropyCalibrator2;
 }
 
-namespace torch {
-class Tensor;
-}
-
 namespace trtorch {
 namespace ptq {
 bool get_batch_impl(void* bindings[], const char* names[], int nbBindings, torch::Tensor& data);

@@ -269,5 +276,49 @@ class Int8CacheCalibrator : Algorithm {
   std::vector<char> cache_;
 };
 
+/**
+ * @brief A factory to build a post training quantization calibrator from a torch dataloader
+ *
+ * Creates a calibrator to use for post training quantization. By default the returned calibrator uses the TensorRT Entropy v2
+ * algorithm to perform calibration. This is recommended for feed-forward networks. You can override the algorithm selection
+ * (such as to use the MinMax Calibrator recommended for NLP tasks) by calling make_int8_calibrator with the calibrator class
+ * as a template parameter.
+ *
+ * e.g. ``trtorch::ptq::make_int8_calibrator<nvinfer1::IInt8MinMaxCalibrator>(std::move(calibration_dataloader), calibration_cache_file, use_cache);``
+ * @tparam Algorithm: class nvinfer1::IInt8Calibrator (Default: nvinfer1::IInt8EntropyCalibrator2) - Algorithm to use
+ * @tparam DataLoader: std::unique_ptr<torch::data::DataLoader> - DataLoader type
+ * @param dataloader: std::unique_ptr<torch::data::DataLoader> - DataLoader containing data
+ * @param cache_file_path: const std::string& - Path to read/write calibration cache
+ * @param use_cache: bool - use calibration cache
+ * @return Int8Calibrator<Algorithm, DataLoader>
+ */
+template<typename Algorithm = nvinfer1::IInt8EntropyCalibrator2, typename DataLoader>
+TRTORCH_API inline Int8Calibrator<Algorithm, DataLoader> make_int8_calibrator(DataLoader dataloader, const std::string& cache_file_path, bool use_cache) {
+  return Int8Calibrator<Algorithm, DataLoader>(std::move(dataloader), cache_file_path, use_cache);
+}
+
+/**
+ * @brief A factory to build a post training quantization calibrator from a torch dataloader that only uses the calibration cache
+ *
+ * Creates a calibrator to use for post training quantization which reads from a previously created calibration cache. This lets
+ * you have a calibration-cache-generating program that requires a dataloader and a dataset, then save the cache for use later
+ * in a different program that needs to calibrate from scratch without the dataset dependency. However, the network should also
+ * be recalibrated if its structure changes, or the input data set changes, and it is the responsibility of the application to ensure this.
+ *
+ * By default the returned calibrator uses the TensorRT Entropy v2 algorithm to perform calibration. This is recommended for feed-forward networks.
+ * You can override the algorithm selection (such as to use the MinMax Calibrator recommended for NLP tasks) by calling make_int8_cache_calibrator with
+ * the calibrator class as a template parameter.
+ *
+ * e.g. ``trtorch::ptq::make_int8_cache_calibrator<nvinfer1::IInt8MinMaxCalibrator>(calibration_cache_file);``
+ * @tparam Algorithm: class nvinfer1::IInt8Calibrator (Default: nvinfer1::IInt8EntropyCalibrator2) - Algorithm to use
+ * @param cache_file_path: const std::string& - Path to read/write calibration cache
+ * @return Int8CacheCalibrator<Algorithm>
+ */
+template<typename Algorithm = nvinfer1::IInt8EntropyCalibrator2>
+TRTORCH_API inline Int8CacheCalibrator<Algorithm> make_int8_cache_calibrator(const std::string& cache_file_path) {
+  return Int8CacheCalibrator<Algorithm>(cache_file_path);
+}
+
 } // namespace ptq
 } // namespace trtorch
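The Doxygen blocks above give the override form with an explicit template argument; for the default Entropy v2 algorithm, a hedged usage sketch (variable names follow the e.g. lines above; the surrounding setup of the dataloader and cache path is not part of this diff):

// Dataloader-backed calibrator using the default nvinfer1::IInt8EntropyCalibrator2.
auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(calibration_dataloader), calibration_cache_file, /*use_cache=*/true);
// Cache-only calibrator for deployments without the dataset dependency.
auto cache_calibrator = trtorch::ptq::make_int8_cache_calibrator(calibration_cache_file);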

cpp/api/include/trtorch/trtorch.h

Lines changed: 1 addition & 49 deletions
@@ -29,13 +29,11 @@ class ArrayRef;
 }
 
 namespace nvinfer1 {
-class IInt8EntropyCalibrator2;
+class IInt8Calibrator;
 }
 #endif //DOXYGEN_SHOULD_SKIP_THIS
 
 #include "trtorch/macros.h"
-#include "trtorch/logging.h"
-#include "trtorch/ptq.h"
 namespace trtorch {
 /**
  * Settings data structure for TRTorch compilation

@@ -406,50 +404,4 @@ TRTORCH_API torch::jit::Module CompileGraph(const torch::jit::Module& module, Ex
  * @return: std::string: Serialized TensorRT engine equivalent to the method graph
  */
 TRTORCH_API std::string ConvertGraphToTRTEngine(const torch::jit::Module& module, std::string method_name, ExtraInfo info);
-
-namespace ptq {
-/**
- * @brief A factory to build a post training quantization calibrator from a torch dataloader
- *
- * Creates a calibrator to use for post training quantization. By default the returned calibrator uses the TensorRT Entropy v2
- * algorithm to perform calibration. This is recommended for feed-forward networks. You can override the algorithm selection
- * (such as to use the MinMax Calibrator recommended for NLP tasks) by calling make_int8_calibrator with the calibrator class
- * as a template parameter.
- *
- * e.g. ``trtorch::ptq::make_int8_calibrator<nvinfer1::IInt8MinMaxCalibrator>(std::move(calibration_dataloader), calibration_cache_file, use_cache);``
- * @tparam Algorithm: class nvinfer1::IInt8Calibrator (Default: nvinfer1::IInt8EntropyCalibrator2) - Algorithm to use
- * @tparam DataLoader: std::unique_ptr<torch::data::DataLoader> - DataLoader type
- * @param dataloader: std::unique_ptr<torch::data::DataLoader> - DataLoader containing data
- * @param cache_file_path: const std::string& - Path to read/write calibration cache
- * @param use_cache: bool - use calibration cache
- * @return Int8Calibrator<Algorithm, DataLoader>
- */
-template<typename Algorithm = nvinfer1::IInt8EntropyCalibrator2, typename DataLoader>
-TRTORCH_API inline Int8Calibrator<Algorithm, DataLoader> make_int8_calibrator(DataLoader dataloader, const std::string& cache_file_path, bool use_cache) {
-  return Int8Calibrator<Algorithm, DataLoader>(std::move(dataloader), cache_file_path, use_cache);
-}
-
-/**
- * @brief A factory to build a post training quantization calibrator from a torch dataloader that only uses the calibration cache
- *
- * Creates a calibrator to use for post training quantization which reads from a previously created calibration cache. This lets
- * you have a calibration-cache-generating program that requires a dataloader and a dataset, then save the cache for use later
- * in a different program that needs to calibrate from scratch without the dataset dependency. However, the network should also
- * be recalibrated if its structure changes, or the input data set changes, and it is the responsibility of the application to ensure this.
- *
- * By default the returned calibrator uses the TensorRT Entropy v2 algorithm to perform calibration. This is recommended for feed-forward networks.
- * You can override the algorithm selection (such as to use the MinMax Calibrator recommended for NLP tasks) by calling make_int8_cache_calibrator with
- * the calibrator class as a template parameter.
- *
- * e.g. ``trtorch::ptq::make_int8_cache_calibrator<nvinfer1::IInt8MinMaxCalibrator>(calibration_cache_file);``
- * @tparam Algorithm: class nvinfer1::IInt8Calibrator (Default: nvinfer1::IInt8EntropyCalibrator2) - Algorithm to use
- * @param cache_file_path: const std::string& - Path to read/write calibration cache
- * @return Int8CacheCalibrator<Algorithm>
- */
-template<typename Algorithm = nvinfer1::IInt8EntropyCalibrator2>
-TRTORCH_API inline Int8CacheCalibrator<Algorithm> make_int8_cache_calibrator(const std::string& cache_file_path) {
-  return Int8CacheCalibrator<Algorithm>(cache_file_path);
-}
-} // namespace ptq
 } // namespace trtorch

cpp/ptq/main.cpp

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 #include "torch/script.h"
 #include "torch/torch.h"
 #include "trtorch/trtorch.h"
+#include "trtorch/ptq.h"
 
 #include "NvInfer.h"
 
