Skip to content

Commit 24bc3be

Browse files
Taylor Robie and facebook-github-bot
Taylor Robie
authored and committed
[Profiler] Clean up profiler includes. (pytorch#69421)
Summary: Pull Request resolved: pytorch#69421 I've hit a lot of build issues in D32671972, and I've come to realize that a lot of it boils down to header hygiene. `function.h` includes `profiler.h` *solely* to transitively include `record_function.h`, which winds up leaking the profiler symbols. Moreover, several files are relying on transitive includes to get access to `getTime`. As long as I have to touch all the places that use `getTime`, I may as well also move them to the new namespace. Test Plan: Unit tests and CI. Reviewed By: aaronenyeshi, albanD Differential Revision: D32865907 fbshipit-source-id: f87d6fd5afb784dca2146436e72c69e34623020e
1 parent 587f8d9 commit 24bc3be

File tree

13 files changed

+96
-67
lines changed

13 files changed

+96
-67
lines changed

android/pytorch_android/src/main/cpp/pytorch_jni_jit.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
#include <android/log.h>
2020
#endif
2121

22-
using namespace torch::autograd::profiler;
23-
2422
namespace pytorch_jni {
2523

2624
namespace {

test/cpp/jit/test_backend_compiler_lib.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,13 @@ class BackendWithCompiler : public PyTorchBackendInterface {
9898
op_runtimes_us.reserve(handle.toList().size());
9999

100100
c10::List<at::Tensor> output_list;
101-
auto start_us = autograd::profiler::getTime() / 1000;
101+
auto start_us = torch::profiler::impl::getTime() / 1000;
102102
for (const auto& token : handle.toList()) {
103103
IValue val = token;
104104
auto instruction = val.toTupleRef().elements()[0].toStringRef();
105105
auto debug_handle = val.toTupleRef().elements()[1].toInt();
106106
double const_val = 1.0;
107-
auto start_time_us = autograd::profiler::getTime() / 1000;
107+
auto start_time_us = torch::profiler::impl::getTime() / 1000;
108108
try {
109109
if (instruction.rfind("prim::Constant", 0) == 0) {
110110
TORCH_CHECK(
@@ -147,7 +147,7 @@ class BackendWithCompiler : public PyTorchBackendInterface {
147147
} catch (c10::Error& e) {
148148
TORCH_DELEGATED_BACKEND_THROW(false, e.what(), debug_handle);
149149
}
150-
auto end_time_us = autograd::profiler::getTime() / 1000;
150+
auto end_time_us = torch::profiler::impl::getTime() / 1000;
151151
auto duration = end_time_us - start_time_us;
152152
op_runtimes_us.emplace_back(duration, debug_handle, instruction);
153153
}

test/cpp/jit/test_misc.cpp

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include <torch/csrc/autograd/engine.h>
1212
#include <torch/csrc/autograd/generated/variable_factories.h>
13+
#include <torch/csrc/autograd/profiler.h>
1314
#include <torch/csrc/autograd/variable.h>
1415
#include <torch/csrc/jit/api/function_impl.h>
1516
#include <torch/csrc/jit/api/module.h>
@@ -76,8 +77,6 @@
7677
#include <utility>
7778
#include <vector>
7879

79-
using namespace torch::autograd::profiler;
80-
8180
namespace torch {
8281
namespace jit {
8382
inline c10::AliasAnalysisKind aliasAnalysisFromSchema() {
@@ -2664,7 +2663,8 @@ TEST(ComputeFlopsTest, Basic) {
26642663

26652664
// Test unknown operator
26662665
std::unordered_map<std::string, c10::IValue> extra_args;
2667-
flops = computeFlops(std::string("aten::unknown"), extra_args);
2666+
flops = torch::profiler::impl::computeFlops(
2667+
std::string("aten::unknown"), extra_args);
26682668
ASSERT_EQ(flops, 0);
26692669

26702670
// Test aten::conv2d
@@ -2680,30 +2680,34 @@ TEST(ComputeFlopsTest, Basic) {
26802680
extra_args["padding"] = at::IValue(at::IntArrayRef(padding));
26812681
extra_args["stride"] = at::IValue(at::IntArrayRef(stride));
26822682
extra_args["dilation"] = at::IValue(at::IntArrayRef(dilation));
2683-
flops = computeFlops(std::string("aten::conv2d"), extra_args);
2683+
flops = torch::profiler::impl::computeFlops(
2684+
std::string("aten::conv2d"), extra_args);
26842685
ASSERT_EQ(flops, 13440);
26852686

26862687
// Test aten::conv2d fail
26872688
input_size = {4, 5, 6, 7};
26882689
weight_size = {4, 5, 6};
26892690
extra_args["input_size"] = at::IValue(at::IntArrayRef(input_size));
26902691
extra_args["weight_size"] = at::IValue(at::IntArrayRef(weight_size));
2691-
flops = computeFlops(std::string("aten::conv2d"), extra_args);
2692+
flops = torch::profiler::impl::computeFlops(
2693+
std::string("aten::conv2d"), extra_args);
26922694
ASSERT_EQ(flops, 0);
26932695

26942696
// Test aten::conv2d fail 2
26952697
weight_size = {3, 5, 2, 1};
26962698
stride = {0, 0};
26972699
extra_args["weight_size"] = at::IValue(at::IntArrayRef(input_size));
26982700
extra_args["stride"] = at::IValue(at::IntArrayRef(stride));
2699-
flops = computeFlops(std::string("aten::conv2d"), extra_args);
2701+
flops = torch::profiler::impl::computeFlops(
2702+
std::string("aten::conv2d"), extra_args);
27002703
ASSERT_EQ(flops, 0);
27012704

27022705
// Test aten::conv2d fail 3
27032706
extra_args.clear();
27042707
input_size = {4, 5, 6, 7};
27052708
extra_args["input_size"] = at::IValue(at::IntArrayRef(input_size));
2706-
flops = computeFlops(std::string("aten::conv2d"), extra_args);
2709+
flops = torch::profiler::impl::computeFlops(
2710+
std::string("aten::conv2d"), extra_args);
27072711
ASSERT_EQ(flops, 0);
27082712

27092713
// Test aten::mm
@@ -2712,11 +2716,13 @@ TEST(ComputeFlopsTest, Basic) {
27122716
std::vector<int64_t> mat2_sizes = {6, 5, 4, 3};
27132717
extra_args["mat1_size"] = at::IValue(at::IntArrayRef(mat1_sizes));
27142718
extra_args["mat2_size"] = at::IValue(at::IntArrayRef(mat2_sizes));
2715-
flops = computeFlops(std::string("aten::mm"), extra_args);
2719+
flops =
2720+
torch::profiler::impl::computeFlops(std::string("aten::mm"), extra_args);
27162721
ASSERT_EQ(flops, 43200);
27172722

27182723
// Test aten::addmm
2719-
flops = computeFlops(std::string("aten::addmm"), extra_args);
2724+
flops = torch::profiler::impl::computeFlops(
2725+
std::string("aten::addmm"), extra_args);
27202726
ASSERT_EQ(flops, 43200);
27212727

27222728
// Test aten::bmm
@@ -2725,30 +2731,35 @@ TEST(ComputeFlopsTest, Basic) {
27252731
mat2_sizes = {7, 6, 3};
27262732
extra_args["mat1_size"] = at::IValue(at::IntArrayRef(mat1_sizes));
27272733
extra_args["mat2_size"] = at::IValue(at::IntArrayRef(mat2_sizes));
2728-
flops = computeFlops(std::string("aten::bmm"), extra_args);
2734+
flops =
2735+
torch::profiler::impl::computeFlops(std::string("aten::bmm"), extra_args);
27292736
ASSERT_EQ(flops, 1260);
27302737

27312738
// Test aten::baddbmm
2732-
flops = computeFlops(std::string("aten::baddbmm"), extra_args);
2739+
flops = torch::profiler::impl::computeFlops(
2740+
std::string("aten::baddbmm"), extra_args);
27332741
ASSERT_EQ(flops, 1260);
27342742

27352743
// Test mm out of range
27362744
extra_args.clear();
2737-
flops = computeFlops(std::string("aten::mm"), extra_args);
2745+
flops =
2746+
torch::profiler::impl::computeFlops(std::string("aten::mm"), extra_args);
27382747
ASSERT_EQ(flops, 0);
27392748

27402749
// Test aten::add.Tensor
27412750
extra_args.clear();
27422751
std::vector<int64_t> mat_sizes = {3, 4, 5, 6};
27432752
extra_args["mat_size"] = at::IValue(at::IntArrayRef(mat_sizes));
2744-
flops = computeFlops(std::string("aten::add"), extra_args);
2753+
flops =
2754+
torch::profiler::impl::computeFlops(std::string("aten::add"), extra_args);
27452755
ASSERT_EQ(flops, 360);
27462756

27472757
// Test aten::mul.Tensor
27482758
extra_args.clear();
27492759
mat_sizes = {3, 4, 5, 6};
27502760
extra_args["mat_size"] = at::IValue(at::IntArrayRef(mat_sizes));
2751-
flops = computeFlops(std::string("aten::mul"), extra_args);
2761+
flops =
2762+
torch::profiler::impl::computeFlops(std::string("aten::mul"), extra_args);
27522763
ASSERT_EQ(flops, 360);
27532764
}
27542765

torch/csrc/api/include/torch/utils.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <ATen/Parallel.h>
44
#include <ATen/record_function.h>
55
#include <torch/csrc/autograd/grad_mode.h>
6+
#include <torch/csrc/autograd/profiler.h>
67
#include <torch/csrc/api/include/torch/types.h>
78
#include <torch/csrc/utils/crash_handler.h>
89
#include <cstdint>

torch/csrc/autograd/function.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
#include <torch/csrc/autograd/edge.h>
44
#include <torch/csrc/autograd/grad_mode.h>
55
#include <torch/csrc/autograd/anomaly_mode.h>
6-
#include <torch/csrc/autograd/profiler.h>
76
#include <torch/csrc/autograd/saved_variable.h>
87
#include <torch/csrc/autograd/input_metadata.h>
98
#include <torch/csrc/autograd/variable.h>
109
#include <torch/csrc/utils/python_stub.h>
1110
#include <torch/csrc/utils/variadic.h>
1211

1312
#include <ATen/ATen.h>
13+
#include <ATen/record_function.h>
1414
#include <ATen/SequenceNumber.h>
1515
#include <c10/util/Exception.h>
1616

torch/csrc/autograd/profiler_cuda.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ struct CUDAMethods : public CUDAStubs {
4545
});
4646
auto stream = at::cuda::getCurrentCUDAStream();
4747
if (cpu_ns) {
48-
*cpu_ns = getTime();
48+
*cpu_ns = torch::profiler::impl::getTime();
4949
}
5050
TORCH_CUDA_CHECK(cudaEventRecord(cuda_event_ptr, stream));
5151
}

torch/csrc/autograd/profiler_kineto.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ inline int64_t getTimeUs() {
4141
#ifdef USE_KINETO
4242
return libkineto::timeSinceEpoch(std::chrono::system_clock::now());
4343
#else
44-
return getTime() / 1000;
44+
return torch::profiler::impl::getTime() / 1000;
4545
#endif // USE_KINETO
4646
}
4747
} // namespace

torch/csrc/autograd/profiler_legacy.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ void LegacyEvent::record(bool record_cuda) {
568568
cuda_stubs()->record(&device_, &cuda_event, &cpu_ns_);
569569
return;
570570
}
571-
cpu_ns_ = getTime();
571+
cpu_ns_ = torch::profiler::impl::getTime();
572572
}
573573

574574
/* static */ LegacyEvent LegacyEvent::fromIValue(const at::IValue& eventIValue) {

torch/csrc/autograd/profiler_legacy.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,3 +598,15 @@ struct TORCH_API ProfilerThreadLocalState : public c10::MemoryReportingInfoBase
598598

599599
} // namespace profiler
600600
}} // namespace torch::autograd
601+
602+
// Mirror symbols in new namespace for transition.
603+
namespace torch {
604+
namespace profiler {
605+
namespace impl {
606+
using torch::autograd::profiler::computeFlops;
607+
using torch::autograd::profiler::getTime;
608+
using torch::autograd::profiler::ProfilerConfig;
609+
using torch::autograd::profiler::ProfilerState;
610+
} // impl
611+
} // profiler
612+
} // torch

torch/csrc/distributed/c10d/reducer.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <c10d/comm.hpp>
1616
#include <c10d/default_comm_hooks.hpp>
1717
#include <torch/csrc/autograd/function.h>
18+
#include <torch/csrc/autograd/profiler.h>
1819
#include <torch/csrc/autograd/variable.h>
1920
#ifndef _WIN32
2021
#include <torch/csrc/distributed/autograd/context/context.h>
@@ -29,7 +30,7 @@ constexpr int kDDPRuntimeLoggingSampleRate = 100;
2930
constexpr int kUnsetTime = -1;
3031

3132
inline int64_t current_time_in_nanos() {
32-
return torch::autograd::profiler::getTime();
33+
return torch::profiler::impl::getTime();
3334
}
3435

3536
// Forward declaration

torch/csrc/jit/mobile/profiler_edge.cpp

Lines changed: 41 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -18,49 +18,52 @@ KinetoEdgeCPUProfiler::KinetoEdgeCPUProfiler(
1818
const bool with_flops,
1919
const bool with_modules)
2020
: m_(m), trace_file_name_(fname) {
21-
profiler::ProfilerConfig config(
22-
profiler::ProfilerState::KINETO,
21+
torch::profiler::impl::ProfilerConfig config(
22+
torch::profiler::impl::ProfilerState::KINETO,
2323
report_input_shapes,
2424
profile_memory,
2525
with_stack,
2626
with_flops,
2727
with_modules);
28-
profiler::prepareProfiler(config, {profiler::ActivityType::CPU});
28+
torch::autograd::profiler::prepareProfiler(
29+
config, {torch::autograd::profiler::ActivityType::CPU});
2930
if (with_modules || with_stack) {
30-
auto post_processing = [this, with_stack, with_modules](
31-
std::vector<profiler::KinetoEvent>& events) {
32-
std::string no_debug_info("Model was not saved with debug information");
33-
for (auto& e : events) {
34-
if (with_modules) {
35-
// Since KinetoEvents's module hierarchy takes vector of strings we
36-
// just construct a temporary vector using one string element
37-
if (this->m_.hasDebugHandles()) {
38-
e.moduleHierarchy(std::vector<std::string>(
39-
{this->m_.getModuleHierarchy(e.debugHandle())}));
40-
} else {
41-
e.moduleHierarchy(std::vector<std::string>({no_debug_info}));
31+
auto post_processing =
32+
[this, with_stack, with_modules](
33+
std::vector<torch::autograd::profiler::KinetoEvent>& events) {
34+
std::string no_debug_info(
35+
"Model was not saved with debug information");
36+
for (auto& e : events) {
37+
if (with_modules) {
38+
// Since KinetoEvents's module hierarchy takes vector of strings
39+
// we just construct a temporary vector using one string element
40+
if (this->m_.hasDebugHandles()) {
41+
e.moduleHierarchy(std::vector<std::string>(
42+
{this->m_.getModuleHierarchy(e.debugHandle())}));
43+
} else {
44+
e.moduleHierarchy(std::vector<std::string>({no_debug_info}));
45+
}
46+
} else if (with_stack) {
47+
// Since KinetoEvents's stack trace takes vector of strings we
48+
// just construct a temporary vector using one string element
49+
if (this->m_.hasDebugHandles()) {
50+
e.stack(std::vector<std::string>(
51+
{this->m_.getCallStack(e.debugHandle())}));
52+
} else {
53+
e.stack(std::vector<std::string>({no_debug_info}));
54+
}
55+
}
4256
}
43-
} else if (with_stack) {
44-
// Since KinetoEvents's stack trace takes vector of strings we just
45-
// construct a temporary vector using one string element
46-
if (this->m_.hasDebugHandles()) {
47-
e.stack(std::vector<std::string>(
48-
{this->m_.getCallStack(e.debugHandle())}));
49-
} else {
50-
e.stack(std::vector<std::string>({no_debug_info}));
51-
}
52-
}
53-
}
54-
};
55-
profiler::enableProfilerWithEventPostProcess(
57+
};
58+
torch::autograd::profiler::enableProfilerWithEventPostProcess(
5659
config,
57-
{profiler::ActivityType::CPU},
60+
{torch::autograd::profiler::ActivityType::CPU},
5861
post_processing,
5962
{at::RecordScope::LITE_INTERPRETER});
6063
} else {
61-
profiler::enableProfiler(
64+
torch::autograd::profiler::enableProfiler(
6265
config,
63-
{profiler::ActivityType::CPU},
66+
{torch::autograd::profiler::ActivityType::CPU},
6467
{at::RecordScope::LITE_INTERPRETER});
6568
}
6669
trace_file_name_ = fname;
@@ -75,7 +78,7 @@ void KinetoEdgeCPUProfiler::recordBackendEvent(
7578
const int64_t debug_handle,
7679
const std::string& event_name,
7780
const std::string& backend_name) {
78-
profiler::reportBackendEventToActiveKinetoProfiler(
81+
torch::autograd::profiler::reportBackendEventToActiveKinetoProfiler(
7982
start_time_us,
8083
end_time_us,
8184
debug_handle,
@@ -84,18 +87,18 @@ void KinetoEdgeCPUProfiler::recordBackendEvent(
8487
backend_name);
8588
}
8689

87-
const std::unique_ptr<profiler::ProfilerResult>& KinetoEdgeCPUProfiler::
88-
disableProfiler() {
90+
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
91+
KinetoEdgeCPUProfiler::disableProfiler() {
8992
TORCH_CHECK(
9093
!profiler_result_,
9194
"KinetoEdgeCPUProfiler already disabled. "
9295
"To get list of events use getProfilerResults()");
93-
profiler_result_ = profiler::disableProfiler();
96+
profiler_result_ = torch::autograd::profiler::disableProfiler();
9497
return profiler_result_;
9598
}
9699

97-
const std::unique_ptr<profiler::ProfilerResult>& KinetoEdgeCPUProfiler::
98-
getProfilerResult() {
100+
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
101+
KinetoEdgeCPUProfiler::getProfilerResult() {
99102
TORCH_CHECK(
100103
profiler_result_,
101104
"KinetoEdgeCPUProfiler has not been disabled. "
@@ -108,7 +111,7 @@ KinetoEdgeCPUProfiler::~KinetoEdgeCPUProfiler() {
108111
if (profiler_result_) {
109112
profiler_result_->save(trace_file_name_);
110113
} else {
111-
profiler::disableProfiler()->save(trace_file_name_);
114+
torch::autograd::profiler::disableProfiler()->save(trace_file_name_);
112115
}
113116
}
114117
tls_edge_profiler = nullptr;

torch/csrc/jit/mobile/profiler_edge.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
#include <torch/csrc/autograd/profiler_kineto.h>
33
#include <torch/csrc/jit/mobile/module.h>
44

5-
namespace profiler = torch::autograd::profiler;
65
namespace torch {
76
namespace jit {
87
namespace mobile {
@@ -58,8 +57,10 @@ class TORCH_API KinetoEdgeCPUProfiler {
5857
const bool with_flops = false,
5958
const bool with_modules = false);
6059

61-
const std::unique_ptr<profiler::ProfilerResult>& disableProfiler();
62-
const std::unique_ptr<profiler::ProfilerResult>& getProfilerResult();
60+
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
61+
disableProfiler();
62+
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
63+
getProfilerResult();
6364
void recordBackendEvent(
6465
const int64_t start_time_us,
6566
const int64_t end_time_us,
@@ -76,7 +77,7 @@ class TORCH_API KinetoEdgeCPUProfiler {
7677
*/
7778
const mobile::Module& m_;
7879
std::string trace_file_name_;
79-
std::unique_ptr<profiler::ProfilerResult> profiler_result_;
80+
std::unique_ptr<torch::autograd::profiler::ProfilerResult> profiler_result_;
8081
};
8182

8283
TORCH_API KinetoEdgeCPUProfiler* getCurrentEdgeProfiler();

0 commit comments

Comments
 (0)