Skip to content

Commit 2ceff6b

Browse files
committed
[Placeholder] Teach the OCL and CPU backends to handle Placeholders.
1 parent 4dd7052 commit 2ceff6b

File tree

7 files changed

+61
-19
lines changed

7 files changed

+61
-19
lines changed

lib/Backends/CPU/AllocationsInfo.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ using llvm::dyn_cast;
3232
using llvm::isa;
3333

3434
void AllocationsInfo::allocateWeightVars(const IRFunction *F,
35+
const PlaceholderMap &placeholders,
3536
bool absoluteAddr) {
3637
// Use two different allocators, because constant weights and mutable weights
3738
// may use different memory blocks.
@@ -73,6 +74,21 @@ void AllocationsInfo::allocateWeightVars(const IRFunction *F,
7374
}
7475
}
7576

77+
// Allocate addresses for the Placeholders.
78+
for (auto PH : placeholders) {
79+
assert(isa<WeightVar>(F->getWeightForNode(PH.first)));
80+
auto *w = cast<WeightVar>(F->getWeightForNode(PH.first));
81+
auto numBytes = w->getSizeInBytes();
82+
size_t addr = mutableWeightVarsAllocator.allocate(numBytes, w);
83+
if (!absoluteAddr) {
84+
allocatedAddressed_[w] = addr;
85+
} else {
86+
// Reuse the address used by the payload.
87+
allocatedAddressed_[w] =
88+
PH.second->getUnsafePtr() - static_cast<char *>(nullptr);
89+
}
90+
}
91+
7692
// Remember that max required memory size for each kind of weights.
7793
constantWeightVarsMemSize_ = constantWeightVarsAllocator.getMaxMemoryUsage();
7894
mutableWeightVarsMemSize_ = mutableWeightVarsAllocator.getMaxMemoryUsage();
@@ -196,6 +212,14 @@ void AllocationsInfo::numberValues(const IRFunction *F) {
196212
: ValueKind::MutableWeight;
197213
valueNumbers_[w] = std::make_pair(kind, valueIdx++);
198214
}
215+
216+
// Assign numbers to all placeholders.
217+
for (auto &v : F->getGraph()->getParent()->getPlaceholders()) {
218+
assert(isa<WeightVar>(F->getWeightForNode(v)));
219+
auto *w = cast<WeightVar>(F->getWeightForNode(v));
220+
valueNumbers_[w] = std::make_pair(ValueKind::MutableWeight, valueIdx++);
221+
}
222+
199223
// Assign numbers to all activations and tensorviews.
200224
for (const auto &I : F->getInstrs()) {
201225
if (auto *A = dyn_cast<AllocActivationInst>(&I)) {

lib/Backends/CPU/AllocationsInfo.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#ifndef GLOW_BACKENDS_CPU_ALLOCATIONSINFO_H
1717
#define GLOW_BACKENDS_CPU_ALLOCATIONSINFO_H
1818

19+
#include "glow/Backends/CompiledFunction.h"
20+
1921
#include "llvm/IR/Module.h"
2022

2123
#include <functional>
@@ -52,12 +54,15 @@ struct AllocationsInfo {
5254
/// Base address of activations.
5355
uint8_t *baseActivationsAddress_{nullptr};
5456

55-
/// Assign offsets to all WeightVars of \p M.
57+
/// Assign offsets to all of the WeightVars of the function \p F and to the
58+
/// placeholders \p placeholders.
5659
/// If the \p absoluteAddr is true, simply reuse the addresses already used
5760
/// by the payloads of tensors corresponding to those WeightVars as offsets.
5861
/// This is useful in a JIT setup. If \p absoluteAddr is false, then all the
5962
/// WeightVars will get new offsets assigned.
60-
void allocateWeightVars(const IRFunction *F, bool absoluteAddr);
63+
void allocateWeightVars(const IRFunction *F,
64+
const PlaceholderMap &placeholders,
65+
bool absoluteAddr);
6166
/// Assign offsets to all activations.
6267
/// No actual memory allocation is performed. All the allocations should be
6368
/// performed by the client based on the information provided by the

lib/Backends/CPU/BundleSaver.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,8 @@ void BundleSaver::performBundleMemoryAllocation() {
250250
allocationsInfo_.allocateActivations(F_);
251251
// Tell the allocateWeightVars to not reuse any existing addresses for weights
252252
// and to assign new ones.
253-
allocationsInfo_.allocateWeightVars(F_, false);
253+
PlaceholderMap empty;
254+
allocationsInfo_.allocateWeightVars(F_, empty, false);
254255
allocationsInfo_.allocateTensorViews(F_);
255256
}
256257

lib/Backends/CPU/CPUBackend.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,12 @@ static void emitJitMain(LLVMIRGen &irgen) {
9696

9797
/// Perform memory allocation for a JIT execution.
9898
static void *allocateJITMemory(const IRFunction *F,
99-
AllocationsInfo &allocationsInfo) {
99+
AllocationsInfo &allocationsInfo,
100+
const PlaceholderMap &placeholders) {
100101
allocationsInfo.numberValues(F);
101102
allocationsInfo.allocateActivations(F);
102-
// Tell the allocateWeightVars to reuse existing addresses for weights.
103-
allocationsInfo.allocateWeightVars(F, true);
103+
// Tell the allocateWeightVars to use absolute addresses for weights.
104+
allocationsInfo.allocateWeightVars(F, placeholders, true);
104105
allocationsInfo.allocateTensorViews(F);
105106

106107
// Allocate the heap to match the max memory usage for activations.
@@ -131,7 +132,8 @@ CPUBackend::compile(std::unique_ptr<IRFunction> IR,
131132
llvm::CodeModel::Model::Large);
132133
irgen->initCodeGen();
133134
// Perform the address assignment for activations and WeightVars.
134-
auto heap = allocateJITMemory(IR.get(), irgen->getAllocationsInfo());
135+
auto heap =
136+
allocateJITMemory(IR.get(), irgen->getAllocationsInfo(), placeholders);
135137
// Create the jitmain function to be invoked by JIT.
136138
emitJitMain(*irgen);
137139
// Emit the code for the body of the entry function.

lib/Backends/OpenCL/OpenCL.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ static void addStringOption(std::vector<std::string> &options,
9898
options.push_back("-D" + name + "=" + value);
9999
}
100100

101-
OpenCLFunction::OpenCLFunction(std::unique_ptr<IRFunction> F)
101+
OpenCLFunction::OpenCLFunction(std::unique_ptr<IRFunction> F,
102+
const PlaceholderMap &placeholders)
102103
: F_(std::move(F)) {
103104
cl_uint numPlatforms{0};
104105
cl_int err = clGetPlatformIDs(0, NULL, &numPlatforms);
@@ -136,7 +137,7 @@ OpenCLFunction::OpenCLFunction(std::unique_ptr<IRFunction> F)
136137
addIntOption(options, "SIZEOF_HOST_SIZE_T", sizeof(size_t));
137138
// Create the program from the source.
138139
createProgram(SHADER_CODE, options, commands_);
139-
allocateMemory();
140+
allocateMemory(placeholders);
140141
}
141142

142143
OpenCLFunction::~OpenCLFunction() {
@@ -1482,15 +1483,24 @@ uint64_t OpenCLFunction::copyMutableWeightsFromDevice() {
14821483
return copiedBytes;
14831484
}
14841485

1485-
void OpenCLFunction::allocateMemory() {
1486-
/// The allocator assigns device memory addresses to the buffers.
1486+
void OpenCLFunction::allocateMemory(const PlaceholderMap &placeholders) {
1487+
// The allocator assigns device memory addresses to the buffers.
14871488
MemoryAllocator allocator("GPU", 0xFFFFFFFF);
1489+
1490+
// Register the bound locations of the variables.
14881491
for (auto &v : F_->getGraph()->getParent()->getVars()) {
14891492
auto *w = F_->getWeightForNode(v);
14901493
assert(!externalTensors_.count(w) && "The tensor is already registered");
14911494
externalTensors_[w] = &v->getPayload();
14921495
}
14931496

1497+
// Register the bound locations of the placeholders.
1498+
for (auto PH : placeholders) {
1499+
auto *w = F_->getWeightForNode(PH.first);
1500+
assert(!externalTensors_.count(w) && "The tensor is already registered");
1501+
externalTensors_[w] = PH.second;
1502+
}
1503+
14941504
// Assign device-space addresses to the weights.
14951505
for (auto it : externalTensors_) {
14961506
Tensor *T = it.second;
@@ -1573,5 +1583,5 @@ void OpenCLFunction::freeDeviceBuffer(cl_mem buf) { clReleaseMemObject(buf); }
15731583
std::unique_ptr<CompiledFunction>
15741584
OCLBackend::compile(std::unique_ptr<IRFunction> IR,
15751585
const PlaceholderMap &placeholders) const {
1576-
return llvm::make_unique<OpenCLFunction>(std::move(IR));
1586+
return llvm::make_unique<OpenCLFunction>(std::move(IR), placeholders);
15771587
}

lib/Backends/OpenCL/OpenCL.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ class OpenCLFunction final : public CompiledFunction {
9292

9393
public:
9494
/// Ctor.
95-
explicit OpenCLFunction(std::unique_ptr<IRFunction> F);
95+
explicit OpenCLFunction(std::unique_ptr<IRFunction> F,
96+
const PlaceholderMap &placeholders);
9697

9798
/// @name CompiledFunction interface
9899
///@{
@@ -103,7 +104,7 @@ class OpenCLFunction final : public CompiledFunction {
103104

104105
private:
105106
/// Allocate memory for the tensors.
106-
void allocateMemory();
107+
void allocateMemory(const PlaceholderMap &placeholders);
107108
/// Copy the value from a device to a provided buffer.
108109
/// If \p buf is nullptr, the payload of the underlying tensor is used.
109110
/// \returns number of copied bytes.

tests/unittests/BackendTest.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,15 +179,14 @@ TEST_P(BackendTest, decoupleCodegenFromGraph) {
179179

180180
/// Check that we can pass information to the execution engine using Placeholder
181181
/// variables and read it back using Save nodes (in variables).
182-
TEST(Placeholder, simplePlaceholderValue) {
182+
TEST_P(BackendTest, simplePlaceholderValue) {
183183
Tensor data{99.0, 35.0, 2.0, 3.0};
184-
ExecutionEngine EE{BackendKind::Interpreter};
185-
auto &mod = EE.getModule();
184+
auto &mod = EE_.getModule();
186185
Function *F = mod.createFunction("main");
187186
auto *input = mod.createPlaceholder(ElemKind::FloatTy, {4}, "input");
188187
SaveNode *S = F->createSave("ret", input);
189-
EE.compile(CompilationMode::Infer, F, {input}, {&data});
190-
EE.run();
188+
EE_.compile(CompilationMode::Infer, F, {input}, {&data});
189+
EE_.run();
191190
auto &res = S->getVariable()->getPayload();
192191
EXPECT_TRUE(res.isEqual(data));
193192
}

0 commit comments

Comments
 (0)