Commit d32e0e8

Moved CPUBackend memory allocation to runtime.
Created a new RuntimeBundle to pass information forward from compile time to runtime.
1 parent a7acfc8 commit d32e0e8

11 files changed: +229 -135 lines
docs/JIT.md

Lines changed: 4 additions & 7 deletions
@@ -19,12 +19,10 @@ The JIT, on the other hand, generates a single stream of highly optimized
 instructions that don't go back to the interpreter. Moreover, each instruction
 is optimized based on specific information on the context in which the
 instruction is executed. When a matrix multiplication is compiled, the JIT knows
-exactly the dimensions of the matrices that are being executed and where the
-tensors are placed in memory. The JIT knows that the buffers do or do-not
-alias, and exactly the number of iterations for the loop. The knowledge enables
+exactly the dimensions of the matrices that are being executed. This knowledge enables
 much better code generation and vectorization. The JIT is also able to eliminate
 all calls to 'malloc', because the memory is statically allocated. The whole
-network is allocated by a single malloc call.
+network is allocated by a single malloc call, and all inputs and outputs by a single separate call.

 ### How the JIT Works

@@ -35,9 +33,8 @@ allocate concrete memory addresses for the AllocActivation instructions in the
 module. The allocation is done by scanning the module and updating the memory
 allocator. After this process the allocator reports the high water mark, which
 is the maximum number of bytes that the network consumes. The allocator assigns
-offsets for each alloc activation within the buffer. Then, the JIT performs a
-single call to 'malloc' to allocate the heap. At this point each activation and
-each weight has a concrete address on the heap.
+offsets for each alloc activation within the buffer. This information is stored
+in a RuntimeBundle, and a single call to malloc initializes the heap for activations.

 Next, the JIT opens a new LLVM function and prepares for code generation. The
 compiler goes over each low-level instruction and generates a sequence of
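To make the offset scheme concrete, here is a minimal sketch of the idea the doc describes (the names and numbers are illustrative, not code from this commit): the allocator assigns each tensor a byte offset, one malloc reserves the whole region, and every concrete address is base plus offset.

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <string>
#include <unordered_map>

// Hypothetical symbol table mapping tensor names to sizes and offsets, in the
// spirit of the RuntimeBundle introduced by this commit.
struct SymbolInfo {
  size_t size;   // size in bytes
  size_t offset; // byte offset from the base of the heap
};

int main() {
  std::unordered_map<std::string, SymbolInfo> symbols = {
      {"conv1.weight", {4096, 0}}, {"conv1.bias", {64, 4096}}};
  const size_t highWaterMark = 4160; // max bytes reported by the allocator

  // A single malloc for the whole region; no per-tensor allocations.
  uint8_t *base = static_cast<uint8_t *>(std::malloc(highWaterMark));
  // Every tensor address is computed as base + offset.
  uint8_t *conv1Weight = base + symbols["conv1.weight"].offset;
  (void)conv1Weight; // the JIT bakes such addresses into the generated code
  std::free(base);
  return 0;
}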

include/glow/Backends/CompiledFunction.h

Lines changed: 30 additions & 3 deletions
@@ -16,23 +16,50 @@
 #ifndef GLOW_BACKENDS_COMPILEDFUNCTION_H
 #define GLOW_BACKENDS_COMPILEDFUNCTION_H

+#include "glow/Graph/Nodes.h"
 #include <unordered_map>

 namespace glow {

 class Context;
-
+namespace runtime {
+/// RuntimeSymbolInfo
+/// Contains information for initialization and handling of a symbol at runtime.
+struct RuntimeSymbolInfo {
+  /// The size in bytes.
+  size_t size;
+  /// Offset in bytes from the base address.
+  size_t offset;
+};
+/// RuntimeBundle
+/// Contains the information that needs to be passed forward from compile time
+/// to runtime in order to allocate and initialize memory.
+struct RuntimeBundle {
+  /// Map from symbol name to a RuntimeSymbolInfo.
+  std::unordered_map<std::string, RuntimeSymbolInfo> symbolTable;
+  /// Pointer to memory containing the weights for execution.
+  uint8_t *constants;
+  /// Amount of memory needed for constant weights.
+  const size_t constantWeightVarsMemSize;
+  /// Amount of memory needed for mutable vars.
+  const size_t mutableWeightVarsMemSize;
+  /// Amount of memory needed for activations.
+  const size_t activationsMemSize;
+  RuntimeBundle(size_t constWeight, size_t mutableWeight, size_t activations)
+      : constantWeightVarsMemSize(constWeight),
+        mutableWeightVarsMemSize(mutableWeight),
+        activationsMemSize(activations) {}
+};
+} // end namespace runtime
 /// Interface for executing a compiled function.
 class CompiledFunction {
 public:
   /// Dtor.
   virtual ~CompiledFunction() = default;
-
   /// Execute the network and allocate Placeholder memory with given
   /// \p ctx providing mapping between Placeholder and populated tensor.
   virtual void execute(Context &ctx) = 0;
 };
-
 } // end namespace glow

 #endif // GLOW_BACKENDS_COMPILEDFUNCTION_H
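A minimal sketch of how a runtime might consume this bundle. The structs are restated so the snippet stands alone; bindPlaceholder and the buffer handling are illustrative assumptions, not part of this commit:

#include <cstdint>
#include <cstring>
#include <string>
#include <unordered_map>

// Restated from the header above for a self-contained example.
struct RuntimeSymbolInfo {
  size_t size;   // size in bytes
  size_t offset; // offset in bytes from the base address
};

struct RuntimeBundle {
  std::unordered_map<std::string, RuntimeSymbolInfo> symbolTable;
  uint8_t *constants = nullptr;
  size_t constantWeightVarsMemSize = 0;
  size_t mutableWeightVarsMemSize = 0;
  size_t activationsMemSize = 0;
};

// Copy a caller-provided tensor into its reserved slot in the mutable-weights
// region, resolving the slot through the bundle's symbol table.
bool bindPlaceholder(const RuntimeBundle &bundle, uint8_t *mutableBase,
                     const std::string &name, const void *data) {
  auto it = bundle.symbolTable.find(name);
  if (it == bundle.symbolTable.end()) {
    return false; // unknown symbol
  }
  std::memcpy(mutableBase + it->second.offset, data, it->second.size);
  return true;
}

The design choice here is that the symbol table carries only sizes and offsets, so the runtime can allocate the mutable region with one call sized to mutableWeightVarsMemSize and resolve every placeholder by name.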

lib/Backends/CPU/AllocationsInfo.cpp

Lines changed: 59 additions & 45 deletions
@@ -16,13 +16,15 @@
 #define DEBUG_TYPE "jit-allocations"

 #include "AllocationsInfo.h"
+#include "glow/Backends/CompiledFunction.h"
 #include "glow/CodeGen/MemoryAllocator.h"
 #include "glow/Graph/Context.h"
 #include "glow/Graph/Graph.h"
 #include "glow/Graph/Nodes.h"
 #include "glow/IR/IRUtils.h"
 #include "glow/IR/Instrs.h"
 #include "glow/Support/Debug.h"
+#include "glow/Support/Memory.h"

 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -32,9 +34,7 @@ using llvm::cast;
 using llvm::dyn_cast;
 using llvm::isa;

-void AllocationsInfo::allocateWeightVars(const IRFunction *F,
-                                         const Context &ctx,
-                                         bool absoluteAddr) {
+void AllocationsInfo::allocateWeightVars(const IRFunction *F) {
   // Use two different allocators, because constant weights and mutable weights
   // may use different memory blocks.
   MemoryAllocator constantWeightVarsAllocator("ConstantWeights", 0);
@@ -43,48 +43,29 @@
   // Compute the new offsets for all the weights, do not reuse their current
   // addresses. Process all constant WeightVars first.
   for (auto &v : F->getGraph()->getParent()->getConstants()) {
-    assert(isa<WeightVar>(F->getWeightForNode(v)));
+    assert(isa<WeightVar>(F->getWeightForNode(v)) && "Expected WeightVar");
     auto *w = cast<WeightVar>(F->getWeightForNode(v));
     auto numBytes = w->getSizeInBytes();
     size_t addr = constantWeightVarsAllocator.allocate(numBytes, w);
-    if (!absoluteAddr) {
-      allocatedAddressed_[w] = addr;
-    } else {
-      // Reuse the address used by the payload.
-      allocatedAddressed_[w] =
-          v->getPayload().getUnsafePtr() - static_cast<char *>(nullptr);
-    }
+    allocatedAddress_[w] = addr;
   }

-  if (absoluteAddr) {
-    // Allocate addresses for the Placeholders that have payloads defined at
-    // compile-time.
-    // TODO: Remove this branch once Context becomes a parameter of the
-    // CompiledFunction::execute method.
-    for (auto PH : ctx.pairs()) {
-      assert(isa<WeightVar>(F->getWeightForNode(PH.first)));
-      auto *w = cast<WeightVar>(F->getWeightForNode(PH.first));
-      // Reuse the address used by the payload.
-      allocatedAddressed_[w] =
-          PH.second->getUnsafePtr() - static_cast<char *>(nullptr);
-    }
-  } else {
-    // Allocate based on size as reported by the formal type of Placeholders
-    for (auto &v : F->getGraph()->getParent()->getPlaceholders()) {
-      assert(isa<WeightVar>(F->getWeightForNode(v)));
-      auto *w = cast<WeightVar>(F->getWeightForNode(v));
-      auto numBytes = w->getSizeInBytes();
-      size_t addr = mutableWeightVarsAllocator.allocate(numBytes, w);
-      allocatedAddressed_[w] = addr;
-    }
+  // Compute the offsets and total memory requirements for Placeholders.
+  for (auto &v : F->getGraph()->getParent()->getPlaceholders()) {
+    // Get the WeightVar for each Placeholder to calculate offsets.
+    assert(isa<WeightVar>(F->getWeightForNode(v)) && "Expected WeightVar");
+    auto *w = cast<WeightVar>(F->getWeightForNode(v));
+    auto numBytes = w->getSizeInBytes();
+    size_t addr = mutableWeightVarsAllocator.allocate(numBytes, w);
+    allocatedAddress_[w] = addr;
   }

   // Remember that max required memory size for each kind of weights.
   constantWeightVarsMemSize_ = constantWeightVarsAllocator.getMaxMemoryUsage();
   mutableWeightVarsMemSize_ = mutableWeightVarsAllocator.getMaxMemoryUsage();

   DEBUG_GLOW(for (auto &A
-                  : allocatedAddressed_) {
+                  : allocatedAddress_) {
     if (isa<AllocActivationInst>(A.first) || isa<TensorViewInst>(A.first))
       continue;
     assert(valueNumbers_.count(A.first) && "Unknown weight");
@@ -94,13 +75,47 @@
                       : "mutable weight";
     llvm::errs() << "Allocated " << kind << " " << A.first->getName()
                  << " size: " << A.first->getSizeInBytes()
-                 << " address range: [" << allocatedAddressed_[A.first]
-                 << ", "
-                 << allocatedAddressed_[A.first] + A.first->getSizeInBytes()
+                 << " address range: [" << allocatedAddress_[A.first] << ", "
+                 << allocatedAddress_[A.first] + A.first->getSizeInBytes()
                  << "]\n";
   });
 }

+void AllocationsInfo::collectConstants(const IRFunction *F) {
+  // At compile time condense constants to a single block of memory.
+  // This allows the graph to go away after compile time.
+  baseConstantWeightVarsStore_ =
+      (uint8_t *)alignedAlloc(constantWeightVarsMemSize_, TensorAlignment);
+  for (auto &v : F->getGraph()->getParent()->getConstants()) {
+    assert(isa<WeightVar>(F->getWeightForNode(v)));
+    auto *w = cast<WeightVar>(F->getWeightForNode(v));
+    auto payload = v->getPayload().getUnsafePtr();
+    auto numBytes = w->getSizeInBytes();
+    auto addr = allocatedAddress_[w];
+    // Copy weight to offset.
+    memcpy(baseConstantWeightVarsStore_ + addr, payload, numBytes);
+  }
+}
+
+runtime::RuntimeBundle
+AllocationsInfo::generateRuntimeBundle(const IRFunction *F) {
+  runtime::RuntimeBundle info(constantWeightVarsMemSize_,
+                              mutableWeightVarsMemSize_, activationsMemSize_);
+  std::unordered_map<std::string, runtime::RuntimeSymbolInfo> symbolTable;
+  info.constants = baseConstantWeightVarsStore_;
+  for (auto &v : F->getGraph()->getParent()->getPlaceholders()) {
+    assert(isa<WeightVar>(F->getWeightForNode(v)) && "Expected WeightVar");
+    auto *w = cast<WeightVar>(F->getWeightForNode(v));
+    runtime::RuntimeSymbolInfo symbol;
+    symbol.offset = allocatedAddress_[w];
+    symbol.size = w->getSizeInBytes();
+    symbolTable.emplace(std::string(v->getName()), symbol);
+  }
+  info.symbolTable = std::move(symbolTable);
+  return info;
+}
+
 void AllocationsInfo::allocateActivations(const IRFunction *F) {
   // Use a memory allocator with no upper bound on how much memory we can
   // allocate.
@@ -131,15 +146,14 @@ void AllocationsInfo::allocateActivations(const IRFunction *F) {

   // Register specific addresses within the heap to activations.
   for (auto &A : activationAddr) {
-    allocatedAddressed_[A.first] = A.second;
+    allocatedAddress_[A.first] = A.second;
   }
   DEBUG_GLOW(for (auto &A
-                  : allocatedAddressed_) {
+                  : allocatedAddress_) {
     llvm::errs() << "Allocated activation " << A.first->getName()
                  << " size: " << A.first->getSizeInBytes()
-                 << " address range: [" << allocatedAddressed_[A.first]
-                 << ", "
-                 << allocatedAddressed_[A.first] + A.first->getSizeInBytes()
+                 << " address range: [" << allocatedAddress_[A.first] << ", "
+                 << allocatedAddress_[A.first] + A.first->getSizeInBytes()
                  << "]\n";
   });
 }
@@ -174,18 +188,18 @@ void AllocationsInfo::allocateTensorViews(const IRFunction *F) {
   for (const auto &I : F->getInstrs()) {
     if (const auto *TVI = dyn_cast<TensorViewInst>(&I)) {
       auto *viewOrigin = getOrigin(TVI);
-      assert(allocatedAddressed_.count(viewOrigin) &&
+      assert(allocatedAddress_.count(viewOrigin) &&
              "Did not find original WeightVar or AllocActivation for a "
              "TensorView.");
-      size_t originAddr = allocatedAddressed_[viewOrigin];
+      size_t originAddr = allocatedAddress_[viewOrigin];

       // Calculate the offset into the underlying alloc activation.
       size_t offset = calculateTensorViewOffset(TVI);

       // Calculate the correct address using this offset into the alloc
       // activation and map from the original TVI to it.
-      assert(!allocatedAddressed_.count(TVI) && "Allocation already made!");
-      allocatedAddressed_[TVI] = originAddr + offset;
+      assert(!allocatedAddress_.count(TVI) && "Allocation already made!");
+      allocatedAddress_[TVI] = originAddr + offset;
       continue;
     }
   }
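Taken together with the BundleSaver change below, these methods suggest a compile-time driver along the following lines. This is a sketch: the function name and the exact call order used by the CPU backend are assumptions, not shown in this diff.

// Hypothetical driver, assuming an already-compiled IRFunction *F.
runtime::RuntimeBundle prepareForRuntime(const IRFunction *F) {
  AllocationsInfo allocationsInfo;
  allocationsInfo.numberValues(F);        // assign unique value numbers
  allocationsInfo.allocateActivations(F); // offsets for activations
  allocationsInfo.allocateWeightVars(F);  // offsets for constants/placeholders
  allocationsInfo.allocateTensorViews(F); // views reuse their origin's offset
  allocationsInfo.collectConstants(F);    // condense weights into one block
  // Package the sizes, offsets, and constants pointer for the runtime.
  return allocationsInfo.generateRuntimeBundle(F);
}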

lib/Backends/CPU/AllocationsInfo.h

Lines changed: 17 additions & 16 deletions
@@ -16,6 +16,7 @@
 #ifndef GLOW_BACKENDS_CPU_ALLOCATIONSINFO_H
 #define GLOW_BACKENDS_CPU_ALLOCATIONSINFO_H

+#include "glow/Graph/Nodes.h"
 #include "llvm/IR/Module.h"

 #include <functional>
@@ -27,6 +28,9 @@ class WeightVar;
 class Constant;
 class Context;

+namespace runtime {
+struct RuntimeBundle;
+}
 /// Information about allocations for activations, constant weight variables
 /// and mutable weight variables.
 struct AllocationsInfo {
@@ -39,42 +43,39 @@
   /// numberOffsets_[valueNumbers_[v]]

   /// Maps Values in the module to their offsets.
-  llvm::DenseMap<const Value *, uint64_t> allocatedAddressed_;
+  llvm::DenseMap<const Value *, uint64_t> allocatedAddress_;
   /// Amount of memory to be allocated for constant WeightVars.
   size_t constantWeightVarsMemSize_{0};
   /// Amount of memory to be allocated for mutable WeightVars.
   size_t mutableWeightVarsMemSize_{0};
   /// Amount of memory to be allocated for activations.
   size_t activationsMemSize_{0};
+  /// Base address of stored constant weights.
+  uint8_t *baseConstantWeightVarsStore_{nullptr};
   /// Base address of constant weights.
-  uint8_t *baseConstantWeightVarsAddress_{nullptr};
-  /// Base address of mutable WeightVars.
-  uint8_t *baseMutableWeightVarsAddress_{nullptr};
-  /// Base address of activations.
-  uint8_t *baseActivationsAddress_{nullptr};

   /// Assign offsets to all of the variables in the module \p M and to the
-  /// placeholders. \p ctx is the context that maps the graph to the concrete
-  /// execution environment for a specific function.
-  /// If the \p absoluteAddr is true, simply reuse the addresses already used
-  /// by the payloads of tensors corresponding to those WeightVars as offsets.
-  /// This is useful in a JIT setup. If \p absoluteAddr is false, then all the
-  /// WeightVars will get new offsets assigned.
-  void allocateWeightVars(const IRFunction *F, const Context &ctx,
-                          bool absoluteAddr);
+  /// placeholders.
+  void allocateWeightVars(const IRFunction *F);
   /// Assign offsets to all activations.
   /// No actual memory allocation is performed. All the allocations should be
   /// performed by the client based on the information provided by the
-  /// AllocationsInfo.
+  /// AllocationsInfo or RuntimeBundle.
   void allocateActivations(const IRFunction *F);
   /// Assign offsets to all tensorviews.
   /// No memory allocation is performed. Sets up all offsets into already
   /// defined offsets for WeightVars and AllocActivations. Assumes the weight
-  /// vars and alloc activations have already been added to allocatedAddressed_.
+  /// vars and alloc activations have already been added to allocatedAddress_.
   void allocateTensorViews(const IRFunction *F);
   /// Number all allocations and weight variables by assigning them unique
   /// numbers.
   void numberValues(const IRFunction *F);
+
+  /// Collect Constants into a single block of memory.
+  void collectConstants(const IRFunction *F);
+  /// Returns a RuntimeBundle object containing the offsets and allocation
+  /// sizes needed at runtime.
+  runtime::RuntimeBundle generateRuntimeBundle(const IRFunction *F);
 };

 } // namespace glow
lib/Backends/CPU/BundleSaver.cpp

Lines changed: 3 additions & 4 deletions
@@ -56,7 +56,7 @@ void BundleSaver::saveWeights(llvm::StringRef weightsFileName) {
     auto *w = cast<WeightVar>(F_->getWeightForNode(v));
     auto numBytes = w->getSizeInBytes();
     auto payload = v->getPayload().getUnsafePtr();
-    auto addr = allocationsInfo_.allocatedAddressed_[w];
+    auto addr = allocationsInfo_.allocatedAddress_[w];
     if (addr < pos) {
       // The payload was written already. It aliases something we have seen
       // already.
@@ -98,7 +98,7 @@ void BundleSaver::emitSymbolTable() {
   for (auto &v : F_->getGraph()->getParent()->getPlaceholders()) {
     auto *w = cast<WeightVar>(F_->getWeightForNode(v));
     auto size = w->getType()->size();
-    auto addr = allocationsInfo_.allocatedAddressed_[w];
+    auto addr = allocationsInfo_.allocatedAddress_[w];
     // Create a SymbolTableEntry.
     auto *entry = llvm::ConstantStruct::get(
         symbolTableEntryTy,
@@ -259,8 +259,7 @@ void BundleSaver::performBundleMemoryAllocation() {
   allocationsInfo_.allocateActivations(F_);
   // Tell the allocateWeightVars to not reuse any existing addresses for
   // weights and to assign new ones.
-  Context empty;
-  allocationsInfo_.allocateWeightVars(F_, empty, false);
+  allocationsInfo_.allocateWeightVars(F_);
   allocationsInfo_.allocateTensorViews(F_);
 }
