Commit d32e0e8

Moved CPUBackend memory allocation to runtime.
Created a new RuntimeBundle to pass information forward from compile time to runtime.
1 parent a7acfc8 commit d32e0e8

11 files changed: +229 -135 lines
docs/JIT.md

Lines changed: 4 additions & 7 deletions
@@ -19,12 +19,10 @@ The JIT, on the other hand, generates a single stream of highly optimized
 instructions that don't go back to the interpreter. Moreover, each instruction
 is optimized based on specific information on the context in which the
 instruction is executed. When a matrix multiplication is compiled, the JIT knows
-exactly the dimensions of the matrices that are being executed and where the
-tensors are placed in memory. The JIT knows that the buffers do or do-not
-alias, and exactly the number of iterations for the loop. The knowledge enables
+exactly the dimensions of the matrices that are being executed. This knowledge enables
 much better code generation and vectorization. The JIT is also able to eliminate
 all calls to 'malloc', because the memory is statically allocated. The whole
-network is allocated by a single malloc call.
+network is allocated by a single malloc call, and all inputs and outputs by a single separate call.

 ### How the JIT Works

@@ -35,9 +33,8 @@ allocate concrete memory addresses for the AllocActivation instructions in the
 module. The allocation is done by scanning the module and updating the memory
 allocator. After this process the allocator reports the high water mark, which
 is the maximum number of bytes that the network consumes. The allocator assigns
-offsets for each alloc activation within the buffer. Then, the JIT performs a
-single call to 'malloc' to allocate the heap. At this point each activation and
-each weight has a concrete address on the heap.
+offsets for each alloc activation within the buffer. This information is stored
+in a RuntimeBundle, and a single call to malloc initializes the heap for activations.

 Next, the JIT opens a new LLVM function and prepares for code generation. The
 compiler goes over each low-level instruction and generates a sequence of
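To make the offset scheme concrete, here is a minimal sketch of the idea the doc describes (the names and numbers are illustrative, not code from this commit): the allocator assigns each tensor a byte offset, one malloc reserves the whole region, and every concrete address is base plus offset.

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <string>
#include <unordered_map>

// Hypothetical symbol table mapping tensor names to sizes and offsets, in the
// spirit of the RuntimeBundle introduced by this commit.
struct SymbolInfo {
  size_t size;   // size in bytes
  size_t offset; // byte offset from the base of the heap
};

int main() {
  std::unordered_map<std::string, SymbolInfo> symbols = {
      {"conv1.weight", {4096, 0}}, {"conv1.bias", {64, 4096}}};
  const size_t highWaterMark = 4160; // max bytes reported by the allocator

  // A single malloc for the whole region; no per-tensor allocations.
  uint8_t *base = static_cast<uint8_t *>(std::malloc(highWaterMark));
  // Every tensor address is computed as base + offset.
  uint8_t *conv1Weight = base + symbols["conv1.weight"].offset;
  (void)conv1Weight; // the JIT bakes such addresses into the generated code
  std::free(base);
  return 0;
}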

include/glow/Backends/CompiledFunction.h

Lines changed: 30 additions & 3 deletions
@@ -16,23 +16,50 @@
 #ifndef GLOW_BACKENDS_COMPILEDFUNCTION_H
 #define GLOW_BACKENDS_COMPILEDFUNCTION_H

+#include "glow/Graph/Nodes.h"
 #include <unordered_map>

 namespace glow {

 class Context;
-
+namespace runtime {
+/// RuntimeSymbolInfo
+/// Contains information for initialization and handling of a symbol at runtime.
+struct RuntimeSymbolInfo {
+  /// The size in bytes.
+  size_t size;
+  /// Offset in bytes from the base address.
+  size_t offset;
+};
+/// RuntimeBundle
+/// Contains the information that needs to be passed forward from compile time
+/// to runtime in order to allocate and initialize memory.
+struct RuntimeBundle {
+  /// Map from symbol name to a RuntimeSymbolInfo.
+  std::unordered_map<std::string, RuntimeSymbolInfo> symbolTable;
+  /// Pointer to memory containing the weights for execution.
+  uint8_t *constants;
+  /// Amount of memory needed for constant weights.
+  const size_t constantWeightVarsMemSize;
+  /// Amount of memory needed for mutable vars.
+  const size_t mutableWeightVarsMemSize;
+  /// Amount of memory needed for activations.
+  const size_t activationsMemSize;
+  RuntimeBundle(size_t constWeight, size_t mutableWeight, size_t activations)
+      : constantWeightVarsMemSize(constWeight),
+        mutableWeightVarsMemSize(mutableWeight),
+        activationsMemSize(activations) {}
+};
+} // end namespace runtime
 /// Interface for executing a compiled function.
 class CompiledFunction {
 public:
   /// Dtor.
   virtual ~CompiledFunction() = default;
-
   /// Execute the network and allocate Placeholder memory with given
   /// \p ctx providing mapping between Placeholder and populated tensor.
   virtual void execute(Context &ctx) = 0;
 };
-
 } // end namespace glow

 #endif // GLOW_BACKENDS_COMPILEDFUNCTION_H
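A minimal sketch of how a runtime might consume this bundle. The structs are restated so the snippet stands alone; bindPlaceholder and the buffer handling are illustrative assumptions, not part of this commit:

#include <cstdint>
#include <cstring>
#include <string>
#include <unordered_map>

// Restated from the header above for a self-contained example.
struct RuntimeSymbolInfo {
  size_t size;   // size in bytes
  size_t offset; // offset in bytes from the base address
};

struct RuntimeBundle {
  std::unordered_map<std::string, RuntimeSymbolInfo> symbolTable;
  uint8_t *constants = nullptr;
  size_t constantWeightVarsMemSize = 0;
  size_t mutableWeightVarsMemSize = 0;
  size_t activationsMemSize = 0;
};

// Copy a caller-provided tensor into its reserved slot in the mutable-weights
// region, resolving the slot through the bundle's symbol table.
bool bindPlaceholder(const RuntimeBundle &bundle, uint8_t *mutableBase,
                     const std::string &name, const void *data) {
  auto it = bundle.symbolTable.find(name);
  if (it == bundle.symbolTable.end()) {
    return false; // unknown symbol
  }
  std::memcpy(mutableBase + it->second.offset, data, it->second.size);
  return true;
}

The design choice here is that the symbol table carries only sizes and offsets, so the runtime can allocate the mutable region with one call sized to mutableWeightVarsMemSize and resolve every placeholder by name.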

lib/Backends/CPU/AllocationsInfo.cpp

Lines changed: 59 additions & 45 deletions
@@ -16,13 +16,15 @@
 #define DEBUG_TYPE "jit-allocations"

 #include "AllocationsInfo.h"
+#include "glow/Backends/CompiledFunction.h"
 #include "glow/CodeGen/MemoryAllocator.h"
 #include "glow/Graph/Context.h"
 #include "glow/Graph/Graph.h"
 #include "glow/Graph/Nodes.h"
 #include "glow/IR/IRUtils.h"
 #include "glow/IR/Instrs.h"
 #include "glow/Support/Debug.h"
+#include "glow/Support/Memory.h"

 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -32,9 +34,7 @@ using llvm::cast;
 using llvm::dyn_cast;
 using llvm::isa;

-void AllocationsInfo::allocateWeightVars(const IRFunction *F,
-                                         const Context &ctx,
-                                         bool absoluteAddr) {
+void AllocationsInfo::allocateWeightVars(const IRFunction *F) {
   // Use two different allocators, because constant weights and mutable weights
   // may use different memory blocks.
   MemoryAllocator constantWeightVarsAllocator("ConstantWeights", 0);
@@ -43,48 +43,29 @@
   // Compute the new offsets for all the weights, do not reuse their current
   // addresses. Process all constant WeightVars first.
   for (auto &v : F->getGraph()->getParent()->getConstants()) {
-    assert(isa<WeightVar>(F->getWeightForNode(v)));
+    assert(isa<WeightVar>(F->getWeightForNode(v)) && "Expected WeightVar");
     auto *w = cast<WeightVar>(F->getWeightForNode(v));
     auto numBytes = w->getSizeInBytes();
     size_t addr = constantWeightVarsAllocator.allocate(numBytes, w);
-    if (!absoluteAddr) {
-      allocatedAddressed_[w] = addr;
-    } else {
-      // Reuse the address used by the payload.
-      allocatedAddressed_[w] =
-          v->getPayload().getUnsafePtr() - static_cast<char *>(nullptr);
-    }
+    allocatedAddress_[w] = addr;
   }

-  if (absoluteAddr) {
-    // Allocate addresses for the Placeholders that have payloads defined at
-    // compile-time.
-    // TODO: Remove this branch once Context becomes a parameter of the
-    // CompiledFunction::execute method.
-    for (auto PH : ctx.pairs()) {
-      assert(isa<WeightVar>(F->getWeightForNode(PH.first)));
-      auto *w = cast<WeightVar>(F->getWeightForNode(PH.first));
-      // Reuse the address used by the payload.
-      allocatedAddressed_[w] =
-          PH.second->getUnsafePtr() - static_cast<char *>(nullptr);
-    }
-  } else {
-    // Allocate based on size as reported by the formal type of Placeholders
-    for (auto &v : F->getGraph()->getParent()->getPlaceholders()) {
-      assert(isa<WeightVar>(F->getWeightForNode(v)));
-      auto *w = cast<WeightVar>(F->getWeightForNode(v));
-      auto numBytes = w->getSizeInBytes();
-      size_t addr = mutableWeightVarsAllocator.allocate(numBytes, w);
-      allocatedAddressed_[w] = addr;
-    }
+  // Compute the offsets and total memory requirements for Placeholders.
+  for (auto &v : F->getGraph()->getParent()->getPlaceholders()) {
+    // Get the WeightVar for each Placeholder to calculate offsets.
+    assert(isa<WeightVar>(F->getWeightForNode(v)) && "Expected WeightVar");
+    auto *w = cast<WeightVar>(F->getWeightForNode(v));
+    auto numBytes = w->getSizeInBytes();
+    size_t addr = mutableWeightVarsAllocator.allocate(numBytes, w);
+    allocatedAddress_[w] = addr;
   }

   // Remember that max required memory size for each kind of weights.
   constantWeightVarsMemSize_ = constantWeightVarsAllocator.getMaxMemoryUsage();
   mutableWeightVarsMemSize_ = mutableWeightVarsAllocator.getMaxMemoryUsage();

   DEBUG_GLOW(for (auto &A
-                  : allocatedAddressed_) {
+                  : allocatedAddress_) {
     if (isa<AllocActivationInst>(A.first) || isa<TensorViewInst>(A.first))
       continue;
     assert(valueNumbers_.count(A.first) && "Unknown weight");
@@ -94,13 +75,47 @@
                       : "mutable weight";
     llvm::errs() << "Allocated " << kind << " " << A.first->getName()
                  << " size: " << A.first->getSizeInBytes()
-                 << " address range: [" << allocatedAddressed_[A.first]
-                 << ", "
-                 << allocatedAddressed_[A.first] + A.first->getSizeInBytes()
+                 << " address range: [" << allocatedAddress_[A.first] << ", "
+                 << allocatedAddress_[A.first] + A.first->getSizeInBytes()
                  << "]\n";
   });
 }

+void AllocationsInfo::collectConstants(const IRFunction *F) {
+  // At compile time condense constants to a single block of memory.
+  // This allows the graph to go away after compile time.
+  baseConstantWeightVarsStore_ =
+      (uint8_t *)alignedAlloc(constantWeightVarsMemSize_, TensorAlignment);
+  for (auto &v : F->getGraph()->getParent()->getConstants()) {
+    assert(isa<WeightVar>(F->getWeightForNode(v)));
+    auto *w = cast<WeightVar>(F->getWeightForNode(v));
+    auto payload = v->getPayload().getUnsafePtr();
+    auto numBytes = w->getSizeInBytes();
+    auto addr = allocatedAddress_[w];
+    // Copy weight to offset.
+    memcpy(baseConstantWeightVarsStore_ + addr, payload, numBytes);
+  }
+}
+
+runtime::RuntimeBundle
+AllocationsInfo::generateRuntimeBundle(const IRFunction *F) {
+  runtime::RuntimeBundle info(constantWeightVarsMemSize_,
+                              mutableWeightVarsMemSize_, activationsMemSize_);
+  std::unordered_map<std::string, runtime::RuntimeSymbolInfo> symbolTable;
+  info.constants = baseConstantWeightVarsStore_;
+  for (auto &v : F->getGraph()->getParent()->getPlaceholders()) {
+    assert(isa<WeightVar>(F->getWeightForNode(v)) && "Expected WeightVar");
+    auto *w = cast<WeightVar>(F->getWeightForNode(v));
+    runtime::RuntimeSymbolInfo symbol;
+    symbol.offset = allocatedAddress_[w];
+    symbol.size = w->getSizeInBytes();
+    symbolTable.emplace(std::string(v->getName()), symbol);
+  }
+  info.symbolTable = std::move(symbolTable);
+  return info;
+}
+
 void AllocationsInfo::allocateActivations(const IRFunction *F) {
   // Use a memory allocator with no upper bound on how much memory we can
   // allocate.
@@ -131,15 +146,14 @@ void AllocationsInfo::allocateActivations(const IRFunction *F) {

   // Register specific addresses within the heap to activations.
   for (auto &A : activationAddr) {
-    allocatedAddressed_[A.first] = A.second;
+    allocatedAddress_[A.first] = A.second;
   }
   DEBUG_GLOW(for (auto &A
-                  : allocatedAddressed_) {
+                  : allocatedAddress_) {
     llvm::errs() << "Allocated activation " << A.first->getName()
                  << " size: " << A.first->getSizeInBytes()
-                 << " address range: [" << allocatedAddressed_[A.first]
-                 << ", "
-                 << allocatedAddressed_[A.first] + A.first->getSizeInBytes()
+                 << " address range: [" << allocatedAddress_[A.first] << ", "
+                 << allocatedAddress_[A.first] + A.first->getSizeInBytes()
                  << "]\n";
   });
 }
@@ -174,18 +188,18 @@ void AllocationsInfo::allocateTensorViews(const IRFunction *F) {
   for (const auto &I : F->getInstrs()) {
     if (const auto *TVI = dyn_cast<TensorViewInst>(&I)) {
       auto *viewOrigin = getOrigin(TVI);
-      assert(allocatedAddressed_.count(viewOrigin) &&
+      assert(allocatedAddress_.count(viewOrigin) &&
              "Did not find original WeightVar or AllocActivation for a "
              "TensorView.");
-      size_t originAddr = allocatedAddressed_[viewOrigin];
+      size_t originAddr = allocatedAddress_[viewOrigin];

       // Calculate the offset into the underlying alloc activation.
       size_t offset = calculateTensorViewOffset(TVI);

       // Calculate the correct address using this offset into the alloc
       // activation and map from the original TVI to it.
-      assert(!allocatedAddressed_.count(TVI) && "Allocation already made!");
-      allocatedAddressed_[TVI] = originAddr + offset;
+      assert(!allocatedAddress_.count(TVI) && "Allocation already made!");
+      allocatedAddress_[TVI] = originAddr + offset;
       continue;
     }
   }
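Taken together with the BundleSaver change below, these methods suggest a compile-time driver along the following lines. This is a sketch: the function name and the exact call order used by the CPU backend are assumptions, not shown in this diff.

// Hypothetical driver, assuming an already-compiled IRFunction *F.
runtime::RuntimeBundle prepareForRuntime(const IRFunction *F) {
  AllocationsInfo allocationsInfo;
  allocationsInfo.numberValues(F);        // assign unique value numbers
  allocationsInfo.allocateActivations(F); // offsets for activations
  allocationsInfo.allocateWeightVars(F);  // offsets for constants/placeholders
  allocationsInfo.allocateTensorViews(F); // views reuse their origin's offset
  allocationsInfo.collectConstants(F);    // condense weights into one block
  // Package the sizes, offsets, and constants pointer for the runtime.
  return allocationsInfo.generateRuntimeBundle(F);
}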

lib/Backends/CPU/AllocationsInfo.h

Lines changed: 17 additions & 16 deletions
@@ -16,6 +16,7 @@
 #ifndef GLOW_BACKENDS_CPU_ALLOCATIONSINFO_H
 #define GLOW_BACKENDS_CPU_ALLOCATIONSINFO_H

+#include "glow/Graph/Nodes.h"
 #include "llvm/IR/Module.h"

 #include <functional>
@@ -27,6 +28,9 @@ class WeightVar;
 class Constant;
 class Context;

+namespace runtime {
+struct RuntimeBundle;
+}
 /// Information about allocations for activations, constant weight variables
 /// and mutable weight variables.
 struct AllocationsInfo {
@@ -39,42 +43,39 @@
   /// numberOffsets_[valueNumbers_[v]]

   /// Maps Values in the module to their offsets.
-  llvm::DenseMap<const Value *, uint64_t> allocatedAddressed_;
+  llvm::DenseMap<const Value *, uint64_t> allocatedAddress_;
   /// Amount of memory to be allocated for constant WeightVars.
   size_t constantWeightVarsMemSize_{0};
   /// Amount of memory to be allocated for mutable WeightVars.
   size_t mutableWeightVarsMemSize_{0};
   /// Amount of memory to be allocated for activations.
   size_t activationsMemSize_{0};
+  /// Base address of stored constant weights.
+  uint8_t *baseConstantWeightVarsStore_{nullptr};
   /// Base address of constant weights.
-  uint8_t *baseConstantWeightVarsAddress_{nullptr};
-  /// Base address of mutable WeightVars.
-  uint8_t *baseMutableWeightVarsAddress_{nullptr};
-  /// Base address of activations.
-  uint8_t *baseActivationsAddress_{nullptr};

   /// Assign offsets to all of the variables in the module \p M and to the
-  /// placeholders. \p ctx is the context that maps the graph to the concrete
-  /// execution environment for a specific function.
-  /// If the \p absoluteAddr is true, simply reuse the addresses already used
-  /// by the payloads of tensors corresponding to those WeightVars as offsets.
-  /// This is useful in a JIT setup. If \p absoluteAddr is false, then all the
-  /// WeightVars will get new offsets assigned.
-  void allocateWeightVars(const IRFunction *F, const Context &ctx,
-                          bool absoluteAddr);
+  /// placeholders.
+  void allocateWeightVars(const IRFunction *F);
   /// Assign offsets to all activations.
   /// No actual memory allocation is performed. All the allocations should be
   /// performed by the client based on the information provided by the
-  /// AllocationsInfo.
+  /// AllocationsInfo or RuntimeBundle.
   void allocateActivations(const IRFunction *F);
   /// Assign offsets to all tensorviews.
   /// No memory allocation is performed. Sets up all offsets into already
   /// defined offsets for WeightVars and AllocActivations. Assumes the weight
-  /// vars and alloc activations have already been added to allocatedAddressed_.
+  /// vars and alloc activations have already been added to allocatedAddress_.
   void allocateTensorViews(const IRFunction *F);
   /// Number all allocations and weight variables by assigning them unique
   /// numbers.
   void numberValues(const IRFunction *F);
+
+  /// Collect Constants into a single block of memory.
+  void collectConstants(const IRFunction *F);
+  /// Returns a RuntimeBundle object containing the offsets and allocation
+  /// sizes needed at runtime.
+  runtime::RuntimeBundle generateRuntimeBundle(const IRFunction *F);
 };

 } // namespace glow
lib/Backends/CPU/BundleSaver.cpp

Lines changed: 3 additions & 4 deletions
@@ -56,7 +56,7 @@ void BundleSaver::saveWeights(llvm::StringRef weightsFileName) {
     auto *w = cast<WeightVar>(F_->getWeightForNode(v));
     auto numBytes = w->getSizeInBytes();
     auto payload = v->getPayload().getUnsafePtr();
-    auto addr = allocationsInfo_.allocatedAddressed_[w];
+    auto addr = allocationsInfo_.allocatedAddress_[w];
     if (addr < pos) {
       // The payload was written already. It aliases something we have seen
       // already.
@@ -98,7 +98,7 @@ void BundleSaver::emitSymbolTable() {
   for (auto &v : F_->getGraph()->getParent()->getPlaceholders()) {
     auto *w = cast<WeightVar>(F_->getWeightForNode(v));
     auto size = w->getType()->size();
-    auto addr = allocationsInfo_.allocatedAddressed_[w];
+    auto addr = allocationsInfo_.allocatedAddress_[w];
     // Create a SymbolTableEntry.
     auto *entry = llvm::ConstantStruct::get(
         symbolTableEntryTy,
@@ -259,8 +259,7 @@ void BundleSaver::performBundleMemoryAllocation() {
   allocationsInfo_.allocateActivations(F_);
   // Tell the allocateWeightVars to not reuse any existing addresses for
   // weights and to assign new ones.
-  Context empty;
-  allocationsInfo_.allocateWeightVars(F_, empty, false);
+  allocationsInfo_.allocateWeightVars(F_);
   allocationsInfo_.allocateTensorViews(F_);
 }
