Skip to content

Commit 50cb744

Browse files
committed
Add dynamic buffer support to OCL Backend
1 parent 98a07f5 commit 50cb744

File tree

8 files changed

+232
-490
lines changed

8 files changed

+232
-490
lines changed

.circleci/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ elif [[ "$CIRCLE_JOB" == "PYTORCH" ]]; then
140140
cd build
141141
elif [[ "$CIRCLE_JOB" == "OPENCL" ]]; then
142142
install_pocl
143-
CMAKE_ARGS+=("-DGLOW_WITH_OPENCL=ON")
143+
CMAKE_ARGS+=("-DGLOW_WITH_OPENCL=ON" "-DGLOW_OPENCL_ALIGN=128")
144144
else
145145
CMAKE_ARGS+=("-DCMAKE_BUILD_TYPE=Debug")
146146
if [[ "${CIRCLE_JOB}" == "SHARED" ]]; then

lib/Backends/OpenCL/OpenCL.cpp

Lines changed: 127 additions & 93 deletions
Large diffs are not rendered by default.

lib/Backends/OpenCL/OpenCL.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,7 @@ class OpenCLFunction final : public CompiledFunction {
145145
/// Fill the device \p buffer with a given \p value.
146146
/// \param len number of buffer elements to be filled by the \p value.
147147
/// Elements are considered to be of the type described by \p elemKind.
148-
void fillBuffer(cl_mem buffer, uint64_t start, uint64_t len, float value,
149-
ElemKind elemKind,
148+
void fillBuffer(cl_mem buffer, uint64_t len, float value, ElemKind elemKind,
150149
runtime::OpenCLDeviceBindings *devBindings);
151150

152151
/// Execute a convolution instruction which uses NCHW format.
@@ -242,10 +241,13 @@ namespace runtime {
242241
/// device specific information used to run a compiled function on a specific
243242
/// device.
244243
struct OpenCLDeviceBindings : DeviceBindings {
245-
OpenCLDeviceBindings(cl_mem buffer, cl_command_queue commands,
246-
cl_device_id device, cl_context ctx, cl_program prog)
244+
OpenCLDeviceBindings(
245+
cl_mem buffer, cl_command_queue commands, cl_device_id device,
246+
cl_context ctx, cl_program prog,
247+
const std::unordered_map<std::string, cl_mem> &subBuffers)
247248
: DeviceBindings(OCLBackend::getName()), deviceBuffer{buffer},
248-
commandQueue{commands}, deviceId{device}, context{ctx}, program{prog} {}
249+
commandQueue{commands}, deviceId{device}, context{ctx}, program{prog},
250+
weightBuffers(subBuffers) {}
249251

250252
/// CL memory buffer. Currently this contains both mutable and immutable
251253
/// weights, the buffer is allocated once when the network is added.
@@ -269,6 +271,12 @@ struct OpenCLDeviceBindings : DeviceBindings {
269271

270272
/// A list of kernels and their associated events.
271273
std::vector<KernelLaunch> kernelLaunches;
274+
275+
/// Buffers or subBuffers associated with symbols.
276+
std::unordered_map<std::string, cl_mem> weightBuffers;
277+
278+
/// \returns the subBuffer associated with a Value.
279+
cl_mem getBuffer(glow::Value *v);
272280
};
273281
} // namespace runtime
274282
} // namespace glow

lib/Backends/OpenCL/OpenCLDeviceManager.cpp

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,29 @@ DeviceManager *createOCLDeviceManager(const DeviceConfig &config) {
5555
return new OpenCLDeviceManager(config);
5656
}
5757

58-
OpenCLBuffer::~OpenCLBuffer() { clReleaseMemObject(buffer_); }
58+
OpenCLBuffer::~OpenCLBuffer() {
59+
for (auto buf : subBuffers_) {
60+
clReleaseMemObject(buf.second);
61+
}
62+
subBuffers_.clear();
63+
64+
clReleaseMemObject(buffer_);
65+
}
66+
67+
/// Create a subBuffer of buffer_ at \p offset with \p size and map the Symbol \p name to it.
68+
bool OpenCLBuffer::addSubBuffer(std::string name, size_t offset, size_t size) {
69+
cl_buffer_region region({offset, size});
70+
cl_int err;
71+
auto buf = clCreateSubBuffer(buffer_, CL_MEM_READ_WRITE,
72+
CL_BUFFER_CREATE_TYPE_REGION, &region, &err);
73+
auto res = subBuffers_.emplace(name, buf);
74+
if (!res.second) {
75+
llvm::dbgs() << "OpenCLBuffer: failed to add subBuffer for symbol " << name
76+
<< "\n";
77+
return false;
78+
}
79+
return true;
80+
}
5981
} // namespace runtime
6082
} // namespace glow
6183

@@ -356,6 +378,15 @@ void OpenCLDeviceManager::addNetworkImpl(const Module *module,
356378
clFinish(commands);
357379
}
358380
usedMemoryBytes_ += sizeInBytes;
381+
382+
// Add a sub-buffer for each symbol in the symbol table. OpenCL sub-buffers
383+
// are essentially TensorViews in Glow.
384+
for (auto &pair : bundle.getSymbolTable()) {
385+
bool success = buffer->addSubBuffer(pair.first, pair.second.offset,
386+
pair.second.size);
387+
DCHECK(success);
388+
}
389+
359390
// Compile the CL program.
360391
// Add to the function name lookup map.
361392
// Add shared pointer to the buffer to buffers. This way the buffer will
@@ -376,6 +407,7 @@ void OpenCLDeviceManager::addNetworkImpl(const Module *module,
376407
programs_.emplace(func.first, program);
377408
functions_.emplace(func.first, func.second);
378409
buffers_.emplace(func.first, buffer);
410+
379411
buffer->incrementUsers();
380412

381413
DCHECK_LE(usedMemoryBytes_, maxMemoryBytes_);
@@ -666,7 +698,7 @@ void OpenCLDeviceManager::runFunctionImpl(
666698
auto program = programs_[function];
667699
auto clBindings = glow::make_unique<runtime::OpenCLDeviceBindings>(
668700
buffers_[function]->getBuffer(), queue.backingQueue, deviceId_, context_,
669-
program);
701+
program, buffers_[function]->getSubBuffers());
670702

671703
// Copy inputs to the device.
672704
copyInputsToDevice(func->getRuntimeBundle(), context.get(), clBindings.get());

lib/Backends/OpenCL/OpenCLDeviceManager.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ class OpenCLBuffer {
9898
/// The OpenCL buffer being stored.
9999
cl_mem buffer_;
100100

101+
/// Subbuffers for symbols.
102+
std::unordered_map<std::string, cl_mem> subBuffers_;
103+
101104
/// Count of functions using this buffer.
102105
unsigned int users_{0};
103106

@@ -120,6 +123,14 @@ class OpenCLBuffer {
120123

121124
/// Get size of buffer in bytes.
122125
size_t getSize() { return size_; }
126+
127+
/// Return the mapping from Symbol name to subBuffer for this Buffer.
128+
const std::unordered_map<std::string, cl_mem> &getSubBuffers() {
129+
return subBuffers_;
130+
}
131+
132+
/// Create a subBuffer of buffer_ at \p offset with \p size and map the Symbol \p name to it.
133+
bool addSubBuffer(std::string name, size_t offset, size_t size);
123134
};
124135

125136
/// A class controlling a single OpenCL device. Many OpenCLFunctions may be

0 commit comments

Comments
 (0)