Enable calling into runtime from onnxifi #2300

Merged: 2 commits, Jan 29, 2019

72 changes: 49 additions & 23 deletions lib/Onnxifi/Base.cpp
@@ -22,17 +22,22 @@

namespace glow {
namespace onnxifi {
namespace {
const char *inferenceFunctionName = "inference";
const char *compatibilityFunctionName = "check";
} // namespace

onnxStatus BackendId::checkGraphCompatibility(const void *onnxModel,
size_t onnxModelSize) {
Module module;

auto function = module.createFunction("check");
auto function = module.createFunction(compatibilityFunctionName);

std::unique_ptr<ONNXIFIModelLoader> loader;
auto loaderOrErr =
ONNXIFIModelLoader::parse(onnxModel, onnxModelSize, 0 /*weightCount*/,
nullptr /*weightDescriptors*/, *function,
false /*loadInputsAsPlaceholders*/, use_onnx_);
auto loaderOrErr = ONNXIFIModelLoader::parse(
onnxModel, onnxModelSize, 0 /*weightCount*/,
nullptr /*weightDescriptors*/, *function,
false /*loadInputsAsPlaceholders*/, getUseOnnx());
if (loaderOrErr) {
loader = std::move(*loaderOrErr);
} else {
@@ -91,7 +96,8 @@ onnxStatus Graph::initGraph(const void *onnxModel, size_t onnxModelSize,
uint32_t weightCount,
const onnxTensorDescriptorV1 *weightDescriptors) {
// TODO: support multiple functions here.
function_ = backendPtr_->getEE().getModule().createFunction("inference");
function_ =
backendPtr_->getEE().getModule().createFunction(inferenceFunctionName);

// TODO: make better error reporting.
std::unique_ptr<ONNXIFIModelLoader> loader =
@@ -103,7 +109,11 @@ onnxStatus Graph::initGraph(const void *onnxModel, size_t onnxModelSize,
onnxOutputToPlaceholder_ = loader->getOutputVarsMapping();

// Emit IR for the graph and compile it.
backendPtr_->getEE().compile(CompilationMode::Infer, function_);
if (backendPtr_->getUseHostManager()) {
backendPtr_->getHostManager().addNetwork(&backendPtr_->getEE().getModule());
} else {
backendPtr_->getEE().compile(CompilationMode::Infer, function_);
}

return ONNXIFI_STATUS_SUCCESS;
}
@@ -146,26 +156,42 @@ void Graph::run(
phs.push_back(var);
}

auto ctx = llvm::make_unique<Context>();

// Run inference.
auto &EE = backendPtr_->getEE();
auto &mod = EE.getModule();
ctx_.allocate(mod.getPlaceholders());
updateInputPlaceholders(ctx_, phs, tensors);
EE.run(ctx_);

// Tensors do not own underlying memory for input buffer,
// just delete memory allocated for the tensor object itself.
for (size_t i = 0; i < tensors.size(); ++i) {
delete tensors[i];
}

// Copy outputs to the addresses specified in the outputPlaceholderToBuffer.
for (auto outputVar : outputPlaceholderToBuffer) {
void *outputAddress = reinterpret_cast<void *>(outputVar.second);
const Tensor *res = ctx_.get(outputVar.first);
ctx->allocate(mod.getPlaceholders());
updateInputPlaceholders(*ctx, phs, tensors);

// Lambda capturing work to do after the graph has finished running.
auto afterRun = [tensors = std::move(tensors), outputPlaceholderToBuffer](
std::unique_ptr<glow::Context> ctx) {
// Tensors do not own underlying memory for input buffer,
// just delete memory allocated for the tensor object itself.
for (size_t i = 0; i < tensors.size(); ++i) {
delete tensors[i];
}

memcpy(outputAddress, res->getUnsafePtr(),
res->size() * res->getType().getElementSize());
// Copy output data from the graph to the onnxifi outputs.
for (auto &outputVar : outputPlaceholderToBuffer) {
void *outputAddress = reinterpret_cast<void *>(outputVar.second);
Tensor *res = ctx->get(outputVar.first);
memcpy(outputAddress, res->getUnsafePtr(),
res->size() * res->getType().getElementSize());
}
};

if (backendPtr_->getUseHostManager()) {
backendPtr_->runOnHostManager(
inferenceFunctionName, std::move(ctx),
[afterRun = std::move(afterRun)](int runIdentifier, int resultCode,
std::unique_ptr<glow::Context> ctx) {
afterRun(std::move(ctx));
});
} else {
EE.run(*ctx);
afterRun(std::move(ctx));
}
}
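
The generalized lambda capture in afterRun above is the core of this change: everything the completion step needs is moved into the callback so it stays alive across an asynchronous run and is released exactly once. A minimal standalone sketch of that pattern follows (illustrative only; Ctx, runSketch, and the raw int pointers are stand-ins for glow::Context, Graph::run, and the onnxifi tensor wrappers, none of which this snippet is part of):

#include <memory>
#include <vector>

struct Ctx {}; // stands in for glow::Context

void runSketch() {
  auto ctx = std::make_unique<Ctx>();     // one context per run, not a shared member
  std::vector<int *> tensors{new int(0)}; // stands in for the onnxifi Tensor pointers
  // Move everything the completion step needs into the callback so it remains
  // valid after this function returns, even if the run finishes on another thread.
  auto afterRun = [tensors = std::move(tensors)](std::unique_ptr<Ctx> ctx) {
    for (auto *t : tensors)
      delete t;                           // release per-run state exactly once
    // ...outputs would be copied out of *ctx here before it is destroyed...
  };
  // Synchronous path shown for brevity; an asynchronous runtime would take
  // ownership of ctx and invoke afterRun(std::move(ctx)) from its result callback.
  afterRun(std::move(ctx));
}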

63 changes: 50 additions & 13 deletions lib/Onnxifi/Base.h
@@ -32,16 +32,30 @@
namespace glow {
namespace onnxifi {

// TODO get rid of this once HostManager is landed.
struct HostManager {
bool addNetwork(Module *M) {
llvm_unreachable("HostManager is not yet implemented.");
}
};

// TODO use the actual type here once available.
using ResultCBTy =
std::function<void(int, int, std::unique_ptr<glow::Context>)>;

/// BackendId associated with the Glow backend.
class BackendId {
public:
/// Create Glow ONNXIFI backend identifier with the
/// given Glow backend \p kind, \p id, \p concurrency and whether to use onnx
/// or caffe2 for models (\p use_onnx).
/// given Glow backend \p kind, \p id, \p concurrency, whether to use onnx
/// or caffe2 for models (\p useOnnx), and whether to use HostManager instead
/// of ExecutionEngine for running graphs (\p useHostManager).
/// NOTE: useHostManager is not yet supported as HostManager is yet to be
/// fully implemented.
explicit BackendId(glow::BackendKind kind, int id, int concurrency,
bool use_onnx)
: id_(id), use_onnx_(use_onnx), concurrency_(concurrency),
executionEngine_(kind) {}
bool useOnnx, bool useHostManager)
[Contributor review comment: update doxygen with the useHostManager.]

: id_(id), useOnnx_(useOnnx), concurrency_(concurrency),
executionEngine_(kind), useHostManager_(useHostManager) {}

bool isOpSupported(Kinded::Kind opKind, ElemKind elementTy);

@@ -55,20 +69,36 @@ class BackendId {
/// \returns Execution Engine associated with the Backend.
glow::ExecutionEngine &getEE() { return executionEngine_; }

/// \returns the whether use onnx or not
bool getUseOnnx() const { return use_onnx_; }
/// \returns whether to use onnx or not.
bool getUseOnnx() const { return useOnnx_; }

/// \returns whether to use HostManager for inference or not.
bool getUseHostManager() const { return useHostManager_; }

/// \returns HostManager associated with the BackendId.
HostManager &getHostManager() { return hostManager_; }

/// \returns the backend id.
int getID() const { return id_; }

/// \returns concurrency for the backend.
int getConcurrency() const { return concurrency_; }

/// Run the network named by \p networkName using HostManager with context \p
/// ctx, after which the result callback \p cb will be called.
void runOnHostManager(llvm::StringRef networkName,
std::unique_ptr<Context> ctx, ResultCBTy cb) {
// TODO enable once HostManager is landed.
// hostManager_->runNetwork(networkName, std::move(ctx), std::move(cb));
}

private:
int id_;
bool use_onnx_;
bool useOnnx_;
int concurrency_;
glow::ExecutionEngine executionEngine_;
bool useHostManager_;
HostManager hostManager_; // TODO use real HostManager once landed.
};

typedef BackendId *BackendIdPtr;
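
Because HostManager had not yet landed, runOnHostManager above only documents the intended call into hostManager_->runNetwork. As a rough sketch of the expected control flow, and under the explicit assumption that the eventual API matches the ResultCBTy alias (a run identifier, a result code, and the returned Context), the runtime side would look roughly like this; runNetworkSketch and both integer parameters are hypothetical names, not the landed HostManager interface:

// Hypothetical sketch only, not the real HostManager::runNetwork.
inline void runNetworkSketch(llvm::StringRef networkName,
                             std::unique_ptr<glow::Context> ctx, ResultCBTy cb) {
  (void)networkName;     // unused in this sketch
  int runIdentifier = 0; // assumed: an id the runtime assigns to this request
  int resultCode = 0;    // assumed: zero meaning success in this sketch
  // ...the runtime would execute the named network against ctx, possibly on
  // another thread, then hand the Context back to the caller...
  cb(runIdentifier, resultCode, std::move(ctx));
}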
@@ -85,9 +115,21 @@ class Backend {
/// \returns Execution Engine associated with the Backend.
glow::ExecutionEngine &getEE() { return backendIdPtr_->getEE(); }

/// \returns whether to use HostManager for inference or not.
bool getUseHostManager() const { return backendIdPtr_->getUseHostManager(); }

/// \returns HostManager for the associated BackendId.
HostManager &getHostManager() { return backendIdPtr_->getHostManager(); }

/// Run inference async using backend thread pool.
void runAsync(const std::function<void(void)> &fn);

// Call BackendId::runOnHostManager
void runOnHostManager(llvm::StringRef networkName,
std::unique_ptr<Context> ctx, ResultCBTy cb) {
backendIdPtr_->runOnHostManager(networkName, std::move(ctx), std::move(cb));
}

private:
BackendIdPtr backendIdPtr_;
// ThreadPool instance for the backend.
@@ -156,11 +198,6 @@ class Graph {
BackendPtr backendPtr_;
Function *function_;

/// This is the compilation context that represents a single thread.
/// TODO: Once we finish the migration to placeholders we'll need to manage
/// the state properly.
Context ctx_;

/// Mapping between ONNX name for the input variable and Glow
/// placeholder for input.
llvm::StringMap<Placeholder *> onnxInputToPlaceholder_;
28 changes: 17 additions & 11 deletions lib/Onnxifi/onnxifiGlow.cpp
@@ -66,18 +66,18 @@ GLOW_ONNXIFI_LIBRARY_FUNCTION_WRAPPER(onnxGetBackendIDs)(
// TODO: change concurrency level to std::thread::hardware_concurrency()
// when Glow CPU backend can handle concurrent execution.
// For now, limit concurrent execution to a single worker thread.
auto *cpuBackendOnnx =
new glow::onnxifi::BackendId(glow::BackendKind::CPU, /*id*/ 1,
/*concurrency*/ 1, /*use_onnx*/ true);
auto *cpuBackendOnnx = new glow::onnxifi::BackendId(
glow::BackendKind::CPU, /*id*/ 1,
/*concurrency*/ 1, /*useOnnx*/ true, /*useHostManager*/ false);
auto *interpreterBackendOnnx = new glow::onnxifi::BackendId(
glow::BackendKind::Interpreter,
/*id*/ 2, /*concurrency*/ 1, /*use_onnx*/ true);
auto *cpuBackendC2 =
new glow::onnxifi::BackendId(glow::BackendKind::CPU, /*id*/ 3,
/*concurrency*/ 1, /*use_onnx*/ false);
/*id*/ 2, /*concurrency*/ 1, /*useOnnx*/ true, /*useHostManager*/ false);
auto *cpuBackendC2 = new glow::onnxifi::BackendId(
glow::BackendKind::CPU, /*id*/ 3,
/*concurrency*/ 1, /*useOnnx*/ false, /*useHostManager*/ false);
auto *interpreterBackendC2 = new glow::onnxifi::BackendId(
glow::BackendKind::Interpreter,
/*id*/ 4, /*concurrency*/ 1, /*use_onnx*/ false);
/*id*/ 4, /*concurrency*/ 1, /*useOnnx*/ false, /*useHostManager*/ false);
manager.addBackendId(cpuBackendOnnx);
manager.addBackendId(interpreterBackendOnnx);
manager.addBackendId(cpuBackendC2);
@@ -88,7 +88,7 @@ GLOW_ONNXIFI_LIBRARY_FUNCTION_WRAPPER(onnxGetBackendIDs)(
backendIDs[2] = cpuBackendC2;
backendIDs[3] = interpreterBackendC2;
#else
*numBackends = 2;
*numBackends = 3;

// In case backendIDs is nullptr or does not have enough capacity just return
// the total number of supported backends.
@@ -98,16 +98,22 @@

auto *interpreterBackendOnnx = new glow::onnxifi::BackendId(
glow::BackendKind::Interpreter,
/*id*/ 1, /*concurrency*/ 1, /*use_onnx*/ true);
/*id*/ 1, /*concurrency*/ 1, /*useOnnx*/ true, /*useHostManager*/ false);
auto *interpreterBackendC2 = new glow::onnxifi::BackendId(
glow::BackendKind::Interpreter,
/*id*/ 2, /*concurrency*/ 1, /*use_onnx*/ false);
/*id*/ 2, /*concurrency*/ 1, /*useOnnx*/ false, /*useHostManager*/ false);
auto *interpreterBackendC2HostManager = new glow::onnxifi::BackendId(
glow::BackendKind::Interpreter,
/*id*/ 2, /*concurrency*/ 1, /*useOnnx*/ false,
/*useHostManager*/ true);

manager.addBackendId(interpreterBackendOnnx);
manager.addBackendId(interpreterBackendC2);
manager.addBackendId(interpreterBackendC2HostManager);

backendIDs[0] = interpreterBackendOnnx;
backendIDs[1] = interpreterBackendC2;
backendIDs[2] = interpreterBackendC2HostManager;
#endif

return ONNXIFI_STATUS_SUCCESS;
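
For context on the *numBackends bookkeeping above: ONNXIFI clients typically call this entry point twice, first with a null (or too small) buffer to learn the count, then with an adequately sized buffer to receive the IDs. A caller-side sketch follows (not part of this PR; the include path and the discoverBackends helper are illustrative assumptions, and the exact non-success status returned by the first call, such as ONNXIFI_STATUS_FALLBACK, depends on the implementation):

#include <vector>
#include "onnx/onnxifi.h" // include path may differ depending on the build setup

std::vector<onnxBackendID> discoverBackends() {
  size_t numBackends = 0;
  // First call: null buffer, so the implementation only reports the count.
  auto status = onnxGetBackendIDs(nullptr, &numBackends);
  (void)status; // expected to be a non-success status on this first call
  std::vector<onnxBackendID> ids(numBackends);
  // Second call: the buffer now has enough capacity, so the IDs are written.
  status = onnxGetBackendIDs(ids.data(), &numBackends);
  return ids;
}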
28 changes: 16 additions & 12 deletions tests/unittests/GlowOnnxifiManagerTest.cpp
@@ -24,9 +24,10 @@ using namespace glow::onnxifi;

TEST(GlowOnnxifiManagerTest, BackendIdTest) {
auto &manager = GlowOnnxifiManager::get();
auto *backendId = new glow::onnxifi::BackendId(glow::BackendKind::Interpreter,
/*id*/ 1, /*use_onnx*/ true,
/*concurrency*/ 1);
auto *backendId = new glow::onnxifi::BackendId(
glow::BackendKind::Interpreter,
/*id*/ 1,
/*concurrency*/ 1, /*use_onnx*/ true, /*useHostManager*/ false);
// BackendId isn't valid before it has been added to the manager.
EXPECT_FALSE(manager.isValid(backendId));
manager.addBackendId(backendId);
@@ -43,9 +44,10 @@

TEST(GlowOnnxifiManagerTest, BackendTest) {
auto &manager = GlowOnnxifiManager::get();
auto *backendId = new glow::onnxifi::BackendId(glow::BackendKind::Interpreter,
/*id*/ 1, /*use_onnx*/ true,
/*concurrency*/ 1);
auto *backendId = new glow::onnxifi::BackendId(
glow::BackendKind::Interpreter,
/*id*/ 1,
/*concurrency*/ 1, /*use_onnx*/ true, /*useHostManager*/ false);
manager.addBackendId(backendId);

auto *backend = manager.createBackend(backendId);
@@ -78,9 +80,10 @@ TEST(GlowOnnxifiManagerTest, EventTest) {

TEST(GlowOnnxifiManagerTest, GraphTest) {
auto &manager = GlowOnnxifiManager::get();
auto *backendId = new glow::onnxifi::BackendId(glow::BackendKind::Interpreter,
/*id*/ 1, /*use_onnx*/ true,
/*concurrency*/ 1);
auto *backendId = new glow::onnxifi::BackendId(
glow::BackendKind::Interpreter,
/*id*/ 1,
/*concurrency*/ 1, /*use_onnx*/ true, /*useHostManager*/ false);
manager.addBackendId(backendId);
auto *backend = manager.createBackend(backendId);

@@ -102,9 +105,10 @@ void createAndDestroyManagerObjects() {

void createAndDestroyManagerObjects() {
auto &manager = GlowOnnxifiManager::get();
auto *backendId = new glow::onnxifi::BackendId(glow::BackendKind::Interpreter,
/*id*/ 1, /*use_onnx*/ true,
/*concurrency*/ 1);
auto *backendId = new glow::onnxifi::BackendId(
glow::BackendKind::Interpreter,
/*id*/ 1,
/*concurrency*/ 1, /*use_onnx*/ true, /*useHostManager*/ false);
manager.addBackendId(backendId);
auto *backend = manager.createBackend(backendId);
auto *event = manager.createEvent();