Skip to content

Commit 0b0460a

Browse files
omromano authored and facebook-github-bot committed
Update NNPI Backend to v0.4.2.5 (#3968)
Summary: Updating NNPI Backend to version 0.4.2.5. Pull Request resolved: #3968 Reviewed By: hyuen, jfix71 Differential Revision: D19300112 Pulled By: arunm-git fbshipit-source-id: 63f3644d7b4bcf7fb579d24603020678bfacb668
1 parent 71a93b9 commit 0b0460a

15 files changed

+485
-316
lines changed

lib/Backends/NNPI/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,14 @@ message(STATUS "[NNPI] NNPI_MG_LIB_DIR = ${NNPI_MG_LIB}")
9797
message(STATUS "[NNPI] GLOW_BINARY_DIR = ${GLOW_BINARY_DIR}")
9898
message(STATUS "[NNPI] NNPI_COLLECT_MEM_USAGE = ${NNPI_MEM_PROFILING}")
9999

100+
# Enable the AVX-512 code paths when the build host's CPU advertises avx512f.
if (UNIX)
  # Read the CPU feature list. /proc/cpuinfo is Linux-specific; when `cat`
  # cannot read it, CPUINFO is left empty and the AVX flags are not added.
  execute_process(COMMAND cat "/proc/cpuinfo"
                  OUTPUT_VARIABLE CPUINFO
                  ERROR_QUIET)
  # Keep ${CPUINFO} quoted: unquoted, an empty value removes the input
  # argument altogether and string(REGEX REPLACE) aborts the configuration.
  # The replace collapses the whole text to "avx512f" only if the flag occurs.
  STRING(REGEX REPLACE "^.*(avx512f).*$" "\\1" AVX512F_FLAG "${CPUINFO}")
  if("avx512f" STREQUAL "${AVX512F_FLAG}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vl -mavx512bw")
    add_definitions(-DUSE_AVX)
  endif()
endif()
100108
add_subdirectory(ClassGen)
101109

102110
include_directories(

lib/Backends/NNPI/DebugMacros.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
#include "glow/Support/Error.h"
2020
#include "nnpi_inference.h"
2121
#include "nnpi_transformer.h"
22+
#include <chrono>
2223
#include <glog/logging.h>
24+
#include <string>
2325

2426
// Macro for memory instrumentation.
2527
#if NNPI_COLLECT_MEM_USAGE
@@ -255,4 +257,20 @@ GetNNPIInferenceErrorDesc(NNPIInferenceErrorCode err) {
255257
if (exp_res != NNPI_INF_NO_ERROR) \
256258
callback(MAKE_ERR(ErrorValue::ErrorCode::RUNTIME_ERROR, msg)); \
257259
}
260+
261+
// Lightweight wall-clock timers, used in debugging.
//
// NNPI_TIMER_START(timer_name)
//   Declares a local variable named `timer_name` holding the current
//   std::chrono::high_resolution_clock time point. The macro already ends
//   with a semicolon.
//
// NNPI_TIMER_STOP(timer_name, msg_prefix_)
//   Writes `msg_prefix_`, followed by the number of microseconds elapsed since
//   the matching NNPI_TIMER_START and a newline, to std::cout. The message is
//   assembled into one std::string before it is streamed. The body is wrapped
//   in braces so the helper variable `timer_end_` stays local to the expansion.
//
// NOTE(review): std::cout requires <iostream>; no such include is visible in
// this part of the header - confirm it is available where the macro is used.
#define NNPI_TIMER_START(timer_name)                                           \
  auto timer_name = std::chrono::high_resolution_clock::now();
#define NNPI_TIMER_STOP(timer_name, msg_prefix_)                               \
  {                                                                            \
    auto timer_end_ = std::chrono::high_resolution_clock::now();               \
    std::cout << std::string(msg_prefix_) +                                    \
                     std::to_string(                                           \
                         std::chrono::duration_cast<                           \
                             std::chrono::microseconds>(timer_end_ -           \
                                                        timer_name)            \
                             .count()) +                                       \
                     "\n";                                                     \
  }
275+
258276
#endif // GLOW_NNPI_DEBUG_MACROS_H

lib/Backends/NNPI/Importer.cpp

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -941,7 +941,7 @@ class ConcatNodeImporter : public INNPINodeImporter {
941941
auto numInputs = glowConcat->getNumInputs();
942942
NNPIObjectName *inputs = new NNPIObjectName[numInputs];
943943
LOG_AND_RETURN_IF_NOT(ERROR, inputs, "No inputs", NNPI_INVALID_PARAM);
944-
std::set<std::string> inputTensors;
944+
std::unordered_set<std::string> inputTensors;
945945

946946
for (unsigned i = 0; i < numInputs; i++) {
947947
auto nvName = nodeValueName(glowConcat->getNthInput(i));
@@ -969,11 +969,29 @@ class TileNodeImporter : public INNPINodeImporter {
969969
importer.setUsedTensors({nodeValueName(glowTile->getInput())},
970970
{nodeValueName(glowTile->getResult())});
971971

972-
return nnpiNetworkAddTileOp(importer.getNetwork(),
973-
glowTile->getName().begin(),
974-
nodeValueName(glowTile->getInput()).c_str(),
975-
nodeValueName(glowTile->getResult()).c_str(),
976-
glowTile->getCount(), glowTile->getAxis());
972+
auto numDims = glowTile->getInput().getType()->dims().size();
973+
std::vector<int32_t> repeats(numDims, 1);
974+
auto axis = glowTile->getAxis();
975+
LOG_AND_RETURN_IF_NOT(ERROR, axis >= 0 && axis < numDims,
976+
"tile axis is invalid", NNPI_INVALID_PARAM);
977+
repeats[axis] = glowTile->getCount();
978+
NNPITensorDesc desc;
979+
desc.attributes.value = 0;
980+
desc.attributes.constant = 1;
981+
desc.numDims = 1;
982+
desc.dims[0] = numDims;
983+
desc.quantParams.precision = NNPI_PRECISION_INT32;
984+
desc.quantParams.type = NNPI_QUANTIZATION_NONE;
985+
desc.layout = NNPI_LAYOUT_ANY;
986+
987+
auto repeatsTensorName = glowTile->getName().str() + "_repeats";
988+
989+
importer.addTensor(repeatsTensorName, desc, repeats.data());
990+
991+
return nnpiNetworkAddTileOp(
992+
importer.getNetwork(), glowTile->getName().begin(),
993+
nodeValueName(glowTile->getInput()).c_str(), repeatsTensorName.c_str(),
994+
nodeValueName(glowTile->getResult()).c_str());
977995
}
978996
};
979997

@@ -1592,7 +1610,7 @@ class NNPICustomDSPNodeImporter : public INNPINodeImporter {
15921610
auto numInputs = glowDSP->getInputs().size();
15931611
NNPIObjectName *inputs = new NNPIObjectName[numInputs];
15941612
LOG_AND_RETURN_IF_NOT(ERROR, inputs, "No inputs", NNPI_INVALID_PARAM);
1595-
std::set<std::string> inputTensors;
1613+
std::unordered_set<std::string> inputTensors;
15961614
uint32_t i = 0;
15971615
for (const auto &nv : glowDSP->getInputs()) {
15981616
auto nvName = nodeValueName(nv);
@@ -1603,7 +1621,7 @@ class NNPICustomDSPNodeImporter : public INNPINodeImporter {
16031621
uint32_t numOutputs = 1;
16041622
NNPIObjectName *outputs = new NNPIObjectName[numOutputs];
16051623
LOG_AND_RETURN_IF_NOT(ERROR, outputs, "No outputs", NNPI_INVALID_PARAM);
1606-
std::set<std::string> outputTensors;
1624+
std::unordered_set<std::string> outputTensors;
16071625
auto nvName = nodeValueName(glowDSP->getResult());
16081626
strncpy(outputs[0], nvName.c_str(), sizeof(NNPIObjectName));
16091627
outputTensors.insert(nvName);

lib/Backends/NNPI/Importer.h

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323
#include "nnpi_network_builder_EXPERIMENTAL.h"
2424
#include "llvm/ADT/ArrayRef.h"
2525
#include "llvm/Support/raw_ostream.h"
26-
#include <map>
27-
#include <set>
2826
#include <string>
27+
#include <unordered_map>
28+
#include <unordered_set>
2929

3030
namespace glow {
3131
class Function;
@@ -67,8 +67,9 @@ class NNPIImporter {
6767
const std::string &offsetTensor = {},
6868
bool forceSymlowp = false);
6969
/// Set given tensor names as inputs/outputs.
70-
void setUsedTensors(const std::set<std::string> &readTensors = {},
71-
const std::set<std::string> &writeTensors = {}) {
70+
void
71+
setUsedTensors(const std::unordered_set<std::string> &readTensors = {},
72+
const std::unordered_set<std::string> &writeTensors = {}) {
7273
readTensors_.insert(readTensors.begin(), readTensors.end());
7374
writeTensors_.insert(writeTensors.begin(), writeTensors.end());
7475
}
@@ -97,13 +98,13 @@ class NNPIImporter {
9798

9899
private:
99100
/// Map of named external tensors (inputs, outputs, weights, etc...).
100-
std::map<std::string, const Tensor *> constants_;
101+
std::unordered_map<std::string, const Tensor *> constants_;
101102
/// Set of tensors written to by the function.
102-
std::set<std::string> writeTensors_;
103+
std::unordered_set<std::string> writeTensors_;
103104
/// Set of tensors read from by the function.
104-
std::set<std::string> readTensors_;
105+
std::unordered_set<std::string> readTensors_;
105106
/// Set of tensors already defined.
106-
std::set<std::string> definedTensors_;
107+
std::unordered_set<std::string> definedTensors_;
107108
/// Number of internal names created for variables.
108109
size_t internalNameCounter_;
109110

0 commit comments

Comments
 (0)