diff --git a/docs/Backends.md b/docs/Backends.md
index 48cdf36e36..674a3072f7 100644
--- a/docs/Backends.md
+++ b/docs/Backends.md
@@ -65,6 +65,14 @@ Additionally, there are virtual functions that backends can override:
   [below](#backend-specific-nodes-and-instructions-transformations) for more
   information.
 
+- `virtual bool verify(const Function &F) const;`
+
+  - Verifies that the `Function &F` conforms to the backend-specific graph constraints.
+
+- `virtual bool verify(const IRFunction &IR) const;`
+
+  - Verifies that the `IRFunction &IR` conforms to the backend-specific IR constraints.
+
 - `virtual bool shouldLower(const Node *N) const;`
 
   - Allow the backend to prevent lowering for some `Node *N`. For example, if a
diff --git a/include/glow/Backend/Backend.h b/include/glow/Backend/Backend.h
index c53fdcead8..34ed7da5d6 100644
--- a/include/glow/Backend/Backend.h
+++ b/include/glow/Backend/Backend.h
@@ -98,6 +98,29 @@ class Backend {
   /// \returns whether the provided \p NI is supported by the backend.
   virtual bool isOpSupported(const NodeInfo &NI) const = 0;
 
+  /// \returns whether all nodes inside \p F are supported.
+  bool checkAllNodesSupported(const Function &F) const;
+
+  /// \returns whether the provided \p F conforms to the backend-specific graph
+  /// constraints. Overriding this function gives the backend an opportunity to
+  /// check that everything conforms to its own restrictions. It is highly
+  /// recommended that backends make their backend-specific verification a
+  /// super-set of the target-independent Function::verify() by calling it in
+  /// their overridden implementation. This is not a strict requirement,
+  /// however, in case the two diverge or the backend has a good reason not to
+  /// call Function::verify().
+  virtual bool verify(const Function &F) const;
+
+  /// \returns whether the provided \p IR conforms to the backend-specific IR
+  /// constraints. Overriding this function gives the backend an opportunity to
+  /// check that everything conforms to its own restrictions. It is highly
+  /// recommended that backends make their backend-specific verification a
+  /// super-set of the target-independent IRFunction::verify() by calling it in
+  /// their overridden implementation. This is not a strict requirement,
+  /// however, in case the two diverge or the backend has a good reason not to
+  /// call IRFunction::verify().
+  virtual bool verify(const IRFunction &IR) const;
+
   /// \returns true if the supplied Node \N should be lowered. By default, all
   /// Nodes are candidates for lowering.
   virtual bool shouldLower(const Node *N) const { return true; }
diff --git a/include/glow/Backend/BackendUtils.h b/include/glow/Backend/BackendUtils.h
index da4929b1f2..551d5b2a37 100644
--- a/include/glow/Backend/BackendUtils.h
+++ b/include/glow/Backend/BackendUtils.h
@@ -149,6 +149,28 @@ class RuntimeBundle {
 };
 } // namespace runtime
 
+/// Generates a struct named has_\p METHOD_NAME that looks for a method called
+/// \p METHOD_NAME inside of ClassName with return type ReturnType.
+#define CLASS_CONTAINS_METHOD(METHOD_NAME)                                     \
+  template <typename ClassName, typename ReturnType>                           \
+  struct has_##METHOD_NAME {                                                   \
+  private:                                                                     \
+    template <typename T>                                                      \
+    static constexpr auto check(T *) ->                                        \
+        typename std::is_same<decltype(std::declval<T>().METHOD_NAME()),       \
+                              ReturnType>::type;                               \
+    template <typename> static constexpr std::false_type check(...);           \
+    typedef decltype(check<ClassName>(0)) type;                                \
+                                                                               \
+  public:                                                                      \
+    static constexpr bool value = type::value;                                 \
+  };
+
+/// Use template meta-programming to check if typename ClassName contains a
+/// getFusedActivation() method. Below generates a struct named
+/// has_getFusedActivation that looks for said method.
+CLASS_CONTAINS_METHOD(getFusedActivation)
+
 /// If \p PH is an output placeholder in the Function \p F,
 /// \returns true.
 /// This is determined by checking if the PH has a user which uses the PH as an
@@ -173,6 +195,34 @@ bool isOutput(const Placeholder *PH, const IRFunction &F);
 /// by the current function.
 bool isInput(const Placeholder *PH, const IRFunction &F);
 
+/// \returns true if \p N does not have a fused activation.
+template <typename T,
+          std::enable_if_t<!has_getFusedActivation<T, FusedActivation>::value,
+                           int> = 0>
+bool checkNoFusion(const T &N) {
+  (void)N;
+  return true;
+}
+
+/// \returns true if \p N does not have a fused activation.
+template <typename T,
+          std::enable_if_t<has_getFusedActivation<T, FusedActivation>::value,
+                           int> = 0>
+bool checkNoFusion(const T &N) {
+  if (N.getFusedActivation() != FusedActivation::NONE) {
+    report("Glow backend does not support fused Activations for: " +
+           std::string(N.getKindName()));
+    return false;
+  }
+  return true;
+}
+
+/// \returns true if \p N does not have a fused activation.
+bool checkNoFusionForNode(const Node &N);
+
+/// \returns true if \p I does not have a fused activation.
+bool checkNoFusionForInstr(const Instruction &I);
+
 /// Contains information for placeholder during allocation.
 struct PlaceholderInputOutputInfo {
   /// The placeholder address.
diff --git a/include/glow/Optimizer/GraphOptimizer/PassManager.h b/include/glow/Optimizer/GraphOptimizer/PassManager.h
index 93885683c3..26d8bef2ba 100644
--- a/include/glow/Optimizer/GraphOptimizer/PassManager.h
+++ b/include/glow/Optimizer/GraphOptimizer/PassManager.h
@@ -16,6 +16,8 @@
 #ifndef GLOW_OPTIMIZER_GRAPHOPTIMIZER_PASSMANAGER_H
 #define GLOW_OPTIMIZER_GRAPHOPTIMIZER_PASSMANAGER_H
 
+#include "glow/Backend/Backend.h"
+
 #include "glow/Optimizer/GraphOptimizer/CompilationContext.h"
 #include "glow/Optimizer/GraphOptimizer/FunctionPass.h"
 #include "glow/Optimizer/GraphOptimizer/FunctionPasses.h"
@@ -34,6 +36,9 @@ class FunctionPassManager : public Named {
   /// The pipeline of passes to run.
   FunctionPassPipeline pipeline_;
 
+  /// The Backend being compiled for; used for backend-specific verification.
+  const Backend *backend_;
+
   /// The index of the current pass being executed in the pipeline.
   size_t passIdx_ = 0;
 
@@ -55,8 +60,9 @@ class FunctionPassManager : public Named {
                     const CompilationContext &cctx);
 
 public:
-  FunctionPassManager(llvm::StringRef name, FunctionPassPipeline pipeline)
-      : Named(name), pipeline_(pipeline), passIdx_(0) {}
+  FunctionPassManager(llvm::StringRef name, FunctionPassPipeline pipeline,
+                      const Backend *backend = nullptr)
+      : Named(name), pipeline_(pipeline), backend_(backend), passIdx_(0) {}
   ~FunctionPassManager() = default;
 
   /// Run the FunctionPassPipeline given the \ref pipeline_ and
diff --git a/include/glow/Support/Error.h b/include/glow/Support/Error.h
index be7f5f9873..ec7b22ec87 100644
--- a/include/glow/Support/Error.h
+++ b/include/glow/Support/Error.h
@@ -89,6 +89,10 @@ class GlowErr final : public llvm::ErrorInfo<GlowErr> {
     MODEL_WRITER_INVALID_FILENAME,
     // Model writer cannot serialize graph to the file.
     MODEL_WRITER_SERIALIZATION_ERROR,
+    // Compilation error; IR unsupported after generation.
+    COMPILE_UNSUPPORTED_IR_AFTER_GENERATE,
+    // Compilation error; IR unsupported after optimization.
+    COMPILE_UNSUPPORTED_IR_AFTER_OPTIMIZE,
   };
 
   /// GlowErr is not convertable to std::error_code. This is included for
@@ -164,6 +168,10 @@ class GlowErr final : public llvm::ErrorInfo<GlowErr> {
       return "MODEL_WRITER_INVALID_FILENAME";
     case ErrorCode::MODEL_WRITER_SERIALIZATION_ERROR:
       return "MODEL_WRITER_SERIALIZATION_ERROR";
+    case ErrorCode::COMPILE_UNSUPPORTED_IR_AFTER_GENERATE:
+      return "COMPILE_UNSUPPORTED_IR_AFTER_GENERATE";
+    case ErrorCode::COMPILE_UNSUPPORTED_IR_AFTER_OPTIMIZE:
+      return "COMPILE_UNSUPPORTED_IR_AFTER_OPTIMIZE";
     };
 
     llvm_unreachable("unsupported ErrorCode");
diff --git a/lib/Backend/Backend.cpp b/lib/Backend/Backend.cpp
index ac5b1a51b6..e97124de74 100644
--- a/lib/Backend/Backend.cpp
+++ b/lib/Backend/Backend.cpp
@@ -157,6 +157,28 @@ void Backend::autoInstrument(TraceInfo &traceInfo, IRFunction *IR) const {
   IR->pushInstr(new TraceEventInst("end_trace", backingWeight, index));
 }
 
+bool Backend::checkAllNodesSupported(const Function &F) const {
+  bool allSupported = true;
+  for (const Node &N : F.getNodes()) {
+    if (!isOpSupported(N)) {
+      allSupported = false;
+      report("Unsupported node found while compiling Function " +
+             F.getName().str() + " for backend " + getBackendName() + ": " +
+             N.getDebugDesc());
+    }
+  }
+  return allSupported;
+}
+
+bool Backend::verify(const Function &F) const {
+  return F.verify() && checkAllNodesSupported(F);
+}
+
+bool Backend::verify(const IRFunction &IR) const {
+  (void)IR;
+  return true;
+}
+
 FunctionPassPipeline Backend::getOptimizationPipeline() const {
   return createDefaultGraphOptimizationPassPipeline();
 };
diff --git a/lib/Backend/BackendUtils.cpp b/lib/Backend/BackendUtils.cpp
index 5747b343b7..267023b571 100644
--- a/lib/Backend/BackendUtils.cpp
+++ b/lib/Backend/BackendUtils.cpp
@@ -349,6 +349,45 @@ bool isInput(const Placeholder *PH,
   return false;
 }
 
+/// \returns true if \p N does not have a fused activation.
+bool checkNoFusionForNode(const Node &N) {
+#define DEF_NODE(CLASS, NAME)                                                  \
+  case Kinded::Kind::CLASS##Kind: {                                            \
+    const CLASS *CI = llvm::cast<CLASS>(&N);                                   \
+    return checkNoFusion(*CI);                                                 \
+    break;                                                                     \
+  }
+  switch (N.getKind()) {
+#include "glow/AutoGenNodes.def"
+  default:
+    llvm_unreachable("Invalid node.");
+  }
+  return true;
+}
+
+/// \returns true if \p I does not have a fused activation.
+bool checkNoFusionForInstr(const Instruction &I) {
+#define DEF_VALUE(CLASS, NAME)
+#define DEF_INSTR(CLASS, NAME)                                                 \
+  case Kinded::Kind::CLASS##Kind: {                                            \
+    const CLASS *CI = llvm::cast<CLASS>(&I);                                   \
+    return checkNoFusion(*CI);                                                 \
+    break;                                                                     \
+  }
+#define DEF_BACKEND_SPECIFIC_INSTR(CLASS, NAME)                                \
+  case Kinded::Kind::CLASS##Kind: {                                            \
+    const CLASS *CI = llvm::cast<CLASS>(&I);                                   \
+    return checkNoFusion(*CI);                                                 \
+    break;                                                                     \
+  }
+  switch (I.getKind()) {
+#include "glow/AutoGenInstr.def"
+  default:
+    llvm_unreachable("Invalid instruction.");
+  }
+  return true;
+}
+
 template <typename ARR, typename FUN>
 ContiguousPlaceholders getContiguousPlaceHolder(const ARR &holders,
                                                 const FUN &F) {
diff --git a/lib/Backends/Interpreter/Interpreter.cpp b/lib/Backends/Interpreter/Interpreter.cpp
index 8675a2efc3..f068ae3559 100644
--- a/lib/Backends/Interpreter/Interpreter.cpp
+++ b/lib/Backends/Interpreter/Interpreter.cpp
@@ -22,6 +22,7 @@
 #include "glow/Graph/Graph.h"
 #include "glow/Graph/Nodes.h"
 #include "glow/IR/IR.h"
+#include "glow/IR/Instrs.h"
 #include "glow/Optimizer/IROptimizer/IROptimizer.h"
 
 using namespace glow;
@@ -467,6 +468,114 @@ bool Interpreter::isOpSupported(const NodeInfo &NI) const {
   }
 }
 
+/// Use template meta-programming to check if typename ClassName contains a
+/// getLayout() method. Below generates a struct named has_getLayout that
+/// looks for said method.
+CLASS_CONTAINS_METHOD(getLayout)
+
+template <typename T, std::enable_if_t<
+                          !has_getLayout<T, ConvolutionLayout>::value, int> = 0>
+static bool checkLayout(const T &I) {
+  (void)I;
+  return true;
+}
+
+template <typename T, std::enable_if_t<
+                          has_getLayout<T, ConvolutionLayout>::value, int> = 0>
+static bool checkLayout(const T &I) {
+  if (I.getLayout() != NHWC) {
+    report("Glow Interpreter supports only NHWC");
+    return false;
+  }
+  return true;
+}
+
+static bool checkLayoutForNode(const Node &N) {
+#define DEF_NODE(CLASS, NAME)                                                  \
+  case Kinded::Kind::CLASS##Kind: {                                            \
+    const CLASS *CI = llvm::cast<CLASS>(&N);                                   \
+    return checkLayout(*CI);                                                   \
+    break;                                                                     \
+  }
+  switch (N.getKind()) {
+#include "glow/AutoGenNodes.def"
+  default:
+    llvm_unreachable("Invalid node.");
+  }
+  return true;
+}
+
+bool Interpreter::verify(const Function &F) const {
+  if (!F.verify()) {
+    return false;
+  }
+  if (!checkAllNodesSupported(F)) {
+    return false;
+  }
+  for (const Node &N : F.getNodes()) {
+    if (!checkLayoutForNode(N)) {
+      return false;
+    }
+    if (!checkNoFusionForNode(N)) {
+      return false;
+    }
+    switch (N.getKind()) {
+    case Kinded::Kind::ChannelwiseQuantizedConvolutionNodeKind: {
+      auto *CQCI = llvm::cast<ChannelwiseQuantizedConvolutionNode>(&N);
+      if (!CQCI->getGroupwise()) {
+        report("Glow Interpreter does not support Non-groupwise variant");
+        return false;
+      }
+      continue;
+    }
+    default:
+      continue;
+    }
+  }
+  return true;
+}
+
+static bool checkLayoutForInstr(const Instruction &I) {
+#define DEF_VALUE(CLASS, NAME)
+#define DEF_INSTR(CLASS, NAME)                                                 \
+  case Kinded::Kind::CLASS##Kind: {                                            \
+    const CLASS *CI = llvm::cast<CLASS>(&I);                                   \
+    return checkLayout(*CI);                                                   \
+    break;                                                                     \
+  }
+#define DEF_BACKEND_SPECIFIC_INSTR(CLASS, NAME)
+  switch (I.getKind()) {
+#include "glow/AutoGenInstr.def"
+  default:
+    llvm_unreachable("Invalid instruction.");
+  }
+  return true;
+}
+
+bool Interpreter::verify(const IRFunction &IR) const {
+  for (const auto &I : IR.getInstrs()) {
+    if (!checkNoFusionForInstr(I)) {
+      return false;
+    }
+    if (!checkLayoutForInstr(I)) {
+      return false;
+    }
+    switch (I.getKind()) {
+    case Kinded::Kind::ChannelwiseQuantizedConvolutionInstKind: {
+      auto *CQCI = llvm::cast<ChannelwiseQuantizedConvolutionInst>(&I);
+      if (!CQCI->getGroupwise()) {
+        report("Glow Interpreter does not support Non-groupwise variant");
+        return false;
+      }
+      continue;
+    }
+    default:
+      continue;
+    }
+  }
+  return true;
+}
+
 bool Interpreter::shouldLower(const Node *N) const {
   switch (N->getKind()) {
   case Kinded::Kind::ConvolutionNodeKind:
diff --git a/lib/Backends/Interpreter/Interpreter.h b/lib/Backends/Interpreter/Interpreter.h
index 295ceef8d8..a60d19af8d 100644
--- a/lib/Backends/Interpreter/Interpreter.h
+++ b/lib/Backends/Interpreter/Interpreter.h
@@ -49,6 +49,9 @@ class Interpreter final : public BackendUsingGlowIR {
 
   bool isOpSupported(const NodeInfo &NI) const override;
 
+  bool verify(const Function &F) const override;
+  bool verify(const IRFunction &IR) const override;
+
   bool shouldLower(const Node *N) const override;
   /// @}
 
diff --git a/lib/Backends/Interpreter/InterpreterNodes.cpp b/lib/Backends/Interpreter/InterpreterNodes.cpp
index 0649ae4ff4..6510f17497 100644
--- a/lib/Backends/Interpreter/InterpreterNodes.cpp
+++ b/lib/Backends/Interpreter/InterpreterNodes.cpp
@@ -282,10 +282,6 @@ void BoundInterpreterFunction::fwdConvolutionInstQuantizedImpl(
 }
 
 void BoundInterpreterFunction::fwdConvolutionInst(const ConvolutionInst *I) {
-  assert(I->getLayout() == NHWC &&
-         "Glow Interpreter supports only NHWC Convolutions");
-  assert(I->getFusedActivation() == FusedActivation::NONE &&
-         "Glow Interpreter does not support fused Activations.");
   auto kernelSizes = I->getKernels();
   auto pads = I->getPads();
   auto strides = I->getStrides();
@@ -307,8 +303,6 @@ void BoundInterpreterFunction::fwdConvolutionInst(const ConvolutionInst *I) {
 
 void BoundInterpreterFunction::fwdConvolutionGradInst(
     const ConvolutionGradInst *I) {
-  assert(I->getLayout() == NHWC &&
-         "Glow Interpreter supports only NHWC Convolutions");
   auto inW = getWeightHandle(I->getSrc());
   auto inG = getWeightHandle(I->getSrcGrad());
   auto outG = getWeightHandle(I->getDestGrad());
@@ -593,8 +587,6 @@ void BoundInterpreterFunction::fwdConvolution3DGradInst(
 
 void BoundInterpreterFunction::fwdChannelwiseQuantizedConvolutionInst(
     const ChannelwiseQuantizedConvolutionInst *I) {
-  assert(I->getGroupwise() && "Non-groupwise not supported");
-
   using AccumulatorTy = int32_t;
 
   auto inW = getWeightHandle(I->getSrc());
@@ -759,7 +751,6 @@ static void fwdMaxPool(Tensor *inW, Tensor *outW, Tensor *argmaxW,
 }
 
 void BoundInterpreterFunction::fwdMaxPoolInst(const MaxPoolInst *I) {
-  assert(I->getLayout() == NHWC && "Glow Interpreter supports only NHWC Pools");
   auto inW = getTensor(I->getSrc());
   auto outW = getTensor(I->getDest());
 
@@ -777,7 +768,6 @@ void BoundInterpreterFunction::fwdMaxPoolInst(const MaxPoolInst *I) {
 
 void BoundInterpreterFunction::fwdMaxPoolWithArgmaxInst(
     const MaxPoolWithArgmaxInst *I) {
-  assert(I->getLayout() == NHWC && "Glow Interpreter supports only NHWC Pools");
   auto inW = getTensor(I->getSrc());
   auto outW = getTensor(I->getDest());
   auto argmaxW = getTensor(I->getArgmax());
@@ -896,7 +886,6 @@ void BoundInterpreterFunction::fwdAvgPoolInstI8Impl(const AvgPoolInst *I) {
 }
 
 void BoundInterpreterFunction::fwdAvgPoolInst(const AvgPoolInst *I) {
-  assert(I->getLayout() == NHWC && "Glow Interpreter supports only NHWC Pools");
   if (I->getSrc()->getType()->isQuantizedType()) {
     fwdAvgPoolInstI8Impl(I);
     return;
diff --git a/lib/Backends/OpenCL/OpenCL.cpp b/lib/Backends/OpenCL/OpenCL.cpp
index 14ee8dffe4..60bdd687ed 100644
--- a/lib/Backends/OpenCL/OpenCL.cpp
+++ b/lib/Backends/OpenCL/OpenCL.cpp
@@ -341,8 +341,6 @@ void OpenCLFunction::executeNCHWConvolution(
     std::vector<KernelLaunch> &kernelLaunches) {
   DCHECK(executionContext->getDeviceBindings())
       << "DeviceBindings must be set.";
-  DCHECK(CC->getFusedActivation() == FusedActivation::NONE)
-      << "OpenCL Backend does not support fused activations.";
   auto devBindings = static_cast<runtime::OpenCLDeviceBindings *>(
       executionContext->getDeviceBindings());
   auto input = CC->getSrc();
@@ -940,8 +938,6 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
       // index correctly into the output buffer. If the output has zero
       // dimensions store one slice of size 1 into destSliceSizes.
       auto destDims = BRA->getDest()->getType()->dims();
-      DCHECK(destDims.size() < 4 &&
-             "OpenCL BatchedReduceAdd supports max 3 output dimensions");
       std::vector<size_t> destDimsVec(destDims.begin(), destDims.end());
       if (destDims.empty()) {
         destDimsVec.emplace_back(1);
@@ -1010,7 +1006,6 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
 
     if (auto *CG = dyn_cast<ConvolutionGradInst>(&I)) {
       auto *src = CG->getSrc();
-      auto *filter = CG->getFilter();
       auto *destGrad = CG->getDestGrad();
       auto *srcGrad = CG->getSrcGrad();
       auto *filterGrad = CG->getFilterGrad();
@@ -1044,10 +1039,6 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
                            biasGrad->size(), 0, biasGrad->getElementType(),
                            clBindings, kernelLaunches);
 
-      (void)filter;
-      DCHECK(filter->dims() == filterGrad->dims()) << "Dims should be the same";
-      DCHECK(src->dims() == srcGrad->dims()) << "Dims should be the same";
-
       enqueueKernel(I.getName(), commands, kernel, deviceId,
                     {destGradDim.h, destGradDim.w, destGradDim.c},
                     kernelLaunches);
@@ -1066,9 +1057,7 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
       auto numArgs = setKernelArgsForBuffers(kernel, I, 1, runtimeBundle_);
 
       ShapeHW kdim(PM->getKernels());
-      DCHECK(kdim.isSquare()) << "Only square kernel is supported";
       ShapeHW sdim(PM->getStrides());
-      DCHECK(sdim.isSquare()) << "Only square stride is supported";
       setKernelArg(kernel, numArgs + 1, kdim.height);
       setKernelArg(kernel, numArgs + 2, sdim.height);
       auto pads = PaddingTLBR(PM->getPads());
@@ -1106,9 +1095,7 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
       auto idim = ShapeNHWC(PM->getSrc()->getType()->dims());
       auto pads = PaddingTLBR(PM->getPads());
       ShapeHW kdim(PM->getKernels());
-      DCHECK(kdim.isSquare()) << "Only square kernel is supported";
       ShapeHW sdim(PM->getStrides());
-      DCHECK(sdim.isSquare()) << "Only square stride is supported";
       setKernelArg(kernel, numArgs + 1, kdim.height);
       setKernelArg(kernel, numArgs + 2, sdim.height);
       setKernelArg(kernel, numArgs + 3, pads);
@@ -1129,18 +1116,13 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
       auto srcGradDim = ShapeNHWC(PMG->getSrcGrad()->dims());
       auto pads = PaddingTLBR(PMG->getPads());
       ShapeHW kdim(PMG->getKernels());
-      DCHECK(kdim.isSquare()) << "Only square kernel is supported";
       ShapeHW sdim(PMG->getStrides());
-      DCHECK(sdim.isSquare()) << "Only square stride is supported";
       setKernelArg(kernel, numArgs + 1, kdim.height);
       setKernelArg(kernel, numArgs + 2, sdim.height);
       setKernelArg(kernel, numArgs + 3, pads);
       setKernelArg(kernel, numArgs + 4, srcGradDim);
       setKernelArg(kernel, numArgs + 5, destGradDim);
-      DCHECK_EQ(srcGradDim.n, destGradDim.n) << "batch size is wrong";
-      DCHECK_EQ(srcGradDim.c, destGradDim.c) << "depth size is wrong";
-
       enqueueKernel(I.getName(), commands, kernel, deviceId, {srcGradDim.n},
                     kernelLaunches);
       continue;
@@ -1158,9 +1140,7 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
       auto numArgs = setKernelArgsForBuffers(kernel, I, 1, runtimeBundle_);
 
       ShapeHW kdim(PA->getKernels());
-      DCHECK(kdim.isSquare()) << "Only square kernel is supported";
       ShapeHW sdim(PA->getStrides());
-      DCHECK(sdim.isSquare()) << "Only square stride is supported";
       setKernelArg(kernel, numArgs + 1, kdim.height);
       setKernelArg(kernel, numArgs + 2, sdim.height);
       auto pads = PaddingTLBR(PA->getPads());
@@ -1262,10 +1242,6 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
 
       auto *data = GI->getData();
 
-      DCHECK(data->getElementType() == ElemKind::FloatTy)
-          << "At the moment only floats are supported, unsupported type: "
type: " - << Type::getElementName(data->getElementType()).str(); - TypeRef dataType = data->getType(); size_t numIndices = GI->getIndices()->size(); @@ -1292,9 +1268,6 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) { } if (auto *SDI = dyn_cast(&I)) { - assert(!SDI->getCumulative() && "Cumulative assign not supported!"); - assert(SDI->getIndices()->dims()[1] == 1 && - "Only one-dimensional indices are supported!"); cl_kernel kernel = createKernel(kernelName, program); setKernelArg(kernel, 0, deviceBuffer); auto numArgs = setKernelArgsForBuffers(kernel, I, 1, runtimeBundle_); @@ -1932,6 +1905,176 @@ bool OCLBackend::isOpSupported(const NodeInfo &NI) const { } } +/// If \p I got square shaped kernels and strides \returns true. +template static bool checkSquare(const T &I) { + ShapeHW kdim(I.getKernels()); + ShapeHW sdim(I.getStrides()); + if (!kdim.isSquare()) { + report("Only square kernel is supported"); + return false; + } + if (!sdim.isSquare()) { + report("Only square stride is supported"); + return false; + } + return true; +} + +bool OCLBackend::verify(const Function &F) const { + if (!F.verify()) { + return false; + } + if (!checkAllNodesSupported(F)) { + return false; + } + for (const Node &N : F.getNodes()) { + if (!checkNoFusionForNode(N)) { + return false; + } + switch (N.getKind()) { + case Kinded::Kind::ScatterDataNodeKind: { + auto *SD = llvm::cast(&N); + if (SD->getCumulative()) { + report("Cumulative assign not supported!"); + return false; + } + if (SD->getIndices().dims()[1] != 1) { + report("Only one-dimensional indices are supported"); + return false; + } + continue; + } + case Kinded::Kind::OCLBatchedReduceAddNodeKind: { + auto *BRA = llvm::cast(&N); + auto destDims = BRA->getResult().getType()->dims(); + if (destDims.size() > 3) { + report("OpenCL BatchedReduceAdd supports max 3 output dimensions"); + return false; + } + continue; + } + case Kinded::Kind::MaxPoolNodeKind: { + auto *MP = llvm::cast(&N); + if (!checkSquare(*MP)) { + return false; + } + continue; + } + case Kinded::Kind::MaxPoolGradNodeKind: { + auto *MPG = llvm::cast(&N); + if (!checkSquare(*MPG)) { + return false; + } + continue; + } + case Kinded::Kind::AvgPoolNodeKind: { + auto *AP = llvm::cast(&N); + if (!checkSquare(*AP)) { + return false; + } + continue; + } + default: + continue; + } + } + return true; +} + +bool OCLBackend::verify(const IRFunction &IR) const { + for (const auto &I : IR.getInstrs()) { + if (!checkNoFusionForInstr(I)) { + return false; + } + switch (I.getKind()) { + case Kinded::Kind::ScatterDataInstKind: { + auto *SD = llvm::cast(&I); + if (SD->getCumulative()) { + report("Cumulative assign not supported!"); + return false; + } + if (SD->getIndices()->dims()[1] != 1) { + report("Only one-dimensional indices are supported"); + return false; + } + continue; + } + case Kinded::Kind::OCLBatchedReduceAddInstKind: { + auto *BRA = llvm::cast(&I); + auto destDims = BRA->getDest()->getType()->dims(); + if (destDims.size() > 3) { + report("OpenCL BatchedReduceAdd supports max 3 output dimensions"); + return false; + } + continue; + } + case Kinded::Kind::ConvolutionGradInstKind: { + auto *CG = llvm::cast(&I); + auto *src = CG->getSrc(); + auto *filter = CG->getFilter(); + auto *srcGrad = CG->getSrcGrad(); + auto *filterGrad = CG->getFilterGrad(); + if (filter->dims() != filterGrad->dims() || + src->dims() != srcGrad->dims()) { + report("Dims should be the same"); + return false; + } + continue; + } + case Kinded::Kind::MaxPoolInstKind: { + auto *MP = llvm::cast(&I); + if 
+      if (!checkSquare(*MP)) {
+        return false;
+      }
+      continue;
+    }
+    case Kinded::Kind::MaxPoolWithArgmaxInstKind: {
+      auto *MPWA = llvm::cast<MaxPoolWithArgmaxInst>(&I);
+      if (!checkSquare(*MPWA)) {
+        return false;
+      }
+      continue;
+    }
+    case Kinded::Kind::MaxPoolWithArgmaxGradInstKind: {
+      auto *MPWAG = llvm::cast<MaxPoolWithArgmaxGradInst>(&I);
+      if (!checkSquare(*MPWAG)) {
+        return false;
+      }
+      auto destGradDim = ShapeNHWC(MPWAG->getDestGrad()->dims());
+      auto srcGradDim = ShapeNHWC(MPWAG->getSrcGrad()->dims());
+      if (srcGradDim.n != destGradDim.n) {
+        report("batch size is wrong");
+        return false;
+      }
+      if (srcGradDim.c != destGradDim.c) {
+        report("depth size is wrong");
+        return false;
+      }
+      continue;
+    }
+    case Kinded::Kind::AvgPoolInstKind: {
+      auto *AP = llvm::cast<AvgPoolInst>(&I);
+      if (!checkSquare(*AP)) {
+        return false;
+      }
+      continue;
+    }
+    case Kinded::Kind::GatherInstKind: {
+      auto *G = llvm::cast<GatherInst>(&I);
+      auto *data = G->getData();
+      if (data->getElementType() != ElemKind::FloatTy) {
+        report("Gather: At the moment only floats are supported");
+        return false;
+      }
+      continue;
+    }
+    default:
+      continue;
+    }
+  }
+  return true;
+}
+
 TraceInfo OCLBackend::buildManualTraceInfo(Function *F) const {
   TraceInfo info(false, getTraceEventDataSize());
 
diff --git a/lib/Backends/OpenCL/OpenCL.h b/lib/Backends/OpenCL/OpenCL.h
index 35262c33a6..525b1f4794 100644
--- a/lib/Backends/OpenCL/OpenCL.h
+++ b/lib/Backends/OpenCL/OpenCL.h
@@ -213,6 +213,9 @@ class OCLBackend final : public BackendUsingGlowIR {
 
   bool isOpSupported(const NodeInfo &NI) const override;
 
+  bool verify(const Function &F) const override;
+  bool verify(const IRFunction &IR) const override;
+
   bool shouldLower(const Node *N) const override {
     // The group convolution is supported in OpenCL slow convolution kernel.
     if (N->getKind() == Kinded::Kind::ConvolutionNodeKind)
diff --git a/lib/IR/IRGen.cpp b/lib/IR/IRGen.cpp
index c0c4a62972..303da1c775 100644
--- a/lib/IR/IRGen.cpp
+++ b/lib/IR/IRGen.cpp
@@ -448,4 +448,11 @@ void IRFunction::generateIR(const Backend &B) {
   for (auto &N : ScheduledNodes) {
     N->visit(nullptr, &irgen);
   }
+
+  if (!B.verify(*this)) {
+    EXIT_ON_ERR(
+        MAKE_ERR(GlowErr::ErrorCode::COMPILE_UNSUPPORTED_IR_AFTER_GENERATE,
+                 "Unsupported instruction(s) found after generating IR " +
+                     getName().str() + " for backend " + B.getBackendName()));
+  }
 }
diff --git a/lib/Optimizer/GraphOptimizer/GraphOptimizer.cpp b/lib/Optimizer/GraphOptimizer/GraphOptimizer.cpp
index 8a41b13dc0..f340ca91c2 100644
--- a/lib/Optimizer/GraphOptimizer/GraphOptimizer.cpp
+++ b/lib/Optimizer/GraphOptimizer/GraphOptimizer.cpp
@@ -2878,7 +2878,7 @@ void glow::optimize(Function *F, CompilationContext &cctx, const Backend &B) {
   LOG_SCOPE(F->getLogContext(), "glow::optimize")
 
   FunctionPassManager FPM("TargetDependentGraphOptzFPM",
-                          B.getOptimizationPipeline());
+                          B.getOptimizationPipeline(), &B);
   FPM.run(F, cctx);
 }
 
@@ -2900,26 +2900,6 @@ void glow::optimize(Function *F, CompilationMode mode) {
   optimize(F, cctx);
 }
 
-/// \returns an error if any nodes inside \p F are not supported by \p B.
-static llvm::Error checkAllNodesSupported(const Function &F, const Backend &B) {
-  bool allSupported = true;
-  for (const Node &N : F.getNodes()) {
-    if (!B.isOpSupported(N)) {
-      allSupported = false;
-      report("Unsupported node found while compiling Function " +
-             F.getName().str() + " for backend " + B.getBackendName() + ": " +
-             N.getDebugDesc());
-    }
-  }
-  if (!allSupported) {
-    return MAKE_ERR(GlowErr::ErrorCode::COMPILE_UNSUPPORTED_NODE_AFTER_OPTIMIZE,
-                    "Unsupported node(s) found after optimizing Function " +
-                        F.getName().str() + " for backend " +
-                        B.getBackendName());
-  }
-  return llvm::Error::success();
-}
-
 /// Helper function that may transform \p F given preferences of \p cctx and
 /// \p B. The specific transformations are done based on the
 /// PrecisionConfiguration found in \p cctx. This could include quantization,
@@ -3017,5 +2997,14 @@ llvm::Error glow::optimizeFunction(Function *F, const Backend &B,
     ::glow::optimize(F, cctx, B);
   }
 
-  return checkAllNodesSupported(*F, B);
+  // Backend-specific verification is already in use once the Function reaches
+  // the lowered state. Run one final verification pass here to make sure
+  // everything is in order, and bail out if it is not.
+  if (!B.verify(*F)) {
+    return MAKE_ERR(GlowErr::ErrorCode::COMPILE_UNSUPPORTED_NODE_AFTER_OPTIMIZE,
+                    "Unsupported node(s) found after optimizing Function " +
+                        F->getName().str() + " for backend " +
+                        B.getBackendName());
+  }
+  return llvm::Error::success();
 }
diff --git a/lib/Optimizer/GraphOptimizer/PassManager.cpp b/lib/Optimizer/GraphOptimizer/PassManager.cpp
index f500a1263f..b5e699af2a 100644
--- a/lib/Optimizer/GraphOptimizer/PassManager.cpp
+++ b/lib/Optimizer/GraphOptimizer/PassManager.cpp
@@ -136,7 +136,12 @@ bool FunctionPassManager::runPrePass(Function *F,
   }
   if (verifyBeforeAllPassesOpt ||
       listContainsString(verifyBeforePassesOpt, P.getName())) {
-    CHECK(F->verify());
+    if (backend_) {
+      // Do backend-specific verification.
+      CHECK(backend_->verify(*F));
+    } else {
+      CHECK(F->verify());
+    }
   }
   return false;
 }
@@ -156,7 +161,12 @@ bool FunctionPassManager::runPostPass(Function *F,
   }
   if (verifyAfterAllPassesOpt ||
       listContainsString(verifyAfterPassesOpt, P.getName())) {
-    CHECK(F->verify());
+    if (backend_) {
+      // Do backend-specific verification.
+      CHECK(backend_->verify(*F));
+    } else {
+      CHECK(F->verify());
+    }
   }
   return false;
 }
diff --git a/lib/Optimizer/IROptimizer/IROptimizer.cpp b/lib/Optimizer/IROptimizer/IROptimizer.cpp
index 86bb64d3f9..4cc563e375 100644
--- a/lib/Optimizer/IROptimizer/IROptimizer.cpp
+++ b/lib/Optimizer/IROptimizer/IROptimizer.cpp
@@ -1647,6 +1647,12 @@ glow::generateAndOptimizeIR(Function *F, const Backend &B,
   auto IR = llvm::make_unique<IRFunction>(F);
   IR->generateIR(B);
   ::glow::optimize(*IR, shouldShareBuffers);
+  if (!B.verify(*IR)) {
+    EXIT_ON_ERR(MAKE_ERR(
+        GlowErr::ErrorCode::COMPILE_UNSUPPORTED_IR_AFTER_OPTIMIZE,
+        "Unsupported instruction(s) found after optimizing IR " +
+            IR->getName().str() + " for backend " + B.getBackendName()));
+  }
   return IR;
 }
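
For illustration, a minimal sketch of how an out-of-tree backend could plug into the verification hooks added by this patch. "DummyBackend" is hypothetical and not part of the change; only the override pattern (reusing the default Backend::verify() checks and the checkNoFusionFor* helpers introduced above) comes from the patch itself.

#include "glow/Backend/Backend.h"
#include "glow/Backend/BackendUtils.h"

using namespace glow;

// Hypothetical backend, shown only to illustrate the intended override pattern.
class DummyBackend : public Backend {
public:
  // Graph-level verification: reuse the default checks (Function::verify()
  // plus checkAllNodesSupported()) and layer backend-specific rules on top.
  bool verify(const Function &F) const override {
    if (!Backend::verify(F)) {
      return false;
    }
    for (const Node &N : F.getNodes()) {
      // Helper added in BackendUtils.h by this patch.
      if (!checkNoFusionForNode(N)) {
        return false;
      }
    }
    return true;
  }

  // IR-level verification: the base implementation accepts every IRFunction,
  // so an overriding backend supplies its own per-instruction checks.
  bool verify(const IRFunction &IR) const override {
    for (const auto &I : IR.getInstrs()) {
      if (!checkNoFusionForInstr(I)) {
        return false;
      }
    }
    return true;
  }

  // The remaining pure-virtual Backend methods (isOpSupported(), compile(),
  // getBackendName(), ...) are omitted here for brevity.
};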