diff --git a/docs/Backends.md b/docs/Backends.md
index 48cdf36e36..674a3072f7 100644
--- a/docs/Backends.md
+++ b/docs/Backends.md
@@ -65,6 +65,14 @@ Additionally, there are virtual functions that backends can override:
   [below](#backend-specific-nodes-and-instructions-transformations) for more
   information.
 
+- `virtual bool verify(const Function &F) const;`
+
+  - Verifies that the `Function &F` conforms to the backend-specific graph constraints.
+
+- `virtual bool verify(const IRFunction &IR) const;`
+
+  - Verifies that the `IRFunction &IR` conforms to the backend-specific IR constraints.
+
 - `virtual bool shouldLower(const Node *N) const;`
 
   - Allow the backend to prevent lowering for some `Node *N`. For example, if a
diff --git a/include/glow/Backend/Backend.h b/include/glow/Backend/Backend.h
index c53fdcead8..34ed7da5d6 100644
--- a/include/glow/Backend/Backend.h
+++ b/include/glow/Backend/Backend.h
@@ -98,6 +98,29 @@ class Backend {
   /// \returns whether the provided \p NI is supported by the backend.
   virtual bool isOpSupported(const NodeInfo &NI) const = 0;
 
+  /// \returns whether all nodes inside \p F are supported.
+  bool checkAllNodesSupported(const Function &F) const;
+
+  /// \returns whether the provided \p F conforms to the backend-specific graph
+  /// constraints. Overriding this function gives the backend an opportunity to
+  /// check that everything conforms to its own restrictions. It is highly
+  /// recommended that backends make their backend-specific verification a
+  /// super-set of the target-independent Function::verify() by calling it in
+  /// their overridden implementation. This is not a strict requirement,
+  /// however, in case the two diverge or the backend has a good reason not to
+  /// call Function::verify().
+  virtual bool verify(const Function &F) const;
+
+  /// \returns whether the provided \p IR conforms to the backend-specific IR
+  /// constraints. Overriding this function gives the backend an opportunity to
+  /// check that everything conforms to its own restrictions. It is highly
+  /// recommended that backends make their backend-specific verification a
+  /// super-set of the target-independent IRFunction::verify() by calling it in
+  /// their overridden implementation. This is not a strict requirement,
+  /// however, in case the two diverge or the backend has a good reason not to
+  /// call IRFunction::verify().
+  virtual bool verify(const IRFunction &IR) const;
+
   /// \returns true if the supplied Node \N should be lowered. By default, all
   /// Nodes are candidates for lowering.
   virtual bool shouldLower(const Node *N) const { return true; }
diff --git a/include/glow/Backend/BackendUtils.h b/include/glow/Backend/BackendUtils.h
index da4929b1f2..551d5b2a37 100644
--- a/include/glow/Backend/BackendUtils.h
+++ b/include/glow/Backend/BackendUtils.h
@@ -149,6 +149,28 @@ class RuntimeBundle {
 };
 } // namespace runtime
 
+/// Generates a struct named has_\p METHOD_NAME that looks for a method called
+/// \p METHOD_NAME inside of ClassName with return type ReturnType.
+#define CLASS_CONTAINS_METHOD(METHOD_NAME)                                     \
+  template <typename ClassName, typename ReturnType>                           \
+  struct has_##METHOD_NAME {                                                   \
+  private:                                                                     \
+    template <typename T>                                                      \
+    static constexpr auto check(T *) ->                                        \
+        typename std::is_same<decltype(std::declval<T>().METHOD_NAME()),       \
+                              ReturnType>::type;                               \
+    template <typename> static constexpr std::false_type check(...);           \
+    typedef decltype(check<ClassName>(0)) type;                                \
+                                                                               \
+  public:                                                                      \
+    static constexpr bool value = type::value;                                 \
+  };
+
+/// Use template meta-programming to check if typename ClassName contains a
+/// getFusedActivation() method. Below generates a struct named
+/// has_getFusedActivation that looks for said method.
+CLASS_CONTAINS_METHOD(getFusedActivation)
+
 /// If \p PH is an output placeholder in the Function \p F,
 /// \returns true.
 /// This is determined by checking if the PH has a user which uses the PH as an
@@ -173,6 +195,34 @@ bool isOutput(const Placeholder *PH, const IRFunction &F);
 /// by the current function.
 bool isInput(const Placeholder *PH, const IRFunction &F);
 
+/// \returns true if \p N does not have a fused activation.
+template <typename T,
+          std::enable_if_t<!has_getFusedActivation<T, FusedActivation>::value,
+                           int> = 0>
+bool checkNoFusion(const T &N) {
+  (void)N;
+  return true;
+}
+
+/// \returns true if \p N does not have a fused activation.
+template <typename T,
+          std::enable_if_t<has_getFusedActivation<T, FusedActivation>::value,
+                           int> = 0>
+bool checkNoFusion(const T &N) {
+  if (N.getFusedActivation() != FusedActivation::NONE) {
+    report("Glow backend does not support fused Activations for: " +
+           std::string(N.getKindName()));
+    return false;
+  }
+  return true;
+}
+
+/// \returns true if \p N does not have a fused activation.
+bool checkNoFusionForNode(const Node &N);
+
+/// \returns true if \p I does not have a fused activation.
+bool checkNoFusionForInstr(const Instruction &I);
+
 /// Contains information for placeholder during allocation.
 struct PlaceholderInputOutputInfo {
   /// The placeholder address.
diff --git a/include/glow/Optimizer/GraphOptimizer/PassManager.h b/include/glow/Optimizer/GraphOptimizer/PassManager.h
index 93885683c3..26d8bef2ba 100644
--- a/include/glow/Optimizer/GraphOptimizer/PassManager.h
+++ b/include/glow/Optimizer/GraphOptimizer/PassManager.h
@@ -16,6 +16,8 @@
 #ifndef GLOW_OPTIMIZER_GRAPHOPTIMIZER_PASSMANAGER_H
 #define GLOW_OPTIMIZER_GRAPHOPTIMIZER_PASSMANAGER_H
 
+#include "glow/Backend/Backend.h"
+
 #include "glow/Optimizer/GraphOptimizer/CompilationContext.h"
 #include "glow/Optimizer/GraphOptimizer/FunctionPass.h"
 #include "glow/Optimizer/GraphOptimizer/FunctionPasses.h"
@@ -34,6 +36,9 @@ class FunctionPassManager : public Named {
   /// The pipeline of passes to run.
   FunctionPassPipeline pipeline_;
 
+  /// The Backend being compiled for; used for backend-specific verification.
+  const Backend *backend_;
+
   /// The index of the current pass being executed in the pipeline.
   size_t passIdx_ = 0;
 
@@ -55,8 +60,9 @@ class FunctionPassManager : public Named {
                     const CompilationContext &cctx);
 
 public:
-  FunctionPassManager(llvm::StringRef name, FunctionPassPipeline pipeline)
-      : Named(name), pipeline_(pipeline), passIdx_(0) {}
+  FunctionPassManager(llvm::StringRef name, FunctionPassPipeline pipeline,
+                      const Backend *backend = nullptr)
+      : Named(name), pipeline_(pipeline), backend_(backend), passIdx_(0) {}
   ~FunctionPassManager() = default;
 
   /// Run the FunctionPassPipeline given the \ref pipeline_ and
diff --git a/include/glow/Support/Error.h b/include/glow/Support/Error.h
index be7f5f9873..ec7b22ec87 100644
--- a/include/glow/Support/Error.h
+++ b/include/glow/Support/Error.h
@@ -89,6 +89,10 @@ class GlowErr final : public llvm::ErrorInfo<GlowErr> {
     MODEL_WRITER_INVALID_FILENAME,
     // Model writer cannot serialize graph to the file.
     MODEL_WRITER_SERIALIZATION_ERROR,
+    // Compilation error; IR unsupported after generation.
+    COMPILE_UNSUPPORTED_IR_AFTER_GENERATE,
+    // Compilation error; IR unsupported after optimization.
+    COMPILE_UNSUPPORTED_IR_AFTER_OPTIMIZE,
   };
 
   /// GlowErr is not convertable to std::error_code. This is included for
@@ -164,6 +168,10 @@ class GlowErr final : public llvm::ErrorInfo<GlowErr> {
       return "MODEL_WRITER_INVALID_FILENAME";
     case ErrorCode::MODEL_WRITER_SERIALIZATION_ERROR:
       return "MODEL_WRITER_SERIALIZATION_ERROR";
+    case ErrorCode::COMPILE_UNSUPPORTED_IR_AFTER_GENERATE:
+      return "COMPILE_UNSUPPORTED_IR_AFTER_GENERATE";
+    case ErrorCode::COMPILE_UNSUPPORTED_IR_AFTER_OPTIMIZE:
+      return "COMPILE_UNSUPPORTED_IR_AFTER_OPTIMIZE";
     };
 
     llvm_unreachable("unsupported ErrorCode");
diff --git a/lib/Backend/Backend.cpp b/lib/Backend/Backend.cpp
index ac5b1a51b6..e97124de74 100644
--- a/lib/Backend/Backend.cpp
+++ b/lib/Backend/Backend.cpp
@@ -157,6 +157,28 @@ void Backend::autoInstrument(TraceInfo &traceInfo, IRFunction *IR) const {
   IR->pushInstr(new TraceEventInst("end_trace", backingWeight, index));
 }
 
+bool Backend::checkAllNodesSupported(const Function &F) const {
+  bool allSupported = true;
+  for (const Node &N : F.getNodes()) {
+    if (!isOpSupported(N)) {
+      allSupported = false;
+      report("Unsupported node found while compiling Function " +
+             F.getName().str() + " for backend " + getBackendName() + ": " +
+             N.getDebugDesc());
+    }
+  }
+  return allSupported;
+}
+
+bool Backend::verify(const Function &F) const {
+  return F.verify() && checkAllNodesSupported(F);
+}
+
+bool Backend::verify(const IRFunction &IR) const {
+  (void)IR;
+  return true;
+}
+
 FunctionPassPipeline Backend::getOptimizationPipeline() const {
   return createDefaultGraphOptimizationPassPipeline();
 };
diff --git a/lib/Backend/BackendUtils.cpp b/lib/Backend/BackendUtils.cpp
index 5747b343b7..267023b571 100644
--- a/lib/Backend/BackendUtils.cpp
+++ b/lib/Backend/BackendUtils.cpp
@@ -349,6 +349,45 @@ bool isInput(const Placeholder *PH,
   return false;
 }
 
+/// \returns true if \p N does not have a fused activation.
+bool checkNoFusionForNode(const Node &N) {
+#define DEF_NODE(CLASS, NAME)                                                  \
+  case Kinded::Kind::CLASS##Kind: {                                            \
+    const CLASS *CI = llvm::cast<CLASS>(&N);                                   \
+    return checkNoFusion(*CI);                                                 \
+    break;                                                                     \
+  }
+  switch (N.getKind()) {
+#include "glow/AutoGenNodes.def"
+  default:
+    llvm_unreachable("Invalid node.");
+  }
+  return true;
+}
+
+/// \returns true if \p I does not have a fused activation.
+bool checkNoFusionForInstr(const Instruction &I) {
+#define DEF_VALUE(CLASS, NAME)
+#define DEF_INSTR(CLASS, NAME)                                                 \
+  case Kinded::Kind::CLASS##Kind: {                                            \
+    const CLASS *CI = llvm::cast<CLASS>(&I);                                   \
+    return checkNoFusion(*CI);                                                 \
+    break;                                                                     \
+  }
+#define DEF_BACKEND_SPECIFIC_INSTR(CLASS, NAME)                                \
+  case Kinded::Kind::CLASS##Kind: {                                            \
+    const CLASS *CI = llvm::cast<CLASS>(&I);                                   \
+    return checkNoFusion(*CI);                                                 \
+    break;                                                                     \
+  }
+  switch (I.getKind()) {
+#include "glow/AutoGenInstr.def"
+  default:
+    llvm_unreachable("Invalid instruction.");
+  }
+  return true;
+}
+
 template <typename ARR, typename FUN>
 ContiguousPlaceholders getContiguousPlaceHolder(const ARR &holders,
                                                 const FUN &F) {
diff --git a/lib/Backends/Interpreter/Interpreter.cpp b/lib/Backends/Interpreter/Interpreter.cpp
index 8675a2efc3..f068ae3559 100644
--- a/lib/Backends/Interpreter/Interpreter.cpp
+++ b/lib/Backends/Interpreter/Interpreter.cpp
@@ -22,6 +22,7 @@
 #include "glow/Graph/Graph.h"
 #include "glow/Graph/Nodes.h"
 #include "glow/IR/IR.h"
+#include "glow/IR/Instrs.h"
 #include "glow/Optimizer/IROptimizer/IROptimizer.h"
 
 using namespace glow;
@@ -467,6 +468,114 @@ bool Interpreter::isOpSupported(const NodeInfo &NI) const {
   }
 }
 
+/// Use template meta-programming to check if typename ClassName contains a
+/// getLayout() method. Below generates a struct named has_getLayout that
+/// looks for said method.
+CLASS_CONTAINS_METHOD(getLayout)
+
+template <typename T, std::enable_if_t<
+                          !has_getLayout<T, ConvolutionLayout>::value, int> = 0>
+static bool checkLayout(const T &I) {
+  (void)I;
+  return true;
+}
+
+template <typename T, std::enable_if_t<
+                          has_getLayout<T, ConvolutionLayout>::value, int> = 0>
+static bool checkLayout(const T &I) {
+  if (I.getLayout() != NHWC) {
+    report("Glow Interpreter supports only NHWC");
+    return false;
+  }
+  return true;
+}
+
+static bool checkLayoutForNode(const Node &N) {
+#define DEF_NODE(CLASS, NAME)                                                  \
+  case Kinded::Kind::CLASS##Kind: {                                            \
+    const CLASS *CI = llvm::cast<CLASS>(&N);                                   \
+    return checkLayout(*CI);                                                   \
+    break;                                                                     \
+  }
+  switch (N.getKind()) {
+#include "glow/AutoGenNodes.def"
+  default:
+    llvm_unreachable("Invalid node.");
+  }
+  return true;
+}
+
+bool Interpreter::verify(const Function &F) const {
+  if (!F.verify()) {
+    return false;
+  }
+  if (!checkAllNodesSupported(F)) {
+    return false;
+  }
+  for (const Node &N : F.getNodes()) {
+    if (!checkLayoutForNode(N)) {
+      return false;
+    }
+    if (!checkNoFusionForNode(N)) {
+      return false;
+    }
+    switch (N.getKind()) {
+    case Kinded::Kind::ChannelwiseQuantizedConvolutionNodeKind: {
+      auto *CQCI = llvm::cast<ChannelwiseQuantizedConvolutionNode>(&N);
+      if (!CQCI->getGroupwise()) {
+        report("Glow Interpreter does not support Non-groupwise variant");
+        return false;
+      }
+      continue;
+    }
+    default:
+      continue;
+    }
+  }
+  return true;
+}
+
+static bool checkLayoutForInstr(const Instruction &I) {
+#define DEF_VALUE(CLASS, NAME)
+#define DEF_INSTR(CLASS, NAME)                                                 \
+  case Kinded::Kind::CLASS##Kind: {                                            \
+    const CLASS *CI = llvm::cast<CLASS>(&I);                                   \
+    return checkLayout(*CI);                                                   \
+    break;                                                                     \
+  }
+#define DEF_BACKEND_SPECIFIC_INSTR(CLASS, NAME)
+  switch (I.getKind()) {
+#include "glow/AutoGenInstr.def"
+  default:
+    llvm_unreachable("Invalid instruction.");
+  }
+  return true;
+}
+
+bool Interpreter::verify(const IRFunction &IR) const {
+  for (const auto &I : IR.getInstrs()) {
+    if (!checkNoFusionForInstr(I)) {
+      return false;
+    }
+    if (!checkLayoutForInstr(I)) {
+      return false;
+    }
+    switch (I.getKind()) {
+    case Kinded::Kind::ChannelwiseQuantizedConvolutionInstKind: {
+      auto *CQCI = llvm::cast<ChannelwiseQuantizedConvolutionInst>(&I);
+      if (!CQCI->getGroupwise()) {
+        report("Glow Interpreter does not support Non-groupwise variant");
+        return false;
+      }
+      continue;
+    }
+    default:
+      continue;
+    }
+  }
+  return true;
+}
+
 bool Interpreter::shouldLower(const Node *N) const {
   switch (N->getKind()) {
   case Kinded::Kind::ConvolutionNodeKind:
diff --git a/lib/Backends/Interpreter/Interpreter.h b/lib/Backends/Interpreter/Interpreter.h
index 295ceef8d8..a60d19af8d 100644
--- a/lib/Backends/Interpreter/Interpreter.h
+++ b/lib/Backends/Interpreter/Interpreter.h
@@ -49,6 +49,9 @@ class Interpreter final : public BackendUsingGlowIR {
 
   bool isOpSupported(const NodeInfo &NI) const override;
 
+  bool verify(const Function &F) const override;
+  bool verify(const IRFunction &IR) const override;
+
   bool shouldLower(const Node *N) const override;
   /// @}
 
diff --git a/lib/Backends/Interpreter/InterpreterNodes.cpp b/lib/Backends/Interpreter/InterpreterNodes.cpp
index 0649ae4ff4..6510f17497 100644
--- a/lib/Backends/Interpreter/InterpreterNodes.cpp
+++ b/lib/Backends/Interpreter/InterpreterNodes.cpp
@@ -282,10 +282,6 @@ void BoundInterpreterFunction::fwdConvolutionInstQuantizedImpl(
 }
 
 void BoundInterpreterFunction::fwdConvolutionInst(const ConvolutionInst *I) {
-  assert(I->getLayout() == NHWC &&
-         "Glow Interpreter supports only NHWC Convolutions");
-  assert(I->getFusedActivation() == FusedActivation::NONE &&
-         "Glow Interpreter does not support fused Activations.");
   auto kernelSizes = I->getKernels();
   auto pads = I->getPads();
   auto strides = I->getStrides();
@@ -307,8 +303,6 @@ void BoundInterpreterFunction::fwdConvolutionInst(const ConvolutionInst *I) {
 
 void BoundInterpreterFunction::fwdConvolutionGradInst(
     const ConvolutionGradInst *I) {
-  assert(I->getLayout() == NHWC &&
-         "Glow Interpreter supports only NHWC Convolutions");
   auto inW = getWeightHandle(I->getSrc());
   auto inG = getWeightHandle(I->getSrcGrad());
   auto outG = getWeightHandle(I->getDestGrad());
@@ -593,8 +587,6 @@ void BoundInterpreterFunction::fwdConvolution3DGradInst(
 
 void BoundInterpreterFunction::fwdChannelwiseQuantizedConvolutionInst(
     const ChannelwiseQuantizedConvolutionInst *I) {
-  assert(I->getGroupwise() && "Non-groupwise not supported");
-
   using AccumulatorTy = int32_t;
 
   auto inW = getWeightHandle(I->getSrc());
@@ -759,7 +751,6 @@ static void fwdMaxPool(Tensor *inW, Tensor *outW, Tensor *argmaxW,
 }
 
 void BoundInterpreterFunction::fwdMaxPoolInst(const MaxPoolInst *I) {
-  assert(I->getLayout() == NHWC && "Glow Interpreter supports only NHWC Pools");
   auto inW = getTensor(I->getSrc());
   auto outW = getTensor(I->getDest());
 
@@ -777,7 +768,6 @@ void BoundInterpreterFunction::fwdMaxPoolInst(const MaxPoolInst *I) {
 
 void BoundInterpreterFunction::fwdMaxPoolWithArgmaxInst(
     const MaxPoolWithArgmaxInst *I) {
-  assert(I->getLayout() == NHWC && "Glow Interpreter supports only NHWC Pools");
   auto inW = getTensor(I->getSrc());
   auto outW = getTensor(I->getDest());
   auto argmaxW = getTensor(I->getArgmax());
@@ -896,7 +886,6 @@ void BoundInterpreterFunction::fwdAvgPoolInstI8Impl(const AvgPoolInst *I) {
 }
 
 void BoundInterpreterFunction::fwdAvgPoolInst(const AvgPoolInst *I) {
-  assert(I->getLayout() == NHWC && "Glow Interpreter supports only NHWC Pools");
   if (I->getSrc()->getType()->isQuantizedType()) {
     fwdAvgPoolInstI8Impl(I);
     return;
diff --git a/lib/Backends/OpenCL/OpenCL.cpp b/lib/Backends/OpenCL/OpenCL.cpp
index 14ee8dffe4..60bdd687ed 100644
--- a/lib/Backends/OpenCL/OpenCL.cpp
+++ b/lib/Backends/OpenCL/OpenCL.cpp
@@ -341,8 +341,6 @@ void OpenCLFunction::executeNCHWConvolution(
     std::vector<KernelLaunch> &kernelLaunches) {
   DCHECK(executionContext->getDeviceBindings())
       << "DeviceBindings must be set.";
-  DCHECK(CC->getFusedActivation() == FusedActivation::NONE)
-      << "OpenCL Backend does not support fused activations.";
   auto devBindings = static_cast<runtime::OpenCLDeviceBindings *>(
       executionContext->getDeviceBindings());
   auto input = CC->getSrc();
@@ -940,8 +938,6 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
       // index correctly into the output buffer. If the output has zero
       // dimensions store one slice of size 1 into destSliceSizes.
       auto destDims = BRA->getDest()->getType()->dims();
-      DCHECK(destDims.size() < 4 &&
-             "OpenCL BatchedReduceAdd supports max 3 output dimensions");
       std::vector<size_t> destDimsVec(destDims.begin(), destDims.end());
       if (destDims.empty()) {
         destDimsVec.emplace_back(1);
@@ -1010,7 +1006,6 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
 
     if (auto *CG = dyn_cast<ConvolutionGradInst>(&I)) {
       auto *src = CG->getSrc();
-      auto *filter = CG->getFilter();
       auto *destGrad = CG->getDestGrad();
       auto *srcGrad = CG->getSrcGrad();
       auto *filterGrad = CG->getFilterGrad();
@@ -1044,10 +1039,6 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
                            biasGrad->size(), 0, biasGrad->getElementType(),
                            clBindings, kernelLaunches);
 
-      (void)filter;
-      DCHECK(filter->dims() == filterGrad->dims()) << "Dims should be the same";
-      DCHECK(src->dims() == srcGrad->dims()) << "Dims should be the same";
-
       enqueueKernel(I.getName(), commands, kernel, deviceId,
                     {destGradDim.h, destGradDim.w, destGradDim.c},
                     kernelLaunches);
@@ -1066,9 +1057,7 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
       auto numArgs = setKernelArgsForBuffers(kernel, I, 1, runtimeBundle_);
 
       ShapeHW kdim(PM->getKernels());
-      DCHECK(kdim.isSquare()) << "Only square kernel is supported";
       ShapeHW sdim(PM->getStrides());
-      DCHECK(sdim.isSquare()) << "Only square stride is supported";
       setKernelArg(kernel, numArgs + 1, kdim.height);
       setKernelArg(kernel, numArgs + 2, sdim.height);
       auto pads = PaddingTLBR(PM->getPads());
@@ -1106,9 +1095,7 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
       auto idim = ShapeNHWC(PM->getSrc()->getType()->dims());
       auto pads = PaddingTLBR(PM->getPads());
       ShapeHW kdim(PM->getKernels());
-      DCHECK(kdim.isSquare()) << "Only square kernel is supported";
       ShapeHW sdim(PM->getStrides());
-      DCHECK(sdim.isSquare()) << "Only square stride is supported";
       setKernelArg(kernel, numArgs + 1, kdim.height);
       setKernelArg(kernel, numArgs + 2, sdim.height);
       setKernelArg(kernel, numArgs + 3, pads);
@@ -1129,18 +1116,13 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
       auto srcGradDim = ShapeNHWC(PMG->getSrcGrad()->dims());
       auto pads = PaddingTLBR(PMG->getPads());
       ShapeHW kdim(PMG->getKernels());
-      DCHECK(kdim.isSquare()) << "Only square kernel is supported";
       ShapeHW sdim(PMG->getStrides());
-      DCHECK(sdim.isSquare()) << "Only square stride is supported";
       setKernelArg(kernel, numArgs + 1, kdim.height);
       setKernelArg(kernel, numArgs + 2, sdim.height);
       setKernelArg(kernel, numArgs + 3, pads);
       setKernelArg(kernel, numArgs + 4, srcGradDim);
       setKernelArg(kernel, numArgs + 5, destGradDim);
-      DCHECK_EQ(srcGradDim.n, destGradDim.n) << "batch size is wrong";
-      DCHECK_EQ(srcGradDim.c, destGradDim.c) << "depth size is wrong";
-
       enqueueKernel(I.getName(), commands, kernel, deviceId, {srcGradDim.n},
                     kernelLaunches);
       continue;
@@ -1158,9 +1140,7 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
       auto numArgs = setKernelArgsForBuffers(kernel, I, 1, runtimeBundle_);
 
       ShapeHW kdim(PA->getKernels());
-      DCHECK(kdim.isSquare()) << "Only square kernel is supported";
       ShapeHW sdim(PA->getStrides());
-      DCHECK(sdim.isSquare()) << "Only square stride is supported";
       setKernelArg(kernel, numArgs + 1, kdim.height);
       setKernelArg(kernel, numArgs + 2, sdim.height);
       auto pads = PaddingTLBR(PA->getPads());
@@ -1262,10 +1242,6 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) {
 
       auto *data = GI->getData();
 
-      DCHECK(data->getElementType() == ElemKind::FloatTy)
-          << "At the moment only floats are supported, unsupported type: "
type: " - << Type::getElementName(data->getElementType()).str(); - TypeRef dataType = data->getType(); size_t numIndices = GI->getIndices()->size(); @@ -1292,9 +1268,6 @@ llvm::Error OpenCLFunction::execute(ExecutionContext *context) { } if (auto *SDI = dyn_cast(&I)) { - assert(!SDI->getCumulative() && "Cumulative assign not supported!"); - assert(SDI->getIndices()->dims()[1] == 1 && - "Only one-dimensional indices are supported!"); cl_kernel kernel = createKernel(kernelName, program); setKernelArg(kernel, 0, deviceBuffer); auto numArgs = setKernelArgsForBuffers(kernel, I, 1, runtimeBundle_); @@ -1932,6 +1905,176 @@ bool OCLBackend::isOpSupported(const NodeInfo &NI) const { } } +/// If \p I got square shaped kernels and strides \returns true. +template static bool checkSquare(const T &I) { + ShapeHW kdim(I.getKernels()); + ShapeHW sdim(I.getStrides()); + if (!kdim.isSquare()) { + report("Only square kernel is supported"); + return false; + } + if (!sdim.isSquare()) { + report("Only square stride is supported"); + return false; + } + return true; +} + +bool OCLBackend::verify(const Function &F) const { + if (!F.verify()) { + return false; + } + if (!checkAllNodesSupported(F)) { + return false; + } + for (const Node &N : F.getNodes()) { + if (!checkNoFusionForNode(N)) { + return false; + } + switch (N.getKind()) { + case Kinded::Kind::ScatterDataNodeKind: { + auto *SD = llvm::cast(&N); + if (SD->getCumulative()) { + report("Cumulative assign not supported!"); + return false; + } + if (SD->getIndices().dims()[1] != 1) { + report("Only one-dimensional indices are supported"); + return false; + } + continue; + } + case Kinded::Kind::OCLBatchedReduceAddNodeKind: { + auto *BRA = llvm::cast(&N); + auto destDims = BRA->getResult().getType()->dims(); + if (destDims.size() > 3) { + report("OpenCL BatchedReduceAdd supports max 3 output dimensions"); + return false; + } + continue; + } + case Kinded::Kind::MaxPoolNodeKind: { + auto *MP = llvm::cast(&N); + if (!checkSquare(*MP)) { + return false; + } + continue; + } + case Kinded::Kind::MaxPoolGradNodeKind: { + auto *MPG = llvm::cast(&N); + if (!checkSquare(*MPG)) { + return false; + } + continue; + } + case Kinded::Kind::AvgPoolNodeKind: { + auto *AP = llvm::cast(&N); + if (!checkSquare(*AP)) { + return false; + } + continue; + } + default: + continue; + } + } + return true; +} + +bool OCLBackend::verify(const IRFunction &IR) const { + for (const auto &I : IR.getInstrs()) { + if (!checkNoFusionForInstr(I)) { + return false; + } + switch (I.getKind()) { + case Kinded::Kind::ScatterDataInstKind: { + auto *SD = llvm::cast(&I); + if (SD->getCumulative()) { + report("Cumulative assign not supported!"); + return false; + } + if (SD->getIndices()->dims()[1] != 1) { + report("Only one-dimensional indices are supported"); + return false; + } + continue; + } + case Kinded::Kind::OCLBatchedReduceAddInstKind: { + auto *BRA = llvm::cast(&I); + auto destDims = BRA->getDest()->getType()->dims(); + if (destDims.size() > 3) { + report("OpenCL BatchedReduceAdd supports max 3 output dimensions"); + return false; + } + continue; + } + case Kinded::Kind::ConvolutionGradInstKind: { + auto *CG = llvm::cast(&I); + auto *src = CG->getSrc(); + auto *filter = CG->getFilter(); + auto *srcGrad = CG->getSrcGrad(); + auto *filterGrad = CG->getFilterGrad(); + if (filter->dims() != filterGrad->dims() || + src->dims() != srcGrad->dims()) { + report("Dims should be the same"); + return false; + } + continue; + } + case Kinded::Kind::MaxPoolInstKind: { + auto *MP = llvm::cast(&I); + if 
+      if (!checkSquare(*MP)) {
+        return false;
+      }
+      continue;
+    }
+    case Kinded::Kind::MaxPoolWithArgmaxInstKind: {
+      auto *MPWA = llvm::cast<MaxPoolWithArgmaxInst>(&I);
+      if (!checkSquare(*MPWA)) {
+        return false;
+      }
+      continue;
+    }
+    case Kinded::Kind::MaxPoolWithArgmaxGradInstKind: {
+      auto *MPWAG = llvm::cast<MaxPoolWithArgmaxGradInst>(&I);
+      if (!checkSquare(*MPWAG)) {
+        return false;
+      }
+      auto destGradDim = ShapeNHWC(MPWAG->getDestGrad()->dims());
+      auto srcGradDim = ShapeNHWC(MPWAG->getSrcGrad()->dims());
+      if (srcGradDim.n != destGradDim.n) {
+        report("batch size is wrong");
+        return false;
+      }
+      if (srcGradDim.c != destGradDim.c) {
+        report("depth size is wrong");
+        return false;
+      }
+      continue;
+    }
+    case Kinded::Kind::AvgPoolInstKind: {
+      auto *AP = llvm::cast<AvgPoolInst>(&I);
+      if (!checkSquare(*AP)) {
+        return false;
+      }
+      continue;
+    }
+    case Kinded::Kind::GatherInstKind: {
+      auto *G = llvm::cast<GatherInst>(&I);
+      auto *data = G->getData();
+      if (data->getElementType() != ElemKind::FloatTy) {
+        report("Gather: At the moment only floats are supported");
+        return false;
+      }
+      continue;
+    }
+    default:
+      continue;
+    }
+  }
+  return true;
+}
+
 TraceInfo OCLBackend::buildManualTraceInfo(Function *F) const {
   TraceInfo info(false, getTraceEventDataSize());
 
diff --git a/lib/Backends/OpenCL/OpenCL.h b/lib/Backends/OpenCL/OpenCL.h
index 35262c33a6..525b1f4794 100644
--- a/lib/Backends/OpenCL/OpenCL.h
+++ b/lib/Backends/OpenCL/OpenCL.h
@@ -213,6 +213,9 @@ class OCLBackend final : public BackendUsingGlowIR {
 
   bool isOpSupported(const NodeInfo &NI) const override;
 
+  bool verify(const Function &F) const override;
+  bool verify(const IRFunction &IR) const override;
+
   bool shouldLower(const Node *N) const override {
     // The group convolution is supported in OpenCL slow convolution kernel.
     if (N->getKind() == Kinded::Kind::ConvolutionNodeKind)
diff --git a/lib/IR/IRGen.cpp b/lib/IR/IRGen.cpp
index c0c4a62972..303da1c775 100644
--- a/lib/IR/IRGen.cpp
+++ b/lib/IR/IRGen.cpp
@@ -448,4 +448,11 @@ void IRFunction::generateIR(const Backend &B) {
   for (auto &N : ScheduledNodes) {
     N->visit(nullptr, &irgen);
   }
+
+  if (!B.verify(*this)) {
+    EXIT_ON_ERR(
+        MAKE_ERR(GlowErr::ErrorCode::COMPILE_UNSUPPORTED_IR_AFTER_GENERATE,
+                 "Unsupported instruction(s) found after generating IR " +
+                     getName().str() + " for backend " + B.getBackendName()));
+  }
 }
diff --git a/lib/Optimizer/GraphOptimizer/GraphOptimizer.cpp b/lib/Optimizer/GraphOptimizer/GraphOptimizer.cpp
index 8a41b13dc0..f340ca91c2 100644
--- a/lib/Optimizer/GraphOptimizer/GraphOptimizer.cpp
+++ b/lib/Optimizer/GraphOptimizer/GraphOptimizer.cpp
@@ -2878,7 +2878,7 @@ void glow::optimize(Function *F, CompilationContext &cctx, const Backend &B) {
   LOG_SCOPE(F->getLogContext(), "glow::optimize")
 
   FunctionPassManager FPM("TargetDependentGraphOptzFPM",
-                          B.getOptimizationPipeline());
+                          B.getOptimizationPipeline(), &B);
   FPM.run(F, cctx);
 }
 
@@ -2900,26 +2900,6 @@ void glow::optimize(Function *F, CompilationMode mode) {
   optimize(F, cctx);
 }
 
-/// \returns an error if any nodes inside \p F are not supported by \p B.
-static llvm::Error checkAllNodesSupported(const Function &F, const Backend &B) {
-  bool allSupported = true;
-  for (const Node &N : F.getNodes()) {
-    if (!B.isOpSupported(N)) {
-      allSupported = false;
-      report("Unsupported node found while compiling Function " +
-             F.getName().str() + " for backend " + B.getBackendName() + ": " +
-             N.getDebugDesc());
-    }
-  }
-  if (!allSupported) {
-    return MAKE_ERR(GlowErr::ErrorCode::COMPILE_UNSUPPORTED_NODE_AFTER_OPTIMIZE,
-                    "Unsupported node(s) found after optimizing Function " +
-                        F.getName().str() + " for backend " +
-                        B.getBackendName());
-  }
-  return llvm::Error::success();
-}
-
 /// Helper function that may transform \p F given preferences of \p cctx and
 /// \p B. The specific transformations are done based on the
 /// PrecisionConfiguration found in \p cctx. This could include quantization,
@@ -3017,5 +2997,14 @@ llvm::Error glow::optimizeFunction(Function *F, const Backend &B,
     ::glow::optimize(F, cctx, B);
   }
 
-  return checkAllNodesSupported(*F, B);
+  // Backend-specific verification is already in use once the Function reaches
+  // the lowered state. Run one final verification pass here to make sure
+  // everything is in order, and bail out if it is not.
+  if (!B.verify(*F)) {
+    return MAKE_ERR(GlowErr::ErrorCode::COMPILE_UNSUPPORTED_NODE_AFTER_OPTIMIZE,
+                    "Unsupported node(s) found after optimizing Function " +
+                        F->getName().str() + " for backend " +
+                        B.getBackendName());
+  }
+  return llvm::Error::success();
 }
diff --git a/lib/Optimizer/GraphOptimizer/PassManager.cpp b/lib/Optimizer/GraphOptimizer/PassManager.cpp
index f500a1263f..b5e699af2a 100644
--- a/lib/Optimizer/GraphOptimizer/PassManager.cpp
+++ b/lib/Optimizer/GraphOptimizer/PassManager.cpp
@@ -136,7 +136,12 @@ bool FunctionPassManager::runPrePass(Function *F,
   }
   if (verifyBeforeAllPassesOpt ||
       listContainsString(verifyBeforePassesOpt, P.getName())) {
-    CHECK(F->verify());
+    if (backend_) {
+      // Do backend-specific verification.
+      CHECK(backend_->verify(*F));
+    } else {
+      CHECK(F->verify());
+    }
   }
   return false;
 }
@@ -156,7 +161,12 @@ bool FunctionPassManager::runPostPass(Function *F,
   }
   if (verifyAfterAllPassesOpt ||
       listContainsString(verifyAfterPassesOpt, P.getName())) {
-    CHECK(F->verify());
+    if (backend_) {
+      // Do backend-specific verification.
+      CHECK(backend_->verify(*F));
+    } else {
+      CHECK(F->verify());
+    }
   }
   return false;
 }
diff --git a/lib/Optimizer/IROptimizer/IROptimizer.cpp b/lib/Optimizer/IROptimizer/IROptimizer.cpp
index 86bb64d3f9..4cc563e375 100644
--- a/lib/Optimizer/IROptimizer/IROptimizer.cpp
+++ b/lib/Optimizer/IROptimizer/IROptimizer.cpp
@@ -1647,6 +1647,12 @@ glow::generateAndOptimizeIR(Function *F, const Backend &B,
   auto IR = llvm::make_unique<IRFunction>(F);
   IR->generateIR(B);
   ::glow::optimize(*IR, shouldShareBuffers);
+  if (!B.verify(*IR)) {
+    EXIT_ON_ERR(MAKE_ERR(
+        GlowErr::ErrorCode::COMPILE_UNSUPPORTED_IR_AFTER_OPTIMIZE,
+        "Unsupported instruction(s) found after optimizing IR " +
+            IR->getName().str() + " for backend " + B.getBackendName()));
+  }
   return IR;
 }
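
For illustration, a minimal sketch of how an out-of-tree backend could plug into the verification hooks added by this patch. "DummyBackend" is hypothetical and not part of the change; only the override pattern (reusing the default Backend::verify() checks and the checkNoFusionFor* helpers introduced above) comes from the patch itself.

#include "glow/Backend/Backend.h"
#include "glow/Backend/BackendUtils.h"

using namespace glow;

// Hypothetical backend, shown only to illustrate the intended override pattern.
class DummyBackend : public Backend {
public:
  // Graph-level verification: reuse the default checks (Function::verify()
  // plus checkAllNodesSupported()) and layer backend-specific rules on top.
  bool verify(const Function &F) const override {
    if (!Backend::verify(F)) {
      return false;
    }
    for (const Node &N : F.getNodes()) {
      // Helper added in BackendUtils.h by this patch.
      if (!checkNoFusionForNode(N)) {
        return false;
      }
    }
    return true;
  }

  // IR-level verification: the base implementation accepts every IRFunction,
  // so an overriding backend supplies its own per-instruction checks.
  bool verify(const IRFunction &IR) const override {
    for (const auto &I : IR.getInstrs()) {
      if (!checkNoFusionForInstr(I)) {
        return false;
      }
    }
    return true;
  }

  // The remaining pure-virtual Backend methods (isOpSupported(), compile(),
  // getBackendName(), ...) are omitted here for brevity.
};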