diff --git a/src/conv.jl b/src/conv.jl
index af26d5936..d096d6e3c 100644
--- a/src/conv.jl
+++ b/src/conv.jl
@@ -159,7 +159,7 @@ end
 if is_nnpack_available()
     function conv(x::Array{xT, 4}, w::Array{wT, 4},
                   cdims::DenseConvDims{2, K, C_in, C_out, (1, 1), P, (1, 1), F};
-                  kwargs...) where {xT, wT, K, C_in, C_out, S, P, F}
+                  kwargs...) where {xT, wT, K, C_in, C_out, P, F}
         return conv_nnpack(x, w, cdims; kwargs...)
     end
 end
diff --git a/src/impl/depthwiseconv_direct.jl b/src/impl/depthwiseconv_direct.jl
index 7e2e02bd5..b6822a488 100644
--- a/src/impl/depthwiseconv_direct.jl
+++ b/src/impl/depthwiseconv_direct.jl
@@ -20,7 +20,7 @@ See the docstring for `conv_direct!()` for more on the optional parameters.
 """
 function depthwiseconv_direct!(y::AbstractArray{yT,5}, x::AbstractArray{xT,5},
                                w::AbstractArray{wT,5}, cdims::DepthwiseConvDims;
-                               alpha::yT = yT(1), beta = false) where {yT, xT, wT}
+                               alpha::yT=yT(1), beta=false) where {yT, xT, wT}
     check_dims(size(x), size(w), size(y), cdims)
 
     width, height, depth = input_size(cdims)
@@ -135,7 +135,7 @@ for each batch and channel independently.
 function ∇depthwiseconv_data_direct!(
                 dx::AbstractArray{xT,5}, dy::AbstractArray{yT,5},
                 w::AbstractArray{wT,5}, cdims::DepthwiseConvDims;
-                alpha::xT=xT(1), beta::xT=xT(0)) where {xT, yT, wT}
+                alpha::xT=xT(1), beta=false) where {xT, yT, wT}
     # We do a separate convolution for each channel in x
     @inbounds for cidx in 1:channels_in(cdims)
         # For this batch and in-channel, we have a normal transposed convolution
@@ -168,7 +168,7 @@ Calculate the gradient imposed upon `w` in the depthwise convolution `y = x * w`
 function ∇depthwiseconv_filter_direct!(
                 dw::AbstractArray{wT,5}, x::AbstractArray{xT,5},
                 dy::AbstractArray{yT,5}, cdims::DepthwiseConvDims;
-                alpha::wT=wT(1),beta::wT=wT(0)) where {xT, yT, wT}
+                alpha::wT=wT(1), beta=false) where {xT, yT, wT}
     # We do a separate convolution for each channel in x
     @inbounds for cidx in 1:channels_in(cdims)
         # For this batch and in-channel, we have a normal transposed convolution
diff --git a/src/impl/depthwiseconv_im2col.jl b/src/impl/depthwiseconv_im2col.jl
index 387efdb29..80e8935cf 100644
--- a/src/impl/depthwiseconv_im2col.jl
+++ b/src/impl/depthwiseconv_im2col.jl
@@ -14,7 +14,7 @@ function depthwiseconv_im2col!(
                 y::AbstractArray{T,5}, x::AbstractArray{T,5},
                 w::AbstractArray{T,5}, cdims::DepthwiseConvDims;
                 col::AbstractArray{T,2} = similar(x, im2col_dims(cdims)),
-                alpha=T(1), beta=T(0)) where T
+                alpha::T=T(1), beta::T=T(0)) where T
     check_dims(size(x), size(w), size(y), cdims)
 
     # This functions exactly the same as conv_im2col!(), except that we shard the
@@ -56,7 +56,7 @@ function ∇depthwiseconv_filter_im2col!(
                 dw::AbstractArray{T,5}, x::AbstractArray{T,5},
                 dy::AbstractArray{T,5}, cdims::DepthwiseConvDims;
                 col::AbstractArray{T,2} = similar(dw, im2col_dims(cdims)),
-                alpha=T(1), beta=T(0)) where T
+                alpha::T=T(1), beta::T=T(0)) where T
     check_dims(size(x), size(dw), size(dy), cdims)
 
     M = prod(kernel_size(cdims))
@@ -96,7 +96,7 @@ function ∇depthwiseconv_data_im2col!(
                 dx::AbstractArray{T,5}, dy::AbstractArray{T,5},
                 w::AbstractArray{T,5}, cdims::DepthwiseConvDims;
                 col::AbstractArray{T,2} = similar(dx, im2col_dims(cdims)),
-                alpha=T(1), beta=T(0)) where T
+                alpha::T=T(1), beta::T=T(0)) where T
     check_dims(size(dx), size(w), size(dy), cdims)
 
     M = prod(output_size(cdims))
diff --git a/src/nnpack/interface.jl b/src/nnpack/interface.jl
index fbe6dc05d..25ab93632 100644
--- a/src/nnpack/interface.jl
+++ b/src/nnpack/interface.jl
@@ -52,11 +52,19 @@ end
 
 """
-    check_supported_operation(x::Array, pdims::PoolDims)
+    nnpack_supported_operation(cdims::ConvDims)
+    nnpack_supported_operation(pdims::PoolDims)
 
-Returns `true` if nnpack supports the pooling operation for the given input.
+Returns `true` if NNPACK supports the convolution/pooling operation for the given parameters.
 """
-function check_supported_operation(x::Array{T, 4}, pdims::PoolDims{2, K, S, P, (1, 1)}) where {T, K, S, P}
-    val = size(x)[1:2] .+ (P[1] + P[2], P[3] + P[4]) .- K
+function nnpack_supported_operation(pdims::PoolDims{2, K, S, P, (1, 1)}) where {K, S, P}
+    val = input_size(pdims)[1:2] .+ (P[1] + P[2], P[3] + P[4]) .- K
     return val .% S == (0, 0) ? true : false
 end
+
+function nnpack_supported_operation(cdims::ConvDims{2, K, (1, 1), P, (1, 1)}) where {K, P}
+    return true
+end
+
+# Return false for everything else
+nnpack_supported_operation(dims) = false
 
diff --git a/src/nnpack/libnnpack.jl b/src/nnpack/libnnpack.jl
index 44876b5a8..2f3996c32 100644
--- a/src/nnpack/libnnpack.jl
+++ b/src/nnpack/libnnpack.jl
@@ -127,7 +127,7 @@ end
 function nnp_convolution_output(y::Array{Float32,4}, x::Array{Float32,4}, w::Array{Float32,4}, b::Array{Float32,1}; algo::nnp_convolution_algorithm = UInt32(0), workspace_buffer = nothing, workspace_size = 0, padding = 0, stride = 1, threadpool = C_NULL, profile = nothing)
     input_size = nnp_size(Csize_t.((size(x,1), size(x,2)))...)
     kernel_size = nnp_size(Csize_t.((size(w,1),size(w,2)))...)
-    input_padding = nnp_padding(Csize_t(padding[2]), Csize_t(padding[1]), Csize_t(padding[2]), Csize_t(padding[1]))
+    input_padding = nnp_padding(Csize_t(padding[3]), Csize_t(padding[2]), Csize_t(padding[4]), Csize_t(padding[1]))
     profile = profile == nothing ? nnp_profile() : profile
     workspace_buffer = workspace_buffer === nothing ? C_NULL : workspace_buffer
     nnp_convolution_output(UInt32(algo), size(x,4), size(x,3), size(w,4), input_size, input_padding, kernel_size, x, w, b, y, workspace_buffer, workspace_size, UInt32(0), C_NULL, threadpool, profile)
diff --git a/src/pooling.jl b/src/pooling.jl
index ac9e8827c..13c605e97 100644
--- a/src/pooling.jl
+++ b/src/pooling.jl
@@ -132,7 +132,7 @@ end
 # Use NNPACK if it is available and operation is supported
 if is_nnpack_available()
     function maxpool(x::Array{T, 4}, pdims::PoolDims{2, K, S, P, (1, 1)}; kwargs...) where {T, K, S, P}
-        func = check_supported_operation(x, pdims) ? maxpool_nnpack : maxpool_direct
+        func = nnpack_supported_operation(pdims) ? maxpool_nnpack : maxpool_direct
         return func(x, pdims; kwargs...)
     end
 end
diff --git a/test/conv.jl b/test/conv.jl
index 2df60cbec..55bbbaec6 100644
--- a/test/conv.jl
+++ b/test/conv.jl
@@ -274,10 +274,13 @@ conv_answer_dict = Dict(
         # A "drop channels and batch dimension" helper
         ddims(x) = dropdims(x, dims=(rank+1, rank+2))
 
-        for conv in (NNlib.conv, NNlib.conv_im2col, NNlib.conv_direct)
+        for conv in (NNlib.conv, NNlib.conv_im2col, NNlib.conv_direct, NNlib.conv_nnpack)
+            if conv == NNlib.conv_nnpack && !NNlib.nnpack_supported_operation(DenseConvDims(x, w))
+                continue
+            end
             @testset "$(conv)" begin
-                # First, your basic convolution with no parameters
                 cdims = DenseConvDims(x, w)
+                # First, your basic convolution with no parameters
                 @test isapprox(ddims(conv(x, w, cdims)), y_plain, rtol = 1.0e-7)
 
                 # Next, test convolution on views and alternate datatypes:
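
Usage sketch for the new `nnpack_supported_operation` gate. This mirrors the branch that `maxpool` in src/pooling.jl now takes internally, so user code never needs to call the helper directly. The helpers are internal and unexported, hence the `NNlib.` prefixes; the `PoolDims(x, 2)` constructor call and the random input are illustrative only:

    using NNlib

    x = rand(Float32, 32, 32, 3, 1)   # W x H x C x N input
    pdims = NNlib.PoolDims(x, 2)      # 2x2 pooling window

    # nnpack_supported_operation(pdims) is true only when the padded input
    # extent divides evenly by the stride; any other geometry (or dims type)
    # hits the generic `false` fallback and stays on the direct kernel.
    if NNlib.nnpack_supported_operation(pdims)
        y = NNlib.maxpool_nnpack(x, pdims)
    else
        y = NNlib.maxpool_direct(x, pdims)
    end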
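
The `nnp_padding` reorder in src/nnpack/libnnpack.jl is easier to check with concrete values. NNlib stores 2-d padding as `(w_lo, w_hi, h_lo, h_hi)`, i.e. `(left, right, top, bottom)` (this is the convention the `(P[1] + P[2], P[3] + P[4])` arithmetic above relies on), while the fixed call implies NNPACK's struct order is `(top, right, bottom, left)`. The tuple values below are made up for illustration:

    # NNlib convention: (left, right, top, bottom)
    padding = (1, 2, 3, 4)

    # Reordered for nnp_padding(top, right, bottom, left),
    # matching the indices (3, 2, 4, 1) used in the fixed line:
    top, right, bottom, left = padding[3], padding[2], padding[4], padding[1]
    @assert (top, right, bottom, left) == (3, 2, 4, 1)

The old call reused `padding[1]` and `padding[2]` for all four fields, so any convolution whose four padding values differ was silently mis-padded.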