diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md
index 3e1645a1fb..d44e291031 100644
--- a/docs/src/models/layers.md
+++ b/docs/src/models/layers.md
@@ -25,7 +25,6 @@ CrossCor
 SamePad
 Flux.flatten
 Flux.convfilter
-Flux.depthwiseconvfilter
 ```
 
 ## Upsampling Layers
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 27314d09f0..142a129f11 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -128,6 +128,8 @@ julia> Flux.params(c1) |> length
 """
 function Conv(w::AbstractArray{T,N}, b = true, σ = identity;
               stride = 1, pad = 0, dilation = 1, groups = 1) where {T,N}
+
+  @assert size(w, N) % groups == 0 "Output channel dimension must be divisible by groups."
   stride = expand(Val(N-2), stride)
   dilation = expand(Val(N-2), dilation)
   pad = calc_padding(Conv, pad, size(w)[1:N-2], dilation, stride)
@@ -151,12 +153,12 @@ channels from `in` to `out`.
 
 Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling
 distribution.
-
-See also: [`depthwiseconvfilter`](@ref)
 """
 function convfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
           init = glorot_uniform, groups = 1) where N
   cin, cout = ch
+  @assert cin % groups == 0 "Input channel dimension must be divisible by groups."
+  @assert cout % groups == 0 "Output channel dimension must be divisible by groups."
   init(filter..., cin÷groups, cout)
 end
 
@@ -298,91 +300,37 @@ end
 """
     DepthwiseConv(filter, in => out, σ=identity; stride=1, pad=0, dilation=1, [bias, init])
+    DepthwiseConv(weight::AbstractArray, [bias, activation; stride, pad, dilation])
+
+Return a depthwise convolutional layer, that is, a [`Conv`](@ref) layer with the
+number of groups equal to the number of input channels.
 
-Depthwise convolutional layer. `filter` is a tuple of integers
-specifying the size of the convolutional kernel, while
-`in` and `out` specify the number of input and output channels.
-
-Note that `out` must be an integer multiple of `in`.
-
-Parameters are controlled by additional keywords, with defaults
-`init=glorot_uniform` and `bias=true`.
-
-See also [`Conv`](@ref) for more detailed description of keywords.
+See [`Conv`](@ref) for a description of the arguments.
 
 # Examples
+
 ```jldoctest
 julia> xs = rand(Float32, 100, 100, 3, 50); # a batch of 50 RGB images
 
 julia> lay = DepthwiseConv((5,5), 3 => 6, relu; bias=false)
-DepthwiseConv((5, 5), 3 => 6, relu, bias=false)  # 150 parameters
+Conv((5, 5), 3 => 6, relu, groups=3, bias=false)  # 150 parameters
 
 julia> lay(xs) |> size
 (96, 96, 6, 50)
 
-julia> DepthwiseConv((5,5), 3 => 9, stride=2, pad=2)(xs) |> size
+julia> DepthwiseConv((5, 5), 3 => 9, stride=2, pad=2)(xs) |> size
 (50, 50, 9, 50)
 ```
 """
-struct DepthwiseConv{N,M,F,A,V}
-  σ::F
-  weight::A
-  bias::V
-  stride::NTuple{N,Int}
-  pad::NTuple{M,Int}
-  dilation::NTuple{N,Int}
+function DepthwiseConv(k::NTuple{<:Any,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
+            stride = 1, pad = 0, dilation = 1, bias = true, init = glorot_uniform)
+  Conv(k, ch, σ; groups=ch.first, stride, pad, dilation, bias, init)
 end
 
-"""
-    DepthwiseConv(weight::AbstractArray, [bias, activation; stride, pad, dilation])
-
-Constructs a layer with the given weight and bias arrays.
-Accepts the same keywords as the `DepthwiseConv((4,4), 3 => 6, relu)` method.
-""" function DepthwiseConv(w::AbstractArray{T,N}, bias = true, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} - stride = expand(Val(N-2), stride) - dilation = expand(Val(N-2), dilation) - pad = calc_padding(DepthwiseConv, pad, size(w)[1:N-2], dilation, stride) - b = create_bias(w, bias, prod(size(w)[N-1:end])) - return DepthwiseConv(σ, w, b, stride, pad, dilation) -end - -function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - bias = true) where N - @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels" - weight = depthwiseconvfilter(k, ch, init = init) - return DepthwiseConv(weight, bias, σ; stride, pad, dilation) -end - -@functor DepthwiseConv - -""" - depthwiseconvfilter(filter::Tuple, in => out) - -Constructs a depthwise convolutional weight array defined by `filter` and channels -from `in` to `out`. - -Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling -distribution. - -See also: [`convfilter`](@ref) -""" -depthwiseconvfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; - init = glorot_uniform) where N = init(filter..., div(ch[2], ch[1]), ch[1]) - -function (c::DepthwiseConv)(x) - σ = NNlib.fast_act(c.σ, x) - cdims = DepthwiseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - σ.(depthwiseconv(x, c.weight, cdims) .+ conv_reshape_bias(c)) -end - -function Base.show(io::IO, l::DepthwiseConv) - print(io, "DepthwiseConv(", size(l.weight)[1:end-2]) - print(io, ", ", size(l.weight)[end], " => ", prod(size(l.weight)[end-1:end])) - _print_conv_opt(io, l) - print(io, ")") + stride = 1, pad = 0, dilation = 1) where {T,N} + w2 = reshape(w, size(w)[1:end-2]..., 1, :) + Conv(w2, bias, σ; groups = size(w)[end-1], stride, pad, dilation) end diff --git a/src/layers/show.jl b/src/layers/show.jl index d03a253805..47772f7e72 100644 --- a/src/layers/show.jl +++ b/src/layers/show.jl @@ -55,7 +55,7 @@ _show_children(m::Maxout) = m.layers _show_children(p::Parallel) = (p.connection, p.layers...) for T in [ - :Conv, :ConvTranspose, :CrossCor, :DepthwiseConv, :Dense, :Bilinear, :Embedding, + :Conv, :ConvTranspose, :CrossCor, :Dense, :Bilinear, :Embedding, :BatchNorm, :LayerNorm, :InstanceNorm, :GroupNorm, ] @eval function Base.show(io::IO, m::MIME"text/plain", x::$T) diff --git a/src/outputsize.jl b/src/outputsize.jl index 1caea9e16b..774b75ff26 100644 --- a/src/outputsize.jl +++ b/src/outputsize.jl @@ -153,7 +153,7 @@ end ## fixes for layers that don't work out of the box -for (fn, Dims) in ((:conv, DenseConvDims), (:depthwiseconv, DepthwiseConvDims)) +for (fn, Dims) in ((:conv, DenseConvDims),) @eval begin function NNlib.$fn(a::AbstractArray{Nil}, b::AbstractArray{Nil}, dims::$Dims) fill(nil, NNlib.output_size(dims)..., NNlib.channels_out(dims), size(a)[end]) diff --git a/src/utils.jl b/src/utils.jl index ef0ab9114a..85dc8b711f 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -383,7 +383,7 @@ Has the following behaviour Some caveats: * Not all layers will be identity mapping when used with this init. Exceptions - include recurrent layers, `DepthwiseConv` and normalization layers. + include recurrent layers and normalization layers. * Layers must have `input_size == output_size` for identity mapping to be possible. When this is not the case, extra dimensions of the array are padded with zeros. 
diff --git a/test/cuda/layers.jl b/test/cuda/layers.jl
index 677e50170d..8024681a06 100644
--- a/test/cuda/layers.jl
+++ b/test/cuda/layers.jl
@@ -11,7 +11,7 @@ end
 
 # TODO: These layers get into scalar indexing issues.
-const BROKEN_LAYERS = Union{DepthwiseConv}
+const BROKEN_LAYERS = Union{}
 
 const ACTIVATIONS = [identity, relu, tanh,
                      sigmoid, exp, softplus,
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index eb7d13be1c..019f3fd603 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -81,6 +81,10 @@ end
     c = Conv((3,4,5), 100 => 25, groups = 5)
     @test size(c.weight) == (3,4,5, 20, 25)
    @test size(c(ip)) == (8,8,8, 25, 2)
+
+    # Test that channel counts must be divisible by `groups`
+    @test_throws AssertionError Conv((2, 2), 3=>10, groups=2)
+    @test_throws AssertionError Conv((2, 2), 2=>9, groups=2)
   end
 end
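Finally, a hedged sketch of the early-failure behaviour the added tests exercise, assuming the patched `Conv`/`convfilter` asserts; the concrete channel counts are illustrative:

```julia
using Flux, Test

# Channel counts that are not divisible by `groups` now fail at construction time.
@test_throws AssertionError Conv((2, 2), 3 => 10, groups = 2)  # 3 % 2 != 0 (input side)
@test_throws AssertionError Conv((2, 2), 2 => 9,  groups = 2)  # 9 % 2 != 0 (output side)

# A valid configuration still constructs as before:
c = Conv((2, 2), 4 => 10, groups = 2)
size(c.weight)  # (2, 2, 2, 10): cin ÷ groups = 2 filter slices per group
```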