diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md
index 3e1645a1fb..d44e291031 100644
--- a/docs/src/models/layers.md
+++ b/docs/src/models/layers.md
@@ -25,7 +25,6 @@ CrossCor
 SamePad
 Flux.flatten
 Flux.convfilter
-Flux.depthwiseconvfilter
 ```
 
 ## Upsampling Layers
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 27314d09f0..142a129f11 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -128,6 +128,8 @@ julia> Flux.params(c1) |> length
 """
 function Conv(w::AbstractArray{T,N}, b = true, σ = identity;
               stride = 1, pad = 0, dilation = 1, groups = 1) where {T,N}
+
+  @assert size(w, N) % groups == 0 "Output channel dimension must be divisible by groups."
   stride = expand(Val(N-2), stride)
   dilation = expand(Val(N-2), dilation)
   pad = calc_padding(Conv, pad, size(w)[1:N-2], dilation, stride)
@@ -151,12 +153,12 @@ channels from `in` to `out`.
 
 Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling
 distribution.
-
-See also: [`depthwiseconvfilter`](@ref)
 """
 function convfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer};
           init = glorot_uniform, groups = 1) where N
   cin, cout = ch
+  @assert cin % groups == 0 "Input channel dimension must be divisible by groups."
+  @assert cout % groups == 0 "Output channel dimension must be divisible by groups."
   init(filter..., cin÷groups, cout)
 end
 
@@ -298,91 +300,37 @@ end
 """
     DepthwiseConv(filter, in => out, σ=identity; stride=1, pad=0, dilation=1, [bias, init])
+    DepthwiseConv(weight::AbstractArray, [bias, activation; stride, pad, dilation])
+
+Return a depthwise convolutional layer, that is, a [`Conv`](@ref) layer with the
+number of groups equal to the number of input channels.
 
-Depthwise convolutional layer. `filter` is a tuple of integers
-specifying the size of the convolutional kernel, while
-`in` and `out` specify the number of input and output channels.
-
-Note that `out` must be an integer multiple of `in`.
-
-Parameters are controlled by additional keywords, with defaults
-`init=glorot_uniform` and `bias=true`.
-
-See also [`Conv`](@ref) for more detailed description of keywords.
+See [`Conv`](@ref) for a description of the arguments.
 
 # Examples
+
 ```jldoctest
 julia> xs = rand(Float32, 100, 100, 3, 50); # a batch of 50 RGB images
 
 julia> lay = DepthwiseConv((5,5), 3 => 6, relu; bias=false)
-DepthwiseConv((5, 5), 3 => 6, relu, bias=false)  # 150 parameters
+Conv((5, 5), 3 => 6, relu, groups=3, bias=false)  # 150 parameters
 
 julia> lay(xs) |> size
 (96, 96, 6, 50)
 
-julia> DepthwiseConv((5,5), 3 => 9, stride=2, pad=2)(xs) |> size
+julia> DepthwiseConv((5, 5), 3 => 9, stride=2, pad=2)(xs) |> size
 (50, 50, 9, 50)
 ```
 """
-struct DepthwiseConv{N,M,F,A,V}
-  σ::F
-  weight::A
-  bias::V
-  stride::NTuple{N,Int}
-  pad::NTuple{M,Int}
-  dilation::NTuple{N,Int}
+function DepthwiseConv(k::NTuple{<:Any,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity;
+            stride = 1, pad = 0, dilation = 1, bias = true, init = glorot_uniform)
+  Conv(k, ch, σ; groups=ch.first, stride, pad, dilation, bias, init)
 end
 
-"""
-    DepthwiseConv(weight::AbstractArray, [bias, activation; stride, pad, dilation])
-
-Constructs a layer with the given weight and bias arrays.
-Accepts the same keywords as the `DepthwiseConv((4,4), 3 => 6, relu)` method.
-""" function DepthwiseConv(w::AbstractArray{T,N}, bias = true, σ = identity; - stride = 1, pad = 0, dilation = 1) where {T,N} - stride = expand(Val(N-2), stride) - dilation = expand(Val(N-2), dilation) - pad = calc_padding(DepthwiseConv, pad, size(w)[1:N-2], dilation, stride) - b = create_bias(w, bias, prod(size(w)[N-1:end])) - return DepthwiseConv(σ, w, b, stride, pad, dilation) -end - -function DepthwiseConv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = glorot_uniform, stride = 1, pad = 0, dilation = 1, - bias = true) where N - @assert ch[2] % ch[1] == 0 "Output channels must be integer multiple of input channels" - weight = depthwiseconvfilter(k, ch, init = init) - return DepthwiseConv(weight, bias, σ; stride, pad, dilation) -end - -@functor DepthwiseConv - -""" - depthwiseconvfilter(filter::Tuple, in => out) - -Constructs a depthwise convolutional weight array defined by `filter` and channels -from `in` to `out`. - -Accepts the keyword `init` (default: `glorot_uniform`) to control the sampling -distribution. - -See also: [`convfilter`](@ref) -""" -depthwiseconvfilter(filter::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}; - init = glorot_uniform) where N = init(filter..., div(ch[2], ch[1]), ch[1]) - -function (c::DepthwiseConv)(x) - σ = NNlib.fast_act(c.σ, x) - cdims = DepthwiseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - σ.(depthwiseconv(x, c.weight, cdims) .+ conv_reshape_bias(c)) -end - -function Base.show(io::IO, l::DepthwiseConv) - print(io, "DepthwiseConv(", size(l.weight)[1:end-2]) - print(io, ", ", size(l.weight)[end], " => ", prod(size(l.weight)[end-1:end])) - _print_conv_opt(io, l) - print(io, ")") + stride = 1, pad = 0, dilation = 1) where {T,N} + w2 = reshape(w, size(w)[1:end-2]..., 1, :) + Conv(w2, bias, σ; groups = size(w)[end-1], stride, pad, dilation) end diff --git a/src/layers/show.jl b/src/layers/show.jl index d03a253805..47772f7e72 100644 --- a/src/layers/show.jl +++ b/src/layers/show.jl @@ -55,7 +55,7 @@ _show_children(m::Maxout) = m.layers _show_children(p::Parallel) = (p.connection, p.layers...) for T in [ - :Conv, :ConvTranspose, :CrossCor, :DepthwiseConv, :Dense, :Bilinear, :Embedding, + :Conv, :ConvTranspose, :CrossCor, :Dense, :Bilinear, :Embedding, :BatchNorm, :LayerNorm, :InstanceNorm, :GroupNorm, ] @eval function Base.show(io::IO, m::MIME"text/plain", x::$T) diff --git a/src/outputsize.jl b/src/outputsize.jl index 1caea9e16b..774b75ff26 100644 --- a/src/outputsize.jl +++ b/src/outputsize.jl @@ -153,7 +153,7 @@ end ## fixes for layers that don't work out of the box -for (fn, Dims) in ((:conv, DenseConvDims), (:depthwiseconv, DepthwiseConvDims)) +for (fn, Dims) in ((:conv, DenseConvDims),) @eval begin function NNlib.$fn(a::AbstractArray{Nil}, b::AbstractArray{Nil}, dims::$Dims) fill(nil, NNlib.output_size(dims)..., NNlib.channels_out(dims), size(a)[end]) diff --git a/src/utils.jl b/src/utils.jl index ef0ab9114a..85dc8b711f 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -383,7 +383,7 @@ Has the following behaviour Some caveats: * Not all layers will be identity mapping when used with this init. Exceptions - include recurrent layers, `DepthwiseConv` and normalization layers. + include recurrent layers and normalization layers. * Layers must have `input_size == output_size` for identity mapping to be possible. When this is not the case, extra dimensions of the array are padded with zeros. 
diff --git a/test/cuda/layers.jl b/test/cuda/layers.jl
index 677e50170d..8024681a06 100644
--- a/test/cuda/layers.jl
+++ b/test/cuda/layers.jl
@@ -11,7 +11,7 @@ end
 
 # TODO: These layers get into scalar indexing issues.
-const BROKEN_LAYERS = Union{DepthwiseConv}
+const BROKEN_LAYERS = Union{}
 
 const ACTIVATIONS = [identity, relu, tanh,
                      sigmoid, exp, softplus,
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index eb7d13be1c..019f3fd603 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -81,6 +81,10 @@ end
     c = Conv((3,4,5), 100 => 25, groups = 5)
     @test size(c.weight) == (3,4,5, 20, 25)
    @test size(c(ip)) == (8,8,8, 25, 2)
+
+    # Test that channel counts must be divisible by `groups`
+    @test_throws AssertionError Conv((2, 2), 3=>10, groups=2)
+    @test_throws AssertionError Conv((2, 2), 2=>9, groups=2)
   end
 end
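Finally, a hedged sketch of the early-failure behaviour the added tests exercise, assuming the patched `Conv`/`convfilter` asserts; the concrete channel counts are illustrative:

```julia
using Flux, Test

# Channel counts that are not divisible by `groups` now fail at construction time.
@test_throws AssertionError Conv((2, 2), 3 => 10, groups = 2)  # 3 % 2 != 0 (input side)
@test_throws AssertionError Conv((2, 2), 2 => 9,  groups = 2)  # 9 % 2 != 0 (output side)

# A valid configuration still constructs as before:
c = Conv((2, 2), 4 => 10, groups = 2)
size(c.weight)  # (2, 2, 2, 10): cin ÷ groups = 2 filter slices per group
```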