@@ -179,11 +179,12 @@ testmode!(m::AlphaDropout, mode=true) =
   (m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m)
 
 """
-    LayerNorm(size..., λ=identity; affine=true, ϵ=1f-5)
+    LayerNorm(size..., λ=identity; affine=true, eps=1f-5)
 
 A [normalisation layer](https://arxiv.org/abs/1607.06450) designed to be
 used with recurrent hidden states.
 The argument `size` should be an integer or a tuple of integers.
+
 In the forward pass, the layer normalises the mean and standard
 deviation of the input, then applies the elementwise activation `λ`.
 The input is normalised along the first `length(size)` dimensions
@@ -217,9 +218,10 @@ struct LayerNorm{F,D,T,N}
   affine::Bool
 end
 
-function LayerNorm(size::Tuple{Vararg{Int}}, λ=identity; affine::Bool=true, ϵ::Real=1f-5)
+function LayerNorm(size::Tuple{Vararg{Int}}, λ=identity; affine::Bool=true, eps::Real=1f-5, ϵ=nothing)
+  ε = Losses._greek_ascii_depwarn(ϵ => eps, :LayerNorm, "ϵ" => "eps")
   diag = affine ? Scale(size..., λ) : λ != identity ? Base.Fix1(broadcast, λ) : identity
-  return LayerNorm(λ, diag, ϵ, size, affine)
+  return LayerNorm(λ, diag, ε, size, affine)
 end
 LayerNorm(size::Integer...; kw...) = LayerNorm(Int.(size); kw...)
 LayerNorm(size_act...; kw...) = LayerNorm(Int.(size_act[1:end-1]), size_act[end]; kw...)
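The pattern above keeps the Greek-letter keyword working during the deprecation window while steering callers to the ASCII name. A minimal usage sketch of what a caller sees after this change (behaviour inferred from the `_greek_ascii_depwarn` call; the warning text is assumed, not quoted from this diff):

```julia
using Flux

# New ASCII spelling: constructs the layer with no warning.
ln = LayerNorm(5; eps=1f-4)

# Old Greek spelling: still constructs an identical layer, but emits a
# deprecation warning pointing at `eps` (assumed behaviour of the helper).
ln_old = LayerNorm(5; ϵ=1f-4)
```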
@@ -287,7 +289,7 @@ ChainRulesCore.@non_differentiable _track_stats!(::Any...)
     BatchNorm(channels::Integer, λ=identity;
               initβ=zeros32, initγ=ones32,
               affine=true, track_stats=true,
-              ϵ=1f-5, momentum=0.1f0)
+              eps=1f-5, momentum=0.1f0)
 
 [Batch Normalization](https://arxiv.org/abs/1502.03167) layer.
 `channels` should be the size of the channel dimension in your data (see below).
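The docstring rename mirrors what call sites now look like; a short sketch (keyword spelling taken from the diff, everything else standard Flux usage):

```julia
using Flux

# New spelling of the numerical-stability constant.
bn = BatchNorm(64, relu; eps=1f-5, momentum=0.1f0)

# BatchNorm(64, relu; ϵ=1f-5) still works, with a deprecation warning.
```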
@@ -340,15 +342,17 @@ end
 function BatchNorm(chs::Int, λ=identity;
                    initβ=zeros32, initγ=ones32,
                    affine=true, track_stats=true,
-                   ϵ=1f-5, momentum=0.1f0)
+                   eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)
+
+  ε = Losses._greek_ascii_depwarn(ϵ => eps, :BatchNorm, "ϵ" => "eps")
 
   β = affine ? initβ(chs) : nothing
   γ = affine ? initγ(chs) : nothing
   μ = track_stats ? zeros32(chs) : nothing
   σ² = track_stats ? ones32(chs) : nothing
 
   return BatchNorm(λ, β, γ,
-                   μ, σ², ϵ, momentum,
+                   μ, σ², ε, momentum,
                    affine, track_stats,
                    nothing, chs)
 end
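The helper `Losses._greek_ascii_depwarn` is defined elsewhere in the package, so its body is not part of this diff. A plausible sketch consistent with how it is called here (a `Pair` of the Greek and ASCII values, the layer name as a `Symbol`, and a `Pair` of keyword names) might look like this; the real definition may differ:

```julia
# Sketch only, inferred from the call sites in this diff.
# When the Greek keyword was not supplied (it defaults to `nothing`),
# return the ASCII value; otherwise warn and honour the Greek value.
function _greek_ascii_depwarn(greek_ascii::Pair, layer::Symbol, names::Pair)
  greek_ascii.first === nothing && return greek_ascii.second
  Base.depwarn("$layer: keyword `$(names.first)` is deprecated, " *
               "please use `$(names.second)` instead", layer)
  return greek_ascii.first
end
```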
@@ -379,7 +383,7 @@
     InstanceNorm(channels::Integer, λ=identity;
                  initβ=zeros32, initγ=ones32,
                  affine=false, track_stats=false,
-                 ϵ=1f-5, momentum=0.1f0)
+                 eps=1f-5, momentum=0.1f0)
 
 [Instance Normalization](https://arxiv.org/abs/1607.08022) layer.
 `channels` should be the size of the channel dimension in your data (see below).
@@ -430,19 +434,20 @@ end
 function InstanceNorm(chs::Int, λ=identity;
                       initβ=zeros32, initγ=ones32,
                       affine=false, track_stats=false,
-                      ϵ=1f-5, momentum=0.1f0)
+                      eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)
 
   if track_stats
     Base.depwarn("`track_stats=true` will be removed from InstanceNorm in Flux 0.14. The default value is `track_stats=false`, which will work as before.", :InstanceNorm)
   end
+  ε = Losses._greek_ascii_depwarn(ϵ => eps, :InstanceNorm, "ϵ" => "eps")
 
   β = affine ? initβ(chs) : nothing
   γ = affine ? initγ(chs) : nothing
   μ = track_stats ? zeros32(chs) : nothing
   σ² = track_stats ? ones32(chs) : nothing
 
   return InstanceNorm(λ, β, γ,
-                      μ, σ², ϵ, momentum,
+                      μ, σ², ε, momentum,
                       affine, track_stats,
                       nothing, chs)
 end
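This constructor now carries two independent deprecation paths: the keyword rename from this diff, and the pre-existing `track_stats` warning. A usage sketch exercising each (warning behaviour assumed from the `depwarn` calls above):

```julia
using Flux

# Preferred form: ASCII keyword, no stats tracking (the defaults).
inorm = InstanceNorm(3, relu; eps=1f-4)

# Each of these still works but triggers its own deprecation warning:
inorm_greek = InstanceNorm(3; ϵ=1f-4)           # Greek-letter keyword
inorm_stats = InstanceNorm(3; track_stats=true) # removed in Flux 0.14
```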
@@ -473,7 +478,7 @@
     GroupNorm(channels::Integer, G::Integer, λ=identity;
               initβ=zeros32, initγ=ones32,
               affine=true, track_stats=false,
-              ϵ=1f-5, momentum=0.1f0)
+              eps=1f-5, momentum=0.1f0)
 
 [Group Normalization](https://arxiv.org/abs/1803.08494) layer.
 
@@ -532,11 +537,12 @@ trainable(gn::GroupNorm) = hasaffine(gn) ? (β = gn.β, γ = gn.γ) : (;)
 function GroupNorm(chs::Int, G::Int, λ=identity;
                    initβ=zeros32, initγ=ones32,
                    affine=true, track_stats=false,
-                   ϵ=1f-5, momentum=0.1f0)
+                   eps::Real=1f-5, momentum::Real=0.1f0, ϵ=nothing)
 
-  if track_stats
+  if track_stats
     Base.depwarn("`track_stats=true` will be removed from GroupNorm in Flux 0.14. The default value is `track_stats=false`, which will work as before.", :GroupNorm)
-  end
+  end
+  ε = Losses._greek_ascii_depwarn(ϵ => eps, :GroupNorm, "ϵ" => "eps")
 
   chs % G == 0 || error("The number of groups ($(G)) must divide the number of channels ($chs)")
 
@@ -548,7 +554,7 @@
   return GroupNorm(G, λ,
                    β, γ,
                    μ, σ²,
-                   ϵ, momentum,
+                   ε, momentum,
                    affine, track_stats,
                    nothing, chs)
 end
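The divisibility check guarded by `error` above is the easiest thing to trip over with this layer; a short sketch of valid and invalid group counts (the error text is quoted from the diff):

```julia
using Flux

gn  = GroupNorm(12, 4)             # ok: 4 groups of 3 channels each
gn2 = GroupNorm(12, 4; eps=1f-4)   # ASCII keyword after this change

# GroupNorm(12, 5) would throw:
# "The number of groups (5) must divide the number of channels (12)"
```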