Skip to content

Commit 6d64cb3

Browse files
authored
Fix default momentum value of BatchNorm2d in MaxViT from 0.99 to 0.01 (#8312)
1 parent 9563e3e commit 6d64cb3

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

torchvision/models/maxvit.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -577,7 +577,7 @@ class MaxVit(nn.Module):
577577
stochastic_depth_prob (float): Probability of stochastic depth. Expands to a list of probabilities for each layer that scales linearly to the specified value.
578578
squeeze_ratio (float): Squeeze ratio in the SE Layer. Default: 0.25.
579579
expansion_ratio (float): Expansion ratio in the MBConv bottleneck. Default: 4.
580-
norm_layer (Callable[..., nn.Module]): Normalization function. Default: None (setting to None will produce a `BatchNorm2d(eps=1e-3, momentum=0.99)`).
580+
norm_layer (Callable[..., nn.Module]): Normalization function. Default: None (setting to None will produce a `BatchNorm2d(eps=1e-3, momentum=0.01)`).
581581
activation_layer (Callable[..., nn.Module]): Activation function. Default: nn.GELU.
582582
head_dim (int): Dimension of the attention heads.
583583
mlp_ratio (int): Expansion ratio of the MLP layer. Default: 4.
@@ -623,7 +623,7 @@ def __init__(
623623
# https://github.com/google-research/maxvit/blob/da76cf0d8a6ec668cc31b399c4126186da7da944/maxvit/models/maxvit.py#L1029-L1030
624624
# for the exact parameters used in batchnorm
625625
if norm_layer is None:
626-
norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.99)
626+
norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.01)
627627

628628
# Make sure input size will be divisible by the partition size in all blocks
629629
# Undefined behavior if H or W are not divisible by p
@@ -788,7 +788,8 @@ class MaxVit_T_Weights(WeightsEnum):
788788
},
789789
"_ops": 5.558,
790790
"_file_size": 118.769,
791-
"_docs": """These weights reproduce closely the results of the paper using a similar training recipe.""",
791+
"_docs": """These weights reproduce closely the results of the paper using a similar training recipe.
792+
They were trained with a BatchNorm2D momentum of 0.99 instead of the more correct 0.01.""",
792793
},
793794
)
794795
DEFAULT = IMAGENET1K_V1

0 commit comments

Comments
 (0)