@@ -69,81 +69,79 @@ def run_sparse_sample(self, iterations, expected, optimizer):
69
69
self .assertAllClose (var_1 .read_value (), expected [1 ], atol = 2e-4 )
70
70
71
71
def test_dense_sample (self ):
72
- # Expected values are obtained from the official implementation
72
+ # Expected values are obtained from the previous implementation
73
73
self .run_dense_sample (
74
- iterations = 1000 ,
75
- expected = [[0.5554 , 1.5549 ], [2.5557 , 3.5557 ]],
74
+ iterations = 100 ,
75
+ expected = [[0.985769 , 1.985269 ], [2.986119 , 3.986068 ]],
76
76
optimizer = RectifiedAdam (lr = 1e-3 ),
77
77
)
78
78
79
79
def test_sparse_sample (self ):
80
- # Expected values are obtained from the official implementation
81
- # Dense results should be: [-0.1929, 0.8066], [1.8075, 2.8074]
80
+ # Expected values are obtained from the previous implementation
82
81
self .run_sparse_sample (
83
- iterations = 2000 ,
84
- expected = [[- 0.1929 , 2.0 ], [3.0 , 2.8074 ]],
82
+ iterations = 200 ,
83
+ expected = [[0.959333 , 2.0 ], [3.0 , 3.959632 ]],
85
84
optimizer = RectifiedAdam (lr = 1e-3 ),
86
85
)
87
86
88
87
def test_dense_sample_with_amsgrad (self ):
89
88
# Expected values are obtained from the previous implementation
90
89
# `amsgrad` has no effect because the gradient is fixed
91
90
self .run_dense_sample (
92
- iterations = 1000 ,
93
- expected = [[0.5554 , 1.5549 ], [2.5557 , 3.5557 ]],
91
+ iterations = 100 ,
92
+ expected = [[0.985769 , 1.985269 ], [2.986119 , 3.986068 ]],
94
93
optimizer = RectifiedAdam (lr = 1e-3 , amsgrad = True ),
95
94
)
96
95
97
96
def test_sparse_sample_with_amsgrad (self ):
98
97
# Expected values are obtained from the previous implementation
99
98
# `amsgrad` has no effect because the gradient is fixed
100
99
self .run_sparse_sample (
101
- iterations = 2000 ,
102
- expected = [[- 0.1929 , 2.0 ], [3.0 , 2.8074 ]],
100
+ iterations = 200 ,
101
+ expected = [[0.959333 , 2.0 ], [3.0 , 3.959632 ]],
103
102
optimizer = RectifiedAdam (lr = 1e-3 , amsgrad = True ),
104
103
)
105
104
106
105
def test_dense_sample_with_weight_decay (self ):
107
- # Expected values are obtained from the official implementation
106
+ # Expected values are obtained from the previous implementation
108
107
self .run_dense_sample (
109
- iterations = 1000 ,
110
- expected = [[0.5472 , 1.5368 ], [2.5276 , 3.5176 ]],
108
+ iterations = 100 ,
109
+ expected = [[0.984775 , 1.983276 ], [2.983125 , 3.982076 ]],
111
110
optimizer = RectifiedAdam (lr = 1e-3 , weight_decay = 0.01 ),
112
111
)
113
112
114
113
def test_sparse_sample_with_weight_decay (self ):
115
- # Expected values are obtained from the official implementation
116
- # Dense results should be: [-0.2029, 0.7768], [1.7578, 2.7380]
114
+ # Expected values are obtained from the previous implementation
117
115
self .run_sparse_sample (
118
- iterations = 2000 ,
119
- expected = [[- 0.2029 , 2.0 ], [3.0 , 2.7380 ]],
116
+ iterations = 200 ,
117
+ expected = [[0.957368 , 2.0 ], [3.0 , 3.951673 ]],
120
118
optimizer = RectifiedAdam (lr = 1e-3 , weight_decay = 0.01 ),
121
119
)
122
120
123
121
def test_dense_sample_with_warmup (self ):
124
122
self .run_dense_sample (
125
- iterations = 1000 ,
126
- expected = [[0.8041 , 1.8041 ], [2.8041 , 3.8041 ]],
123
+ iterations = 100 ,
124
+ expected = [[0.994062 , 1.993912 ], [2.994167 , 3.994152 ]],
127
125
optimizer = RectifiedAdam (
128
- lr = 1e-3 , total_steps = 1000 , warmup_proportion = 0.1 , min_lr = 1e-5 ,
126
+ lr = 1e-3 , total_steps = 100 , warmup_proportion = 0.1 , min_lr = 1e-5 ,
129
127
),
130
128
)
131
129
132
130
def test_sparse_sample_with_warmup (self ):
133
131
self .run_sparse_sample (
134
- iterations = 2000 ,
135
- expected = [[0.4653 , 2.0 ], [3.0 , 3.4653 ]],
132
+ iterations = 200 ,
133
+ expected = [[0.982629 , 2.0 ], [3.0 , 3.982674 ]],
136
134
optimizer = RectifiedAdam (
137
- lr = 1e-3 , total_steps = 2000 , warmup_proportion = 0.1 , min_lr = 1e-5 ,
135
+ lr = 1e-3 , total_steps = 200 , warmup_proportion = 0.1 , min_lr = 1e-5 ,
138
136
),
139
137
)
140
138
141
139
def test_dense_sample_with_lookahead (self ):
142
140
# Expected values are obtained from the previous implementation
143
141
# of Ranger
144
142
self .run_dense_sample (
145
- iterations = 1000 ,
146
- expected = [[0.7985 , 1.7983 ], [2.7987 , 3.7986 ]],
143
+ iterations = 100 ,
144
+ expected = [[0.993126 , 1.992901 ], [2.993283 , 3.993261 ]],
147
145
optimizer = Lookahead (
148
146
RectifiedAdam (lr = 1e-3 , beta_1 = 0.95 ,),
149
147
sync_period = 6 ,
@@ -152,12 +150,11 @@ def test_dense_sample_with_lookahead(self):
152
150
)
153
151
154
152
def test_sparse_sample_with_lookahead (self ):
155
- # Expected values are obtained from the original implementation
153
+ # Expected values are obtained from the previous implementation
156
154
# of Ranger.
157
- # Dense results should be: [0.6417, 1.6415], [2.6419, 3.6418]
158
155
self .run_sparse_sample (
159
- iterations = 1500 ,
160
- expected = [[0.6417 , 2.0 ], [3.0 , 3.6418 ]],
156
+ iterations = 150 ,
157
+ expected = [[0.988156 , 2.0 ], [3.0 , 3.988291 ]],
161
158
optimizer = Lookahead (
162
159
RectifiedAdam (lr = 1e-3 , beta_1 = 0.95 ,),
163
160
sync_period = 6 ,
0 commit comments