
Commit d594826

ngimel authored and mcarilli committed
Adam tests (pytorch#67)
* Add unittest for FusedAdam.
* Fix some bugs.
* Set seed for adam test.
1 parent a01a732 commit d594826

File tree: 3 files changed, +180 −1 lines changed

tests/run_mixed_adam/__init__.py

Whitespace-only changes.
Lines changed: 179 additions & 0 deletions
@@ -0,0 +1,179 @@
import unittest
import os
import random

import torch
import apex

class TestFusedAdam(unittest.TestCase):
    def setUp(self, max_abs_diff=1e-3, max_rel_diff=1, iters=7):
        self.max_abs_diff = max_abs_diff
        self.max_rel_diff = max_rel_diff
        self.iters = iters
        torch.cuda.manual_seed(9876)

    def tearDown(self):
        pass

    def gen_param_optim(self, tensors, adam_option):
        ref_param = []
        tst_param = []
        for tensor in tensors:
            ref_param.append(torch.nn.Parameter(tensor.clone()))
            tst_param.append(torch.nn.Parameter(tensor.clone()))

        ref_optim = torch.optim.Adam(ref_param, **adam_option)
        tst_optim = apex.optimizers.FusedAdam(tst_param, **adam_option)

        return (ref_param, tst_param, ref_optim, tst_optim)

    def gen_grad(self, ref_param, tst_param):
        for p_ref, p_tst in zip(ref_param, tst_param):
            p_ref.grad = torch.rand_like(p_ref)
            p_tst.grad = p_ref.grad

    def gen_mixed_grad(self, ref_param, tst_param, scale=1.0):
        half_grads = []
        for p_ref, p_tst in zip(ref_param, tst_param):
            half_grads.append(torch.rand_like(p_ref).half())
            p_ref.grad = half_grads[-1].float() / scale
        return half_grads

    def get_max_diff(self, ref_param, tst_param):
        max_abs_diff = max_rel_diff = 0
        for p_ref, p_tst in zip(ref_param, tst_param):
            max_abs_diff_p = (p_ref - p_tst).abs().max().item()
            max_rel_diff_p = ((p_ref - p_tst) / p_ref).abs().max().item()

            if max_abs_diff_p > max_abs_diff:  max_abs_diff = max_abs_diff_p
            if max_rel_diff_p > max_rel_diff:  max_rel_diff = max_rel_diff_p

        return max_abs_diff, max_rel_diff

    def gen_single_type_test(self, param_type=torch.float):
        nelem = 278011
        adam_option = {'lr':5e-4, 'betas':(0.9, 0.999), 'eps':1e-08,
                       'weight_decay':0, 'amsgrad':False}

        tensor = torch.rand(nelem, dtype=param_type, device='cuda')
        ref_param, tst_param, ref_optim, tst_optim = \
            self.gen_param_optim([tensor], adam_option)

        for i in range(self.iters):
            self.gen_grad(ref_param, tst_param)
            ref_optim.step()
            tst_optim.step()
            max_abs_diff, max_rel_diff = self.get_max_diff(ref_param, tst_param)

            self.assertLessEqual(max_abs_diff, self.max_abs_diff)
            self.assertLessEqual(max_rel_diff, self.max_rel_diff)

    def test_double(self):
        self.gen_single_type_test(param_type=torch.double)

    def test_float(self):
        self.gen_single_type_test(param_type=torch.float)

    def test_half(self):
        nelem = 278011
        adam_option = {'lr':5e-4, 'betas':(0.9, 0.999), 'eps':1e-08,
                       'weight_decay':0, 'amsgrad':False}

        tensor = torch.rand(nelem, dtype=torch.float, device='cuda')
        ref_param, tst_param, ref_optim, tst_optim = \
            self.gen_param_optim([tensor], adam_option)

        for i in range(self.iters):
            half_grads = self.gen_mixed_grad(ref_param, tst_param)
            ref_optim.step()
            tst_optim.step(grads=half_grads)
            max_abs_diff, max_rel_diff = self.get_max_diff(ref_param, tst_param)

            self.assertLessEqual(max_abs_diff, self.max_abs_diff)
            self.assertLessEqual(max_rel_diff, self.max_rel_diff)

    def test_multi_params(self):
        sizes = [[4096, 1024], [4096], [4096, 2048], [32320, 1024], [1]]
        adam_option = {'lr':5e-4, 'betas':(0.9, 0.999), 'eps':1e-08,
                       'weight_decay':0, 'amsgrad':False}

        tensors = []
        for size in sizes:
            tensors.append(torch.rand(size, dtype=torch.float, device='cuda'))
        ref_param, tst_param, ref_optim, tst_optim = \
            self.gen_param_optim(tensors, adam_option)

        for i in range(self.iters):
            half_grads = self.gen_mixed_grad(ref_param, tst_param)
            ref_optim.step()
            tst_optim.step(grads=half_grads)
            max_abs_diff, max_rel_diff = self.get_max_diff(ref_param, tst_param)
            self.assertLessEqual(max_abs_diff, self.max_abs_diff)
            self.assertLessEqual(max_rel_diff, self.max_rel_diff)

    def test_scale(self):
        nelem = 278011
        adam_option = {'lr':5e-4, 'betas':(0.9, 0.999), 'eps':1e-08,
                       'weight_decay':0, 'amsgrad':False}

        tensor = torch.rand(nelem, dtype=torch.float, device='cuda')
        ref_param, tst_param, ref_optim, tst_optim = \
            self.gen_param_optim([tensor], adam_option)

        for i in range(self.iters):
            scale = random.random() * 1000
            half_grads = self.gen_mixed_grad(ref_param, tst_param, scale)
            ref_optim.step()
            tst_optim.step(grads=half_grads, scale=scale)
            max_abs_diff, max_rel_diff = self.get_max_diff(ref_param, tst_param)

            self.assertLessEqual(max_abs_diff, self.max_abs_diff)
            self.assertLessEqual(max_rel_diff, self.max_rel_diff)

    def test_fp16_output(self):
        nelem = 278011
        adam_option = {'lr':5e-4, 'betas':(0.9, 0.999), 'eps':1e-08,
                       'weight_decay':0, 'amsgrad':False}

        tensor = torch.rand(nelem, dtype=torch.float, device='cuda')
        ref_param, tst_param, ref_optim, tst_optim = \
            self.gen_param_optim([tensor], adam_option)

        fp16_param = torch.nn.Parameter(tensor.clone().half())

        for i in range(self.iters):
            half_grads = self.gen_mixed_grad(ref_param, tst_param)
            ref_optim.step()
            tst_optim.step(grads=half_grads, output_params=[fp16_param])

            max_abs_diff, max_rel_diff = self.get_max_diff(ref_param, tst_param)
            self.assertLessEqual(max_abs_diff, self.max_abs_diff)
            self.assertLessEqual(max_rel_diff, self.max_rel_diff)

            max_abs_diff, max_rel_diff = self.get_max_diff(tst_param, \
                [fp16_param.float()])
            self.assertLessEqual(max_abs_diff, self.max_abs_diff)
            self.assertLessEqual(max_rel_diff, self.max_rel_diff)

    def test_adam_option(self):
        nelem = 1
        adam_option = {'lr':0.01, 'betas':(0.6, 0.9), 'eps':3e-06,
                       'weight_decay':0, 'amsgrad':False}

        tensor = torch.rand(nelem, dtype=torch.float, device='cuda')
        ref_param, tst_param, ref_optim, tst_optim = \
            self.gen_param_optim([tensor], adam_option)

        for i in range(self.iters):
            self.gen_grad(ref_param, tst_param)
            ref_optim.step()
            tst_optim.step()
            max_abs_diff, max_rel_diff = self.get_max_diff(ref_param, tst_param)

            self.assertLessEqual(max_abs_diff, self.max_abs_diff)
            self.assertLessEqual(max_rel_diff, self.max_rel_diff)


if __name__ == '__main__':
    script_path = os.path.dirname(os.path.realpath(__file__))
    unittest.main()
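For context only: the mixed-precision tests above follow the usual loss-scaling convention. gen_mixed_grad hands the reference torch.optim.Adam fp16 gradients already unscaled to fp32 (half_grads[-1].float() / scale), while FusedAdam receives the raw fp16 gradients plus the scale via step(grads=half_grads, scale=scale) and is expected to unscale internally. The snippet below is a minimal standalone sketch of that equivalence, not part of the commit; the names true_grad, scaled_half, and recovered are invented for illustration.

# Standalone sketch of the scaling convention exercised by test_scale.
# Not part of the commit; runs on CPU and needs only torch.
import torch

scale = 128.0
true_grad = torch.rand(1000)              # the "real" fp32 gradient
scaled_half = (true_grad * scale).half()  # fp16 gradient as a scaled backward pass would produce it
recovered = scaled_half.float() / scale   # what gen_mixed_grad assigns to p_ref.grad

# FusedAdam is expected to perform the equivalent division internally
# when called as tst_optim.step(grads=half_grads, scale=scale).
assert torch.allclose(true_grad, recovered, atol=1e-2)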

tests/run_test.py

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 import unittest
 import sys
 
-test_dirs = ["run_fp16_optimizer", "run_amp"]
+test_dirs = ["run_fp16_optimizer", "run_amp", "run_mixed_adam"]
 
 runner = unittest.TextTestRunner(verbosity=2)
 
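The hunk above shows only the directory list and the runner line of tests/run_test.py. As rough context, a driver of this shape would typically discover each directory's tests and feed them to the TextTestRunner; the loop below is a hedged sketch of such a driver, not the file's actual contents.

# Hypothetical sketch of a run_test.py-style driver; the real file may differ.
import sys
import unittest

test_dirs = ["run_fp16_optimizer", "run_amp", "run_mixed_adam"]

runner = unittest.TextTestRunner(verbosity=2)

errcode = 0
for test_dir in test_dirs:
    # Discover test modules inside each suite directory and run them.
    suite = unittest.TestLoader().discover(test_dir)
    if not runner.run(suite).wasSuccessful():
        errcode = 1

sys.exit(errcode)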
0 commit comments