
Commit aaa2de1

[UnitTest][MTP]add test_speculate_get_padding_offset (#3730)
1 parent abde903 commit aaa2de1

2 files changed: 203 additions & 0 deletions

First new file (143 additions & 0 deletions): unit test for the speculate_get_padding_offset GPU op.
@@ -0,0 +1,143 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np
import paddle

from fastdeploy.model_executor.ops.gpu import speculate_get_padding_offset


# NumPy reference implementation: builds per-token padding offsets, per-token
# batch ids and cumulative sequence lengths from cum_offsets and seq_lens.
def ref_speculate_get_padding_offset(cum_offsets, seq_lens, max_seq_len, token_num_data):
    bsz = seq_lens.shape[0]

    padding_offset = np.zeros([token_num_data], dtype=np.int32)
    batch_id_per_token = np.zeros([token_num_data], dtype=np.int32)
    cum_offsets_out = np.zeros([bsz], dtype=np.int32)
    cu_seqlens_q = np.zeros([bsz + 1], dtype=np.int32)
    cu_seqlens_k = np.zeros([bsz + 1], dtype=np.int32)

    modified_indices = {
        "padding_offset": [],
        "cum_offsets_out": [],
        "cu_seqlens_q": [],
        "cu_seqlens_k": [],
    }

    cu_seqlens_q[0] = 0
    cu_seqlens_k[0] = 0
    modified_indices["cu_seqlens_q"].append(0)
    modified_indices["cu_seqlens_k"].append(0)

    for bi in range(bsz):
        # Offset removed before this request: cum_offsets is shifted by one.
        cum_offset = 0 if bi == 0 else cum_offsets[bi - 1]
        cum_offsets_out[bi] = cum_offset
        modified_indices["cum_offsets_out"].append(bi)

        for i in range(seq_lens[bi]):
            idx = bi * max_seq_len - cum_offset + i
            if idx >= 0 and idx < token_num_data:
                if idx == 0:
                    print(idx, bi, cum_offset)
                padding_offset[idx] = cum_offset
                batch_id_per_token[idx] = bi
                modified_indices["padding_offset"].append(idx)

        cum_seq_len = (bi + 1) * max_seq_len - cum_offsets[bi]
        cu_seqlens_q[bi + 1] = cum_seq_len
        cu_seqlens_k[bi + 1] = cum_seq_len
        modified_indices["cu_seqlens_q"].append(bi + 1)
        modified_indices["cu_seqlens_k"].append(bi + 1)

    return (
        padding_offset,
        cum_offsets_out,
        cu_seqlens_q,
        cu_seqlens_k,
        modified_indices,
        batch_id_per_token,
    )


class TestSpeculateGetPaddingOffset(unittest.TestCase):
    def test_speculate_get_padding_offset(self):
        test_case = {
            "bsz": 4,
            "max_seq_len": 10,
            "token_num_data": 32,
            "cum_offsets": np.array([2, 5, 8, 12], dtype=np.int32),
            "seq_lens": np.array([8, 5, 7, 6], dtype=np.int32),
            "seq_lens_encoder": np.array([1, 0, 1, 0], dtype=np.int32),
        }

        max_draft_tokens = 4

        # Random padded input ids and draft tokens for the GPU op; only
        # batch_id_per_token and cu_seqlens_q/k are compared below.
        input_ids = np.random.randint(0, 1000, (test_case["bsz"], test_case["max_seq_len"]), dtype=np.int64)
        draft_tokens = np.random.randint(0, 1000, (test_case["bsz"], max_draft_tokens), dtype=np.int64)
        token_num = np.array([test_case["token_num_data"]], dtype=np.int64)

        input_ids_tensor = paddle.to_tensor(input_ids)
        draft_tokens_tensor = paddle.to_tensor(draft_tokens)
        cum_offsets_tensor = paddle.to_tensor(test_case["cum_offsets"])
        seq_lens_tensor = paddle.to_tensor(test_case["seq_lens"])
        seq_lens_encoder_tensor = paddle.to_tensor(test_case["seq_lens_encoder"])
        token_num_tensor = paddle.to_tensor(token_num)

        (
            x_remove_padding,
            batch_id_per_token,
            cu_seqlens_q,
            cu_seqlens_k,
        ) = speculate_get_padding_offset(
            input_ids_tensor,
            draft_tokens_tensor,
            cum_offsets_tensor,
            token_num_tensor,
            seq_lens_tensor,
            seq_lens_encoder_tensor,
        )

        (
            ref_padding_offset,
            ref_cum_offsets_out,
            ref_cu_seqlens_q,
            ref_cu_seqlens_k,
            modified_indices,
            ref_batch_id_per_token,
        ) = ref_speculate_get_padding_offset(
            test_case["cum_offsets"],
            test_case["seq_lens"],
            test_case["max_seq_len"],
            test_case["token_num_data"],
        )

        output_arrays = {
            "batch_id_per_token": batch_id_per_token.numpy(),
            "cu_seqlens_q": cu_seqlens_q.numpy(),
            "cu_seqlens_k": cu_seqlens_k.numpy(),
        }

        ref_arrays = {
            "batch_id_per_token": ref_batch_id_per_token,
            "cu_seqlens_q": ref_cu_seqlens_q,
            "cu_seqlens_k": ref_cu_seqlens_k,
        }

        for key in output_arrays:
            np.testing.assert_allclose(output_arrays[key], ref_arrays[key])


if __name__ == "__main__":
    unittest.main()
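
For orientation, the expectation that this test compares against can be traced by hand from the reference implementation with the inputs above (cum_offsets = [2, 5, 8, 12], seq_lens = [8, 5, 7, 6], max_seq_len = 10, token_num_data = 32). The short NumPy sketch below reproduces that trace; it illustrates the reference logic only and is not part of the committed file.

import numpy as np

# Hand trace of ref_speculate_get_padding_offset for the test case above,
# derived from the reference code rather than from the CUDA kernel.
cum_offsets = np.array([2, 5, 8, 12], dtype=np.int32)
seq_lens = np.array([8, 5, 7, 6], dtype=np.int32)
max_seq_len, token_num_data = 10, 32

# Request bi occupies flattened indices starting at bi * max_seq_len - cum_offset,
# where cum_offset is cum_offsets[bi - 1] (0 for the first request):
# request 0 -> 0..7, request 1 -> 8..12, request 2 -> 15..21, request 3 -> 22..27.
expected_batch_id_per_token = np.zeros(token_num_data, dtype=np.int32)
expected_batch_id_per_token[8:13] = 1
expected_batch_id_per_token[15:22] = 2
expected_batch_id_per_token[22:28] = 3
# Positions 13-14 and 28-31 keep their zero initialisation: the synthetic
# cum_offsets are not the exact cumulative padding of seq_lens, and
# token_num_data (32) exceeds the 28 populated positions.

# cu_seqlens_q[bi + 1] = cu_seqlens_k[bi + 1] = (bi + 1) * max_seq_len - cum_offsets[bi]
expected_cu_seqlens = np.array([0, 8, 15, 22, 28], dtype=np.int32)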
Second new file (60 additions & 0 deletions): unit test for the speculate_get_seq_lens_output GPU op.
@@ -0,0 +1,60 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np
import paddle

from fastdeploy.model_executor.ops.gpu import speculate_get_seq_lens_output


class TestSpeculateGetSeqLensOutput(unittest.TestCase):

    def run_seq_lens(self, input_values):
        # input_values holds [seq_lens_this_time, seq_lens_encoder, seq_lens_decoder].
        paddle.seed(42)
        np.random.seed(42)
        seq_lens_this_time = paddle.to_tensor(input_values[0], dtype="int32")
        seq_lens_encoder = paddle.to_tensor(input_values[1], dtype="int32")
        seq_lens_decoder = paddle.to_tensor(input_values[2], dtype="int32")
        seq_lens_output = speculate_get_seq_lens_output(seq_lens_this_time, seq_lens_encoder, seq_lens_decoder)[0]
        return seq_lens_output

    def test_speculate_get_seq_lens_output1(self):
        input_values = [[7], [0], [0]]
        output_value = 7
        result = self.run_seq_lens(input_values)
        np.testing.assert_allclose(result.numpy(), output_value)

    def test_speculate_get_seq_lens_output2(self):
        input_values = [[7], [1], [0]]
        output_value = 1
        result = self.run_seq_lens(input_values)
        np.testing.assert_allclose(result.numpy(), output_value)

    def test_speculate_get_seq_lens_output3(self):
        input_values = [[1], [1], [0]]
        output_value = 1
        result = self.run_seq_lens(input_values)
        np.testing.assert_allclose(result.numpy(), output_value)

    def test_speculate_get_seq_lens_output4(self):
        input_values = [[0], [1], [0]]
        output_value = 0
        result = self.run_seq_lens(input_values)
        np.testing.assert_allclose(result.numpy(), output_value)


if __name__ == "__main__":
    unittest.main()
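
Taken together, the four cases above pin down the per-request rule these tests expect: a request still in the prefill (encoder) stage contributes one output token, an idle request (seq_lens_this_time == 0) contributes none, and a pure decode request passes seq_lens_this_time through; seq_lens_decoder is 0 in every case and does not affect the expectation. The minimal NumPy sketch below is reconstructed from these four cases only, not from the kernel's source, and is meant purely as an illustration of that inferred rule.

import numpy as np

def inferred_seq_lens_output(seq_lens_this_time, seq_lens_encoder, seq_lens_decoder):
    # Rule inferred from the four test cases above; the CUDA kernel is the
    # source of truth and may differ on inputs these cases do not cover.
    this_time = np.asarray(seq_lens_this_time, dtype=np.int32)
    encoder = np.asarray(seq_lens_encoder, dtype=np.int32)
    # Prefill requests emit one token, idle requests emit none,
    # decode requests emit seq_lens_this_time tokens.
    return np.where(encoder > 0, np.minimum(this_time, 1), this_time)

# Reproduces the expectations above:
# [7], [0], [0] -> 7;  [7], [1], [0] -> 1;  [1], [1], [0] -> 1;  [0], [1], [0] -> 0.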
