Skip to content

Commit fd3046b

Browse files
committed
Refs #173. Fixed an internal buffer overflow bug in gemv_t on x86.
1 parent a4ee6f3 commit fd3046b

File tree

3 files changed

+164
-37
lines changed

3 files changed

+164
-37
lines changed

kernel/x86/gemv_t_sse.S

Lines changed: 58 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -89,17 +89,23 @@
8989
#endif
9090

9191
#define STACKSIZE 16
92-
93-
#define M 4 + STACKSIZE(%esp)
94-
#define N 8 + STACKSIZE(%esp)
95-
#define ALPHA 16 + STACKSIZE(%esp)
96-
#define A 20 + STACKSIZE(%esp)
97-
#define STACK_LDA 24 + STACKSIZE(%esp)
98-
#define STACK_X 28 + STACKSIZE(%esp)
99-
#define STACK_INCX 32 + STACKSIZE(%esp)
100-
#define Y 36 + STACKSIZE(%esp)
101-
#define STACK_INCY 40 + STACKSIZE(%esp)
102-
#define BUFFER 44 + STACKSIZE(%esp)
92+
#define ARGS 16
93+
94+
#define M 4 + STACKSIZE+ARGS(%esp)
95+
#define N 8 + STACKSIZE+ARGS(%esp)
96+
#define ALPHA 16 + STACKSIZE+ARGS(%esp)
97+
#define A 20 + STACKSIZE+ARGS(%esp)
98+
#define STACK_LDA 24 + STACKSIZE+ARGS(%esp)
99+
#define STACK_X 28 + STACKSIZE+ARGS(%esp)
100+
#define STACK_INCX 32 + STACKSIZE+ARGS(%esp)
101+
#define Y 36 + STACKSIZE+ARGS(%esp)
102+
#define STACK_INCY 40 + STACKSIZE+ARGS(%esp)
103+
#define BUFFER 44 + STACKSIZE+ARGS(%esp)
104+
105+
#define MMM 0+STACKSIZE(%esp)
106+
#define NN 4+STACKSIZE(%esp)
107+
#define AA 8+STACKSIZE(%esp)
108+
#define LDAX 12+STACKSIZE(%esp)
103109

104110
#define I %eax
105111
#define J %ebx
@@ -114,6 +120,7 @@
114120

115121
PROLOGUE
116122

123+
subl $ARGS,%esp
117124
pushl %ebp
118125
pushl %edi
119126
pushl %esi
@@ -122,6 +129,37 @@
122129
PROFCODE
123130

124131
movl STACK_LDA, LDA
132+
movl LDA,LDAX # backup LDA
133+
movl N,J
134+
movl J,NN # backup N
135+
movl A,J
136+
movl J,AA # backup A
137+
movl M,J
138+
movl J,MMM # mov M to MMM
139+
.L0t:
140+
xorl J,J
141+
addl $1,J
142+
sall $23,J # J = 1<<23 (comment previously said 2^22; NOTE(review): confirm intended chunk size)
143+
subl J,MMM # MMM=MMM-J
144+
movl J,M
145+
jge .L00t
146+
ALIGN_4
147+
148+
movl MMM,%eax
149+
addl J,%eax
150+
jle .L999x
151+
movl %eax,M
152+
153+
.L00t:
154+
movl AA,%eax
155+
movl %eax,A # mov AA to A
156+
157+
movl NN,%eax
158+
movl %eax,N # reset N
159+
160+
161+
movl LDAX, LDA # reset LDA
162+
125163
movl STACK_X, X
126164
movl STACK_INCX, INCX
127165
movl STACK_INCY, INCY
@@ -628,10 +666,19 @@
628666
ALIGN_4
629667

630668
.L999:
669+
movl M,J
670+
leal (,J,SIZE),%eax
671+
addl %eax,AA
672+
jmp .L0t
673+
ALIGN_4
674+
675+
.L999x:
631676
popl %ebx
632677
popl %esi
633678
popl %edi
634679
popl %ebp
680+
681+
addl $ARGS,%esp
635682
ret
636683

637684
EPILOGUE

kernel/x86/gemv_t_sse2.S

Lines changed: 60 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -76,18 +76,24 @@
7676
#endif
7777

7878
#define STACKSIZE 16
79+
#define ARGS 16
80+
81+
#define M 4 + STACKSIZE+ARGS(%esp)
82+
#define N 8 + STACKSIZE+ARGS(%esp)
83+
#define ALPHA 16 + STACKSIZE+ARGS(%esp)
84+
#define A 24 + STACKSIZE+ARGS(%esp)
85+
#define STACK_LDA 28 + STACKSIZE+ARGS(%esp)
86+
#define STACK_X 32 + STACKSIZE+ARGS(%esp)
87+
#define STACK_INCX 36 + STACKSIZE+ARGS(%esp)
88+
#define Y 40 + STACKSIZE+ARGS(%esp)
89+
#define STACK_INCY 44 + STACKSIZE+ARGS(%esp)
90+
#define BUFFER 48 + STACKSIZE+ARGS(%esp)
91+
92+
#define MMM 0+STACKSIZE(%esp)
93+
#define AA 4+STACKSIZE(%esp)
94+
#define LDAX 8+STACKSIZE(%esp)
95+
#define NN 12+STACKSIZE(%esp)
7996

80-
#define M 4 + STACKSIZE(%esp)
81-
#define N 8 + STACKSIZE(%esp)
82-
#define ALPHA 16 + STACKSIZE(%esp)
83-
#define A 24 + STACKSIZE(%esp)
84-
#define STACK_LDA 28 + STACKSIZE(%esp)
85-
#define STACK_X 32 + STACKSIZE(%esp)
86-
#define STACK_INCX 36 + STACKSIZE(%esp)
87-
#define Y 40 + STACKSIZE(%esp)
88-
#define STACK_INCY 44 + STACKSIZE(%esp)
89-
#define BUFFER 48 + STACKSIZE(%esp)
90-
9197
#define I %eax
9298
#define J %ebx
9399

@@ -101,14 +107,47 @@
101107

102108
PROLOGUE
103109

110+
subl $ARGS,%esp
111+
104112
pushl %ebp
105113
pushl %edi
106114
pushl %esi
107115
pushl %ebx
108116

109117
PROFCODE
110118

119+
111120
movl STACK_LDA, LDA
121+
movl LDA,LDAX # backup LDA
122+
movl N,J
123+
movl J,NN # backup N
124+
movl A,J
125+
movl J,AA # backup A
126+
movl M,J
127+
movl J,MMM # mov M to MMM
128+
.L0t:
129+
xorl J,J
130+
addl $1,J
131+
sall $22,J # J=2^22
132+
subl J,MMM # MMM=MMM-J
133+
movl J,M
134+
jge .L00t
135+
ALIGN_4
136+
137+
movl MMM,%eax
138+
addl J,%eax
139+
jle .L999x
140+
movl %eax,M
141+
142+
.L00t:
143+
movl AA,%eax
144+
movl %eax,A # mov AA to A
145+
146+
movl NN,%eax
147+
movl %eax,N # reset N
148+
149+
150+
movl LDAX, LDA # reset LDA
112151
movl STACK_X, X
113152
movl STACK_INCX, INCX
114153
movl STACK_INCY, INCY
@@ -117,6 +156,7 @@
117156
leal (,INCY, SIZE), INCY
118157
leal (,LDA, SIZE), LDA
119158

159+
120160
subl $-16 * SIZE, A
121161

122162
cmpl $0, N
@@ -560,10 +600,19 @@
560600
ALIGN_4
561601

562602
.L999:
603+
movl M,J
604+
leal (,J,SIZE),%eax
605+
addl %eax,AA
606+
jmp .L0t
607+
ALIGN_4
608+
609+
.L999x:
563610
popl %ebx
564611
popl %esi
565612
popl %edi
566613
popl %ebp
614+
615+
addl $ARGS,%esp
567616
ret
568617

569618
EPILOGUE

kernel/x86_64/sgemv_t.S

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747

4848
#ifndef WINDOWS_ABI
4949

50-
#define STACKSIZE 64
50+
#define STACKSIZE 128
5151

5252
#define OLD_M %rdi
5353
#define OLD_N %rsi
@@ -57,6 +57,10 @@
5757
#define STACK_Y 16 + STACKSIZE(%rsp)
5858
#define STACK_INCY 24 + STACKSIZE(%rsp)
5959
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
60+
#define MMM 56(%rsp)
61+
#define NN 64(%rsp)
62+
#define AA 72(%rsp)
63+
#define LDAX 80(%rsp)
6064

6165
#else
6266

@@ -71,6 +75,10 @@
7175
#define STACK_Y 72 + STACKSIZE(%rsp)
7276
#define STACK_INCY 80 + STACKSIZE(%rsp)
7377
#define STACK_BUFFER 88 + STACKSIZE(%rsp)
78+
#define MMM 216(%rsp) /* backup slot for M; NOTE(review): xmm15 is spilled via movups at 208(%rsp) (bytes 208..223), so this slot appears to overlap it - confirm offsets */
79+
#define NN 224(%rsp)
80+
#define AA 232(%rsp)
81+
#define LDAX 240(%rsp)
7482

7583
#endif
7684

@@ -127,29 +135,46 @@
127135
movups %xmm14, 192(%rsp)
128136
movups %xmm15, 208(%rsp)
129137

130-
movq OLD_M, M
131-
movq OLD_N, N
132-
movq OLD_A, A
133-
movq OLD_LDA, LDA
138+
movq OLD_M, MMM
139+
movq OLD_N, NN
140+
movq OLD_A, AA
141+
movq OLD_LDA, LDAX
134142
movq OLD_X, X
135143
#else
136-
movq OLD_M, M
137-
movq OLD_N, N
138-
movq OLD_A, A
139-
movq OLD_LDA, LDA
144+
movq OLD_M, MMM
145+
movq OLD_N, NN
146+
movq OLD_A, AA
147+
movq OLD_LDA, LDAX
140148
#endif
141-
142-
movq STACK_INCX, INCX
143-
movq STACK_Y, Y
144-
movq STACK_INCY, INCY
145-
movq STACK_BUFFER, BUFFER
146-
147149
#ifndef WINDOWS_ABI
148150
pshufd $0, %xmm0, ALPHA
149151
#else
150152
pshufd $0, %xmm3, ALPHA
151153
#endif
152154

155+
156+
.L0t:
157+
xorq M,M
158+
addq $1,M
159+
salq $22,M
160+
subq M,MMM
161+
jge .L00t
162+
ALIGN_4
163+
164+
movq MMM,%rax
165+
addq M,%rax
166+
jle .L999x
167+
movq %rax,M
168+
169+
.L00t:
170+
movq LDAX,LDA
171+
movq NN,N
172+
movq AA,A
173+
movq STACK_INCX, INCX
174+
movq STACK_Y, Y
175+
movq STACK_INCY, INCY
176+
movq STACK_BUFFER, BUFFER
177+
153178
leaq (,INCX, SIZE), INCX
154179
leaq (,INCY, SIZE), INCY
155180
leaq (,LDA, SIZE), LDA
@@ -6341,6 +6366,12 @@
63416366
ALIGN_4
63426367

63436368
.L999:
6369+
leaq (,M,SIZE),%rax
6370+
addq %rax,AA
6371+
jmp .L0t
6372+
ALIGN_4
6373+
6374+
.L999x:
63446375
movq 0(%rsp), %rbx
63456376
movq 8(%rsp), %rbp
63466377
movq 16(%rsp), %r12

0 commit comments

Comments
 (0)