We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 200e4ac commit 0b08f74Copy full SHA for 0b08f74
kernel/x86/gemv_t_sse.S
@@ -142,7 +142,9 @@
142
.L0t:
143
xorl J,J
144
addl $1,J
145
- sall $21,J # J=2^22
+ sall $22,J # J=2^22; 2^22*sizeof(float)=buffer size(16MB)
146
+ subl $8, J # Don't use the last 8 floats in the buffer.
147
+ # Now, split M by block J
148
subl J,MMM # MMM=MMM-J
149
movl J,M
150
jge .L00t
kernel/x86/gemv_t_sse2.S
@@ -128,7 +128,9 @@
128
129
130
131
- sall $22,J # J=2^22
+ sall $21,J # J=2^21; 2^21*sizeof(double)=buffer size(16MB)
132
+ subl $4, J # Don't use the last 4 doubles in the buffer.
133
134
135
136
0 commit comments