Commit e9a4b79

arm64: cmpxchg_dbl: patch in lse instructions when supported by the CPU
On CPUs which support the LSE atomic instructions introduced in ARMv8.1,
it makes sense to use them in preference to ll/sc sequences. This patch
introduces runtime patching of our cmpxchg_double primitives so that the
LSE casp instruction is used instead.

Reviewed-by: Catalin Marinas <[email protected]>
Signed-off-by: Will Deacon <[email protected]>
1 parent c342f78 commit e9a4b79
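
For context, cmpxchg_double() atomically compare-and-exchanges two adjacent
64-bit words as a single unit; LSE's casp does this in one instruction instead
of an ldxp/stxp retry loop. A minimal sketch of a caller follows — the struct
and function names are illustrative, not from this patch; the 16-byte
alignment is a requirement of the double-word primitives:

/* Illustrative only: cmpxchg_double() operates on two adjacent
 * 64-bit words, and the pair must be 16-byte aligned.
 */
struct two_words {
	unsigned long first;
	unsigned long second;
} __aligned(16);

static bool pair_update(struct two_words *p,
			unsigned long old1, unsigned long old2,
			unsigned long new1, unsigned long new2)
{
	if (!system_has_cmpxchg_double())
		return false;	/* fall back to a lock, not shown */

	/* Nonzero return: both words matched and the new pair was stored. */
	return cmpxchg_double(&p->first, &p->second,
			      old1, old2, new1, new2);
}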

File tree

3 files changed: +94 −51 lines changed

arch/arm64/include/asm/atomic_ll_sc.h
Lines changed: 34 additions & 0 deletions

@@ -253,4 +253,38 @@ __CMPXCHG_CASE( , , mb_8, dmb ish, "memory")
 
 #undef __CMPXCHG_CASE
 
+#define __CMPXCHG_DBL(name, mb, cl)					\
+__LL_SC_INLINE int							\
+__LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1,		\
+				      unsigned long old2,		\
+				      unsigned long new1,		\
+				      unsigned long new2,		\
+				      volatile void *ptr))		\
+{									\
+	unsigned long tmp, ret;						\
+									\
+	asm volatile("// __cmpxchg_double" #name "\n"			\
+	"	" #mb "\n"						\
+	"1:	ldxp	%0, %1, %2\n"					\
+	"	eor	%0, %0, %3\n"					\
+	"	eor	%1, %1, %4\n"					\
+	"	orr	%1, %0, %1\n"					\
+	"	cbnz	%1, 2f\n"					\
+	"	stxp	%w0, %5, %6, %2\n"				\
+	"	cbnz	%w0, 1b\n"					\
+	"	" #mb "\n"						\
+	"2:"								\
+	: "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr)	\
+	: "r" (old1), "r" (old2), "r" (new1), "r" (new2)		\
+	: cl);								\
+									\
+	return ret;							\
+}									\
+__LL_SC_EXPORT(__cmpxchg_double##name);
+
+__CMPXCHG_DBL(   ,        ,         )
+__CMPXCHG_DBL(_mb, dmb ish, "memory")
+
+#undef __CMPXCHG_DBL
+
 #endif /* __ASM_ATOMIC_LL_SC_H */
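
Since the macro layers can be hard to read, here is a hand expansion of
__CMPXCHG_DBL(_mb, dmb ish, "memory") — a sketch of what the preprocessor
generates from the hunk above, with comments added; it is not literal output:

/* Approximate expansion of __CMPXCHG_DBL(_mb, dmb ish, "memory"). */
__LL_SC_INLINE int
__LL_SC_PREFIX(__cmpxchg_double_mb(unsigned long old1, unsigned long old2,
				   unsigned long new1, unsigned long new2,
				   volatile void *ptr))
{
	unsigned long tmp, ret;

	asm volatile("// __cmpxchg_double_mb\n"
	"	dmb ish\n"			/* barrier before the loop */
	"1:	ldxp	%0, %1, %2\n"		/* load-exclusive both words */
	"	eor	%0, %0, %3\n"		/* tmp = cur1 ^ old1 */
	"	eor	%1, %1, %4\n"		/* ret = cur2 ^ old2 */
	"	orr	%1, %0, %1\n"		/* ret == 0 iff both matched */
	"	cbnz	%1, 2f\n"		/* mismatch: bail out */
	"	stxp	%w0, %5, %6, %2\n"	/* store-exclusive new pair */
	"	cbnz	%w0, 1b\n"		/* lost exclusivity: retry */
	"	dmb ish\n"			/* barrier after success */
	"2:"
	: "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr)
	: "r" (old1), "r" (old2), "r" (new1), "r" (new2)
	: "memory");

	return ret;	/* 0 on success; the wrappers invert this */
}
__LL_SC_EXPORT(__cmpxchg_double_mb);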

arch/arm64/include/asm/atomic_lse.h
Lines changed: 43 additions & 0 deletions

@@ -388,4 +388,47 @@ __CMPXCHG_CASE(x,  , mb_8, al, "memory")
 #undef __LL_SC_CMPXCHG
 #undef __CMPXCHG_CASE
 
+#define __LL_SC_CMPXCHG_DBL(op)	__LL_SC_CALL(__cmpxchg_double##op)
+
+#define __CMPXCHG_DBL(name, mb, cl...)					\
+static inline int __cmpxchg_double##name(unsigned long old1,		\
+					 unsigned long old2,		\
+					 unsigned long new1,		\
+					 unsigned long new2,		\
+					 volatile void *ptr)		\
+{									\
+	unsigned long oldval1 = old1;					\
+	unsigned long oldval2 = old2;					\
+	register unsigned long x0 asm ("x0") = old1;			\
+	register unsigned long x1 asm ("x1") = old2;			\
+	register unsigned long x2 asm ("x2") = new1;			\
+	register unsigned long x3 asm ("x3") = new2;			\
+	register unsigned long x4 asm ("x4") = (unsigned long)ptr;	\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"	nop\n"							\
+	"	nop\n"							\
+	"	nop\n"							\
+	__LL_SC_CMPXCHG_DBL(name),					\
+	/* LSE atomics */						\
+	"	casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
+	"	eor	%[old1], %[old1], %[oldval1]\n"			\
+	"	eor	%[old2], %[old2], %[oldval2]\n"			\
+	"	orr	%[old1], %[old1], %[old2]")			\
+	: [old1] "+r" (x0), [old2] "+r" (x1),				\
+	  [v] "+Q" (*(unsigned long *)ptr)				\
+	: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4),		\
+	  [oldval1] "r" (oldval1), [oldval2] "r" (oldval2)		\
+	: "x30" , ##cl);						\
+									\
+	return x0;							\
+}
+
+__CMPXCHG_DBL(   ,   )
+__CMPXCHG_DBL(_mb, al, "memory")
+
+#undef __LL_SC_CMPXCHG_DBL
+#undef __CMPXCHG_DBL
+
 #endif /* __ASM_ATOMIC_LSE_H */
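
The LL/SC alternative here is a branch-and-link to the out-of-line
__LL_SC_PREFIX'd function from atomic_ll_sc.h, which is why x30 (the link
register) appears in the clobber list and why three nops pad the sequence to
the same length as the four LSE instructions. ARM64_LSE_ATOMIC_INSN itself is
roughly a wrapper around the alternatives framework; a sketch of its shape,
assuming the ARM64_HAS_LSE_ATOMICS capability used elsewhere in this series
and simplified from the real asm/lse.h:

/* Sketch: both sequences must assemble to the same size so that
 * boot-time alternatives patching can overwrite one with the other
 * when the CPU advertises LSE atomics.
 */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
	ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)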

arch/arm64/include/asm/cmpxchg.h
Lines changed: 17 additions & 51 deletions

@@ -128,51 +128,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 	unreachable();
 }
 
-#define system_has_cmpxchg_double()	1
-
-static inline int __cmpxchg_double(volatile void *ptr1, volatile void *ptr2,
-		unsigned long old1, unsigned long old2,
-		unsigned long new1, unsigned long new2, int size)
-{
-	unsigned long loop, lost;
-
-	switch (size) {
-	case 8:
-		VM_BUG_ON((unsigned long *)ptr2 - (unsigned long *)ptr1 != 1);
-		do {
-			asm volatile("// __cmpxchg_double8\n"
-			"	ldxp	%0, %1, %2\n"
-			"	eor	%0, %0, %3\n"
-			"	eor	%1, %1, %4\n"
-			"	orr	%1, %0, %1\n"
-			"	mov	%w0, #0\n"
-			"	cbnz	%1, 1f\n"
-			"	stxp	%w0, %5, %6, %2\n"
-			"1:\n"
-			: "=&r"(loop), "=&r"(lost), "+Q" (*(u64 *)ptr1)
-			: "r" (old1), "r"(old2), "r"(new1), "r"(new2));
-		} while (loop);
-		break;
-	default:
-		BUILD_BUG();
-	}
-
-	return !lost;
-}
-
-static inline int __cmpxchg_double_mb(volatile void *ptr1, volatile void *ptr2,
-		unsigned long old1, unsigned long old2,
-		unsigned long new1, unsigned long new2, int size)
-{
-	int ret;
-
-	smp_mb();
-	ret = __cmpxchg_double(ptr1, ptr2, old1, old2, new1, new2, size);
-	smp_mb();
-
-	return ret;
-}
-
 static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
 					 unsigned long new, int size)
 {
@@ -210,21 +165,32 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
 	__ret; \
 })
 
+#define system_has_cmpxchg_double()	1
+
+#define __cmpxchg_double_check(ptr1, ptr2) \
+({ \
+	if (sizeof(*(ptr1)) != 8) \
+		BUILD_BUG(); \
+	VM_BUG_ON((unsigned long *)(ptr2) - (unsigned long *)(ptr1) != 1); \
+})
+
 #define cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \
 ({\
 	int __ret;\
-	__ret = __cmpxchg_double_mb((ptr1), (ptr2), (unsigned long)(o1), \
-			(unsigned long)(o2), (unsigned long)(n1), \
-			(unsigned long)(n2), sizeof(*(ptr1)));\
+	__cmpxchg_double_check(ptr1, ptr2); \
+	__ret = !__cmpxchg_double_mb((unsigned long)(o1), (unsigned long)(o2), \
+				     (unsigned long)(n1), (unsigned long)(n2), \
+				     ptr1); \
 	__ret; \
 })
 
 #define cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \
 ({\
 	int __ret;\
-	__ret = __cmpxchg_double((ptr1), (ptr2), (unsigned long)(o1), \
-			(unsigned long)(o2), (unsigned long)(n1), \
-			(unsigned long)(n2), sizeof(*(ptr1)));\
+	__cmpxchg_double_check(ptr1, ptr2); \
+	__ret = !__cmpxchg_double((unsigned long)(o1), (unsigned long)(o2), \
+				  (unsigned long)(n1), (unsigned long)(n2), \
+				  ptr1); \
 	__ret; \
 })
 
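
Two behavioural notes on this rework: the size check that used to live in a
runtime switch (size) now fires at the call site via __cmpxchg_double_check(),
and the new helpers return 0 on success, so the wrappers negate the result to
keep the historical "nonzero means the swap happened" contract. Hypothetical
types, only to illustrate what the new checks accept and reject:

struct ok_pair {
	u64 a;		/* sizeof(*(ptr1)) == 8: passes the BUILD_BUG() check */
	u64 b;		/* &b == &a + 1: passes the VM_BUG_ON() adjacency check */
} __aligned(16);

struct bad_pair {
	u32 a;		/* sizeof(*(ptr1)) == 4: BUILD_BUG() fires at build time */
	u32 b;
};

/*
 * cmpxchg_double(&ok.a, &ok.b, ...)   compiles; returns nonzero on success.
 * cmpxchg_double(&bad.a, &bad.b, ...) fails to build via BUILD_BUG().
 */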
