From fe482ad73ae4e9120e463cba5b960c2aebd80327 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Wed, 19 Jan 2022 23:17:08 +0000 Subject: [PATCH 01/11] Scope "result" variable in options result is commonly used as a scoped variable throughout OMPI's configure. This doesn't currently cause failures, because the assembly checks unconditionally unset result (after using it for its own things). But to clean up the assembly macros, we must clean up this usage. Signed-off-by: Brian Barrett --- config/opal_configure_options.m4 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/config/opal_configure_options.m4 b/config/opal_configure_options.m4 index 36e6052f64a..6b349a72591 100644 --- a/config/opal_configure_options.m4 +++ b/config/opal_configure_options.m4 @@ -420,6 +420,7 @@ AM_CONDITIONAL([OPAL_WANT_SCRIPT_WRAPPER_COMPILERS], [test "$enable_script_wrapp # # Support per-user config files? # +OPAL_VAR_SCOPE_PUSH([result]) AC_ARG_ENABLE([per-user-config-files], [AS_HELP_STRING([--enable-per-user-config-files], [Disable per-user configuration files, to save disk accesses during job start-up. This is likely desirable for large jobs. Note that this can also be achieved by environment variables at run-time. (default: enabled)])]) @@ -430,6 +431,7 @@ else fi AC_DEFINE_UNQUOTED([OPAL_WANT_HOME_CONFIG_FILES], [$result], [Enable per-user config files]) +OPAL_VAR_SCOPE_POP # # Do we want to enable IPv6 support? From 6e5cf17c847ccd8911666df0d8a2e0906260bd67 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Tue, 11 Jan 2022 23:10:44 +0000 Subject: [PATCH 02/11] Remove infrastructure for non-inline assembly As of ebce88b7, we no longer have any non-inline assembly code in Open MPI. This patch removes the configure infrastructure to support non-inline assembly. Since the only assembly style supported is GCC inline assembly, remove a bunch of constant checks in the atomics and timing code around GCC inline assembly. 
Those headers won't be included unless GCC inline assembly is supported. Finally, be consistent in what constant we are checking for GCC inline assembly support and add a missing check in the ARM64 timer code. Signed-off-by: Brian Barrett --- config/opal_config_asm.m4 | 545 +--------------------- opal/include/opal/sys/architecture.h | 5 +- opal/include/opal/sys/arm64/atomic.h | 85 ++-- opal/include/opal/sys/arm64/atomic_llsc.h | 4 +- opal/include/opal/sys/arm64/timer.h | 6 + opal/include/opal/sys/arm64/update.sh | 36 -- opal/include/opal/sys/atomic.h | 66 +-- opal/include/opal/sys/powerpc/atomic.h | 194 ++++---- opal/include/opal/sys/powerpc/timer.h | 6 +- opal/include/opal/sys/powerpc/update.sh | 39 -- opal/include/opal/sys/timer.h | 17 +- opal/include/opal/sys/x86_64/atomic.h | 27 +- opal/include/opal/sys/x86_64/timer.h | 6 +- opal/include/opal/sys/x86_64/update.sh | 36 -- 14 files changed, 171 insertions(+), 901 deletions(-) delete mode 100644 opal/include/opal/sys/arm64/update.sh delete mode 100644 opal/include/opal/sys/powerpc/update.sh delete mode 100644 opal/include/opal/sys/x86_64/update.sh diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index ad9065eb688..6c3d3624192 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -20,6 +20,8 @@ dnl reserved. dnl Copyright (c) 2020 Google, LLC. All rights reserved. dnl Copyright (c) 2020 Intel, Inc. All rights reserved. dnl Copyright (c) 2021 IBM Corporation. All rights reserved. +dnl Copyright (c) 2022 Amazon.com, Inc. or its affiliates. +dnl All Rights reserved. dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -422,463 +424,6 @@ AC_DEFUN([OPAL_CHECK_C11_CSWAP_INT128], [ OPAL_VAR_SCOPE_POP ]) -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_TEXT -dnl -dnl Determine how to set current mode as text. 
-dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_TEXT],[ - AC_MSG_CHECKING([directive for setting text section]) - opal_cv_asm_text="" - if test "$opal_cv_c_compiler_vendor" = "microsoft" ; then - # text section will be brought in with the rest of - # header for MS - leave blank for now - opal_cv_asm_text="" - else - case $host in - *-aix*) - opal_cv_asm_text=[".csect .text[PR]"] - ;; - *) - opal_cv_asm_text=".text" - ;; - esac - fi - AC_MSG_RESULT([$opal_cv_asm_text]) - AC_DEFINE_UNQUOTED([OPAL_ASM_TEXT], ["$opal_cv_asm_text"], - [Assembly directive for setting text section]) - OPAL_ASM_TEXT="$opal_cv_asm_text" - AC_SUBST(OPAL_ASM_TEXT) -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_GLOBAL -dnl -dnl Sets OPAL_ASM_GLOBAL to the value to prefix global values -dnl -dnl I'm sure if I don't have a test for this, there will be some -dnl dumb platform that uses something else -dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_GLOBAL],[ - AC_MSG_CHECKING([directive for exporting symbols]) - opal_cv_asm_global="" - if test "$opal_cv_c_compiler_vendor" = "microsoft" ; then - opal_cv_asm_global="PUBLIC" - else - case $host in - *) - opal_cv_asm_global=".globl" - ;; - esac - fi - AC_MSG_RESULT([$opal_cv_asm_global]) - AC_DEFINE_UNQUOTED([OPAL_ASM_GLOBAL], ["$opal_cv_asm_global"], - [Assembly directive for exporting symbols]) - OPAL_ASM_GLOBAL="$opal_cv_asm_global" - AC_SUBST(OPAL_AS_GLOBAL) -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_LSYM -dnl -dnl Sets OPAL_ASM_LSYM to the prefix value on a symbol to make it -dnl an internal label (jump target and whatnot) -dnl -dnl We look for L .L $ L$ (in that order) for something that both -dnl assembles and does not leave a label in the output of nm. 
Fall -dnl back to L if nothing else seems to work :/ -dnl -dnl ################################################################# - -# _OPAL_CHECK_ASM_LSYM([variable-to-set]) -# --------------------------------------- -AC_DEFUN([_OPAL_CHECK_ASM_LSYM],[ - AC_REQUIRE([AC_PROG_GREP]) - - $1="L" - - for sym in L .L $ L$ ; do - asm_result=0 - echo "configure: trying $sym" >&AS_MESSAGE_LOG_FD - OPAL_TRY_ASSEMBLE([foobar$opal_cv_asm_label_suffix -${sym}mytestlabel$opal_cv_asm_label_suffix], - [# ok, we succeeded at assembling. see if we can nm, - # throwing the results in a file - if $NM conftest.$OBJEXT > conftest.out 2>&AS_MESSAGE_LOG_FD ; then - if test "`$GREP mytestlabel conftest.out`" = "" ; then - # there was no symbol... looks promising to me - $1="$sym" - asm_result=1 - elif test ["`$GREP ' [Nt] .*mytestlabel' conftest.out`"] = "" ; then - # see if we have a non-global-ish symbol - # but we should see if we can do better. - $1="$sym" - fi - else - # not so much on the NM goodness :/ - echo "$NM failed. Output from NM was:" >&AS_MESSAGE_LOG_FD - cat conftest.out >&AS_MESSAGE_LOG_FD - AC_MSG_WARN([$NM could not read object file]) - fi - ]) - if test "$asm_result" = "1" ; then - break - fi - done - rm -f conftest.out - unset asm_result sym -]) - -# OPAL_CHECK_ASM_LSYM() -# --------------------- -AC_DEFUN([OPAL_CHECK_ASM_LSYM],[ - AC_REQUIRE([LT_PATH_NM]) - - AC_CACHE_CHECK([prefix for lsym labels], - [opal_cv_asm_lsym], - [_OPAL_CHECK_ASM_LSYM([opal_cv_asm_lsym])]) - AC_DEFINE_UNQUOTED([OPAL_ASM_LSYM], ["$opal_cv_asm_lsym"], - [Assembly prefix for lsym labels]) - OPAL_ASM_LSYM="$opal_cv_asm_lsym" - AC_SUBST(OPAL_ASM_LSYM) -])dnl - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_PROC -dnl -dnl Sets a cv-flag, if the compiler needs a proc/endp-definition to -dnl link with C. 
-dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_PROC],[ - AC_CACHE_CHECK([if .proc/endp is needed], - [opal_cv_asm_need_proc], - [opal_cv_asm_need_proc="no" - OPAL_TRY_ASSEMBLE([ - .proc mysym -mysym: - .endp mysym], - [opal_cv_asm_need_proc="yes"]) - rm -f conftest.out]) - - if test "$opal_cv_asm_need_proc" = "yes" ; then - opal_cv_asm_proc=".proc" - opal_cv_asm_endproc=".endp" - else - opal_cv_asm_proc="#" - opal_cv_asm_endproc="#" - fi -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_GSYM -dnl -dnl Sets OPAL_ASM_GSYM to the prefix value on a symbol to make it -dnl a global linkable from C. Basically, an _ or not. -dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_GSYM],[ - AC_CACHE_CHECK([prefix for global symbol labels], - [opal_cv_asm_gsym], - [_OPAL_CHECK_ASM_GSYM]) - - if test "$opal_cv_asm_gsym" = "none" ; then - AC_MSG_ERROR([Could not determine global symbol label prefix]) - fi - - AC_DEFINE_UNQUOTED([OPAL_ASM_GSYM], ["$opal_cv_asm_gsym"], - [Assembly prefix for gsym labels]) - OPAL_ASM_GSYM="$opal_cv_asm_gsym" - AC_SUBST(OPAL_ASM_GSYM) - -]) - -AC_DEFUN([_OPAL_CHECK_ASM_GSYM],[ - opal_cv_asm_gsym="none" - - for sym in "_" "" "." 
; do - asm_result=0 - echo "configure: trying $sym" >&AS_MESSAGE_LOG_FD -cat > conftest_c.c <&AS_MESSAGE_LOG_FD - opal_link="$CC $CFLAGS conftest_c.$OBJEXT conftest.$OBJEXT -o conftest $LDFLAGS $LIBS > conftest.link 2>&1" - if AC_TRY_EVAL(opal_link) ; then - # save the warnings - cat conftest.link >&AS_MESSAGE_LOG_FD - asm_result=1 - else - cat conftest.link >&AS_MESSAGE_LOG_FD - echo "configure: failed C program was: " >&AS_MESSAGE_LOG_FD - cat conftest_c.c >&AS_MESSAGE_LOG_FD - echo "configure: failed ASM program was: " >&AS_MESSAGE_LOG_FD - cat conftest.s >&AS_MESSAGE_LOG_FD - asm_result=0 - fi - else - # save output and failed program - cat conftest.cmpl >&AS_MESSAGE_LOG_FD - echo "configure: failed C program was: " >&AS_MESSAGE_LOG_FD - cat conftest.c >&AS_MESSAGE_LOG_FD - asm_result=0 - fi], - [asm_result=0]) - if test "$asm_result" = "1" ; then - opal_cv_asm_gsym="$sym" - break - fi - done - rm -rf conftest.* -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_LABEL_SUFFIX -dnl -dnl Sets OPAL_ASM_LABEL_SUFFIX to the value to suffix for labels -dnl -dnl I'm sure if I don't have a test for this, there will be some -dnl dumb platform that uses something else -dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_LABEL_SUFFIX],[ - AC_MSG_CHECKING([suffix for labels]) - opal_cv_asm_label_suffix="" - case $host in - *) - opal_cv_asm_label_suffix=":" - ;; - esac - AC_MSG_RESULT([$opal_cv_asm_label_suffix]) - AC_DEFINE_UNQUOTED([OPAL_ASM_LABEL_SUFFIX], ["$opal_cv_asm_label_suffix"], - [Assembly suffix for labels]) - OPAL_ASM_LABEL_SUFFIX="$opal_cv_asm_label_suffix" - AC_SUBST(OPAL_AS_LABEL_SUFFIX) -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_ALIGN_LOG -dnl -dnl Sets OPAL_ASM_ALIGN_LOG to 1 if align is specified -dnl logarithmically, 0 otherwise -dnl -dnl 
################################################################# -AC_DEFUN([OPAL_CHECK_ASM_ALIGN_LOG],[ - AC_REQUIRE([LT_PATH_NM]) - AC_REQUIRE([AC_PROG_GREP]) - - AC_CACHE_CHECK([if .align directive takes logarithmic value], - [opal_cv_asm_align_log], - [ OPAL_TRY_ASSEMBLE([ $opal_cv_asm_text - .align 4 - $opal_cv_asm_global foo - .byte 1 - .align 4 -foo$opal_cv_asm_label_suffix - .byte 2], - [opal_asm_addr=[`$NM conftest.$OBJEXT | $GREP foo | sed -e 's/.*\([0-9a-fA-F][0-9a-fA-F]\).*foo.*/\1/'`]], - [opal_asm_addr=""]) - # test for both 16 and 10 (decimal and hex notations) - echo "configure: .align test address offset is $opal_asm_addr" >&AS_MESSAGE_LOG_FD - if test "$opal_asm_addr" = "16" || test "$opal_asm_addr" = "10" ; then - opal_cv_asm_align_log="yes" - else - opal_cv_asm_align_log="no" - fi]) - - if test "$opal_cv_asm_align_log" = "yes" || test "$opal_cv_asm_align_log" = "1" ; then - opal_asm_align_log_result=1 - else - opal_asm_align_log_result=0 - fi - - AC_DEFINE_UNQUOTED([OPAL_ASM_ALIGN_LOG], - [$asm_align_log_result], - [Assembly align directive expects logarithmic value]) - - unset omp_asm_addr asm_result -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_TYPE -dnl -dnl Sets OPAL_ASM_TYPE to the prefix for the function type to -dnl set a symbol's type as function (needed on ELF for shared -dnl libraries). 
If no .type directive is needed, sets OPAL_ASM_TYPE -dnl to an empty string -dnl -dnl We look for @ \# % -dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_TYPE],[ - AC_CACHE_CHECK([prefix for function in .type], - [opal_cv_asm_type], - [_OPAL_CHECK_ASM_TYPE]) - - AC_DEFINE_UNQUOTED([OPAL_ASM_TYPE], ["$opal_cv_asm_type"], - [How to set function type in .type directive]) - OPAL_ASM_TYPE="$opal_cv_asm_type" - AC_SUBST(OPAL_ASM_TYPE) -]) - -AC_DEFUN([_OPAL_CHECK_ASM_TYPE],[ - opal_cv_asm_type="" - - case "${host}" in - *-sun-solaris*) - # GCC on solaris seems to accept just about anything, not - # that what it defines actually works... So just hardwire - # to the right answer - opal_cv_asm_type="#" - ;; - *) - for type in @ \# % ; do - asm_result=0 - echo "configure: trying $type" >&AS_MESSAGE_LOG_FD - OPAL_TRY_ASSEMBLE([ .type mysym, ${type}function -mysym:], - [opal_cv_asm_type="${type}" - asm_result=1]) - if test "$asm_result" = "1" ; then - break - fi - done - ;; - esac - rm -f conftest.out - - unset asm_result type -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_ASM_SIZE -dnl -dnl Sets OPAL_ASM_SIZE to 1 if we should set .size directives for -dnl each function, 0 otherwise. 
-dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_ASM_SIZE],[ - AC_CACHE_CHECK([if .size is needed], - [opal_cv_asm_need_size], - [opal_cv_asm_need_size="no" - OPAL_TRY_ASSEMBLE([ .size mysym, 1], - [opal_cv_asm_need_size="yes"]) - rm -f conftest.out]) - - if test "$opal_cv_asm_need_size" = "yes" ; then - opal_asm_size=1 - else - opal_asm_size=0 - fi - - AC_DEFINE_UNQUOTED([OPAL_ASM_SIZE], ["$opal_asm_size"], - [Do we need to give a .size directive]) - OPAL_ASM_SIZE="$opal_asm_size" - AC_SUBST(OPAL_ASM_TYPE) - unset asm_result -])dnl - - -# OPAL_CHECK_ASM_GNU_STACKEXEC(var) -# ---------------------------------- -# sets shell variable var to the things necessary to -# disable execable stacks with GAS -AC_DEFUN([OPAL_CHECK_ASM_GNU_STACKEXEC], [ - AC_REQUIRE([AC_PROG_GREP]) - - AC_CHECK_PROG([OBJDUMP], [objdump], [objdump]) - AC_CACHE_CHECK([if .note.GNU-stack is needed], - [opal_cv_asm_gnu_stack_result], - [AS_IF([test "$OBJDUMP" != ""], - [ # first, see if a simple C program has it set - cat >conftest.c <&1 | $GREP '\.note\.GNU-stack' &> /dev/null && opal_cv_asm_gnu_stack_result=yes], - [OPAL_LOG_MSG([the failed program was:], 1) - OPAL_LOG_FILE([conftest.c]) - opal_cv_asm_gnu_stack_result=no]) - if test "$opal_cv_asm_gnu_stack_result" != "yes" ; then - opal_cv_asm_gnu_stack_result="no" - fi - rm -rf conftest.*], - [opal_cv_asm_gnu_stack_result="no"])]) - if test "$opal_cv_asm_gnu_stack_result" = "yes" ; then - opal_cv_asm_gnu_stack=1 - else - opal_cv_asm_gnu_stack=0 - fi -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_CHECK_POWERPC_REG -dnl -dnl See if the notation for specifying registers is X (most everyone) -dnl or rX (OS X) -dnl -dnl ################################################################# -AC_DEFUN([OPAL_CHECK_POWERPC_REG],[ - AC_MSG_CHECKING([if PowerPC registers have r prefix]) - OPAL_TRY_ASSEMBLE([$opal_cv_asm_text - addi 1,1,0], - 
[opal_cv_asm_powerpc_r_reg=0], - [OPAL_TRY_ASSEMBLE([$opal_cv_asm_text - addi r1,r1,0], - [opal_cv_asm_powerpc_r_reg=1], - [AC_MSG_ERROR([Can not determine how to use PPC registers])])]) - if test "$opal_cv_asm_powerpc_r_reg" = "1" ; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi - - AC_DEFINE_UNQUOTED([OPAL_POWERPC_R_REGISTERS], - [$opal_cv_asm_powerpc_r_reg], - [Whether r notation is used for ppc registers]) -])dnl dnl ################################################################# dnl @@ -977,7 +522,7 @@ dnl some compilers (i.e. earlier versions of Sun Studio 12) do not dnl necessarily handle xaddl properly, so that needs to be detected dnl during configure time. dnl -dnl DEFINE OPAL_GCC_INLINE_ASSEMBLY to 0 or 1 depending on GCC +dnl DEFINE OPAL_C_GCC_INLINE_ASSEMBLY to 0 or 1 depending on GCC dnl support dnl dnl ################################################################# @@ -1040,7 +585,6 @@ dnl dnl ################################################################# AC_DEFUN([OPAL_CONFIG_ASM],[ AC_REQUIRE([OPAL_SETUP_CC]) - AC_REQUIRE([AM_PROG_AS]) AC_ARG_ENABLE([c11-atomics],[AS_HELP_STRING([--enable-c11-atomics], [Enable use of C11 atomics if available (default: enabled)])]) @@ -1068,17 +612,6 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ fi fi - OPAL_CHECK_ASM_PROC - OPAL_CHECK_ASM_TEXT - OPAL_CHECK_ASM_GLOBAL - OPAL_CHECK_ASM_GNU_STACKEXEC - OPAL_CHECK_ASM_LABEL_SUFFIX - OPAL_CHECK_ASM_GSYM - OPAL_CHECK_ASM_LSYM - OPAL_CHECK_ASM_TYPE - OPAL_CHECK_ASM_SIZE - OPAL_CHECK_ASM_ALIGN_LOG - # find our architecture for purposes of assembly stuff opal_cv_asm_arch="UNSUPPORTED" OPAL_GCC_INLINE_ASSIGN="" @@ -1112,7 +645,6 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ ;; powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*) - OPAL_CHECK_POWERPC_REG if test "$ac_cv_sizeof_long" = "4" ; then if test $opal_cv_asm_builtin = BUILTIN_NO ; then AC_MSG_ERROR([PowerPC 32-bit atomics are no longer supported. 
Use a C11 compiler]) @@ -1153,59 +685,13 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ opal_cv_asm_inline_supported="no" # now that we know our architecture, try to inline assemble OPAL_CHECK_INLINE_C_GCC([$OPAL_GCC_INLINE_ASSIGN]) - - # format: - # config_file-text-global-label_suffix-gsym-lsym-type-size-align_log-ppc_r_reg-64_bit-gnu_stack - asm_format="default" - asm_format="${asm_format}-${opal_cv_asm_text}-${opal_cv_asm_global}" - asm_format="${asm_format}-${opal_cv_asm_label_suffix}-${opal_cv_asm_gsym}" - asm_format="${asm_format}-${opal_cv_asm_lsym}" - asm_format="${asm_format}-${opal_cv_asm_type}-${opal_asm_size}" - asm_format="${asm_format}-${opal_asm_align_log_result}" - if test "$opal_cv_asm_arch" = "POWERPC64" ; then - asm_format="${asm_format}-${opal_cv_asm_powerpc_r_reg}" - else - asm_format="${asm_format}-1" - fi - asm_format="${asm_format}-1" - opal_cv_asm_format="${asm_format}-${opal_cv_asm_gnu_stack}" - # For the Makefile, need to escape the $ as $$. Don't display - # this version, but make sure the Makefile gives the right thing - # when regenerating the files because the base has been touched. 
- OPAL_ASSEMBLY_FORMAT=`echo "$opal_cv_asm_format" | sed -e 's/\\\$/\\\$\\\$/'` - - AC_MSG_CHECKING([for assembly format]) - AC_MSG_RESULT([$opal_cv_asm_format]) - AC_DEFINE_UNQUOTED([OPAL_ASSEMBLY_FORMAT], ["$OPAL_ASSEMBLY_FORMAT"], - [Format of assembly file]) - AC_SUBST([OPAL_ASSEMBLY_FORMAT]) fi # if opal_cv_asm_builtin = BUILTIN_GCC result="OPAL_$opal_cv_asm_arch" - OPAL_ASSEMBLY_ARCH="$opal_cv_asm_arch" AC_MSG_CHECKING([for assembly architecture]) AC_MSG_RESULT([$opal_cv_asm_arch]) AC_DEFINE_UNQUOTED([OPAL_ASSEMBLY_ARCH], [$result], [Architecture type of assembly to use for atomic operations and CMA]) - AC_SUBST([OPAL_ASSEMBLY_ARCH]) - - # Check for RDTSCP support - result=0 - AS_IF([test "$opal_cv_asm_arch" = "X86_64" || test "$opal_cv_asm_arch" = "IA32"], - [AC_MSG_CHECKING([for RDTSCP assembly support]) - AC_LANG_PUSH([C]) - AC_RUN_IFELSE([AC_LANG_PROGRAM([[ - unsigned int rax, rdx; - __asm__ __volatile__ ("rdtscp\n": "=a" (rax), "=d" (rdx):: "%rax", "%rdx"); - ]])], - [result=1 - AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no])], - [#cross compile not supported - AC_MSG_RESULT(["no (cross compiling)"])]) - AC_LANG_POP([C])]) - AC_DEFINE_UNQUOTED([OPAL_ASSEMBLY_SUPPORTS_RDTSCP], [$result], - [Whether we have support for RDTSCP instruction]) result="OPAL_$opal_cv_asm_builtin" OPAL_ASSEMBLY_BUILTIN="$opal_cv_asm_builtin" @@ -1217,28 +703,5 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ OPAL_SUMMARY_ADD([[Miscellaneous]],[[Atomics]],[],[$opal_cv_asm_builtin]) - OPAL_ASM_FIND_FILE - - unset result asm_format -])dnl - - -dnl ################################################################# -dnl -dnl OPAL_ASM_FIND_FILE -dnl -dnl -dnl do all the evil mojo to provide a working assembly file -dnl -dnl ################################################################# -AC_DEFUN([OPAL_ASM_FIND_FILE], [ - AC_REQUIRE([AC_PROG_GREP]) - AC_REQUIRE([AC_PROG_FGREP]) - -if test "$opal_cv_asm_arch" != "WINDOWS" && test "$opal_cv_asm_builtin" != "BUILTIN_GCC" && test "$opal_cv_asm_builtin" != 
"BUILTIN_OSX" && test "$opal_cv_asm_inline_arch" = "no" ; then - AC_MSG_ERROR([no atomic support available. exiting]) -else - # On windows with VC++, atomics are done with compiler primitives - opal_cv_asm_file="" -fi + unset result ])dnl diff --git a/opal/include/opal/sys/architecture.h b/opal/include/opal/sys/architecture.h index 35e7cad7886..8f53bda5f20 100644 --- a/opal/include/opal/sys/architecture.h +++ b/opal/include/opal/sys/architecture.h @@ -16,6 +16,8 @@ * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -42,7 +44,4 @@ #define OPAL_BUILTIN_NO 0203 #define OPAL_BUILTIN_C11 0204 -/* Formats */ -#define OPAL_DEFAULT 1000 /* standard for given architecture */ - #endif /* #ifndef OPAL_SYS_ARCHITECTURE_H */ diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index 944b7d2577e..7675b36790e 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -16,6 +16,8 @@ * reserved. * Copyright (c) 2021 Triad National Security, LLC. All rights reserved. * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -25,31 +27,28 @@ #include "atomic_llsc.h" -#if !defined(OPAL_SYS_ARCH_ATOMIC_H) - -# define OPAL_SYS_ARCH_ATOMIC_H 1 - -# if OPAL_GCC_INLINE_ASSEMBLY - -# define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -# define OPAL_HAVE_ATOMIC_SWAP_32 1 -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -# define OPAL_HAVE_ATOMIC_SWAP_64 1 -# define OPAL_HAVE_ATOMIC_ADD_32 1 -# define OPAL_HAVE_ATOMIC_AND_32 1 -# define OPAL_HAVE_ATOMIC_OR_32 1 -# define OPAL_HAVE_ATOMIC_XOR_32 1 -# define OPAL_HAVE_ATOMIC_SUB_32 1 -# define OPAL_HAVE_ATOMIC_ADD_64 1 -# define OPAL_HAVE_ATOMIC_AND_64 1 -# define OPAL_HAVE_ATOMIC_OR_64 1 -# define OPAL_HAVE_ATOMIC_XOR_64 1 -# define OPAL_HAVE_ATOMIC_SUB_64 1 - -# define MB() __asm__ __volatile__("dmb sy" : : : "memory") -# define RMB() __asm__ __volatile__("dmb ld" : : : "memory") -# define WMB() __asm__ __volatile__("dmb st" : : : "memory") +#ifndef OPAL_SYS_ARCH_ATOMIC_H +#define OPAL_SYS_ARCH_ATOMIC_H 1 + +#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 +#define OPAL_HAVE_ATOMIC_SWAP_32 1 +#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 +#define OPAL_HAVE_ATOMIC_SWAP_64 1 +#define OPAL_HAVE_ATOMIC_ADD_32 1 +#define OPAL_HAVE_ATOMIC_AND_32 1 +#define OPAL_HAVE_ATOMIC_OR_32 1 +#define OPAL_HAVE_ATOMIC_XOR_32 1 +#define OPAL_HAVE_ATOMIC_SUB_32 1 +#define OPAL_HAVE_ATOMIC_ADD_64 1 +#define OPAL_HAVE_ATOMIC_AND_64 1 +#define OPAL_HAVE_ATOMIC_OR_64 1 +#define OPAL_HAVE_ATOMIC_XOR_64 1 +#define OPAL_HAVE_ATOMIC_SUB_64 1 + +#define MB() __asm__ __volatile__("dmb sy" : : : "memory") +#define RMB() __asm__ __volatile__("dmb ld" : : : "memory") +#define WMB() __asm__ __volatile__("dmb st" : : : "memory") /********************************************************************** * @@ -251,23 +250,23 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_ return ret; } -# define OPAL_ASM_MAKE_ATOMIC(type, 
bits, name, inst, reg) \ - static inline type opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, \ - type value) \ - { \ - type newval, old; \ - int32_t tmp; \ - \ - __asm__ __volatile__("1: ldxr %" reg "1, [%3] \n" \ - " " inst " %" reg "0, %" reg "1, %" reg "4 \n" \ - " stxr %w2, %" reg "0, [%3] \n" \ - " cbnz %w2, 1b \n" \ - : "=&r"(newval), "=&r"(old), "=&r"(tmp) \ - : "r"(addr), "r"(value) \ - : "cc", "memory"); \ - \ - return old; \ - } +#define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ + static inline type opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, \ + type value) \ + { \ + type newval, old; \ + int32_t tmp; \ + \ + __asm__ __volatile__("1: ldxr %" reg "1, [%3] \n" \ + " " inst " %" reg "0, %" reg "1, %" reg "4 \n" \ + " stxr %w2, %" reg "0, [%3] \n" \ + " cbnz %w2, 1b \n" \ + : "=&r"(newval), "=&r"(old), "=&r"(tmp) \ + : "r"(addr), "r"(value) \ + : "cc", "memory"); \ + \ + return old; \ + } OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w") OPAL_ASM_MAKE_ATOMIC(int32_t, 32, and, "and", "w") @@ -280,6 +279,4 @@ OPAL_ASM_MAKE_ATOMIC(int64_t, 64, or, "orr", "") OPAL_ASM_MAKE_ATOMIC(int64_t, 64, xor, "eor", "") OPAL_ASM_MAKE_ATOMIC(int64_t, 64, sub, "sub", "") -# endif /* OPAL_GCC_INLINE_ASSEMBLY */ - #endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/arm64/atomic_llsc.h b/opal/include/opal/sys/arm64/atomic_llsc.h index 2cae94d262b..57a4a31cc7a 100644 --- a/opal/include/opal/sys/arm64/atomic_llsc.h +++ b/opal/include/opal/sys/arm64/atomic_llsc.h @@ -16,6 +16,8 @@ * reserved. * Copyright (c) 2021 Triad National Security, LLC. All rights reserved. * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -77,6 +79,6 @@ ret = (_ret == 0); \ } while (0) -# endif /* OPAL_GCC_INLINE_ASSEMBLY */ +# endif /* OPAL_C_GCC_INLINE_ASSEMBLY */ #endif /* ! 
OPAL_SYS_ARCH_ATOMIC_LLSC_H */ diff --git a/opal/include/opal/sys/arm64/timer.h b/opal/include/opal/sys/arm64/timer.h index 257f3782cb1..d6237e9dec0 100644 --- a/opal/include/opal/sys/arm64/timer.h +++ b/opal/include/opal/sys/arm64/timer.h @@ -7,6 +7,8 @@ * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,6 +25,8 @@ typedef uint64_t opal_timer_t; typedef uint32_t opal_timer_t; #endif +#if OPAL_C_GCC_INLINE_ASSEMBLY + static inline opal_timer_t opal_sys_timer_get_cycles(void) { opal_timer_t ret; @@ -51,4 +55,6 @@ static inline opal_timer_t opal_sys_timer_get_freq(void) #define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1 #define OPAL_HAVE_SYS_TIMER_GET_FREQ 1 +#endif /* OPAL_C_GCC_INLINE_ASSEMBLY */ + #endif /* ! OPAL_SYS_ARCH_TIMER_H */ diff --git a/opal/include/opal/sys/arm64/update.sh b/opal/include/opal/sys/arm64/update.sh deleted file mode 100644 index 94d8ed2714b..00000000000 --- a/opal/include/opal/sys/arm64/update.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -CFILE=/tmp/opal_atomic_$$.c - -trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 - -echo Updating atomic.s from atomic.h using gcc - -cat > $CFILE< -#include -#define static -#define inline -#define OPAL_GCC_INLINE_ASSEMBLY 1 -#include "../architecture.h" -#include "atomic.h" -EOF - -gcc -O1 -I. -S $CFILE -o atomic.s diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 01c4ba514b7..8713214a3a4 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -17,6 +17,8 @@ * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020-2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -57,28 +59,12 @@ #include "opal/sys/architecture.h" #include "opal_stdatomic.h" -/* do some quick #define cleanup in cases where we are doing - testing... 
*/ -#ifdef OPAL_DISABLE_INLINE_ASM -# undef OPAL_C_GCC_INLINE_ASSEMBLY -# define OPAL_C_GCC_INLINE_ASSEMBLY 0 -#endif - #if OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER) # include "atomic_stdc.h" #else /* !OPAL_C_HAVE__ATOMIC */ -/* define OPAL_{GCC,DEC,XLC}_INLINE_ASSEMBLY based on the - OPAL_C_{GCC,DEC,XLC}_INLINE_ASSEMBLY defines and whether we - are in C or C++ */ -# if defined(c_plusplus) || defined(__cplusplus) -/* We no longer support inline assembly for C++ as OPAL is a C-only interface */ -# define OPAL_GCC_INLINE_ASSEMBLY 0 -# else -# define OPAL_GCC_INLINE_ASSEMBLY OPAL_C_GCC_INLINE_ASSEMBLY -# endif BEGIN_C_DECLS /********************************************************************** @@ -109,39 +95,21 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; * files if we need to specify them as inline or non-inline * *********************************************************************/ -# if !OPAL_GCC_INLINE_ASSEMBLY -# define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 0 -# define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 0 -# define OPAL_HAVE_INLINE_ATOMIC_ADD_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_AND_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_OR_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_XOR_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_SUB_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_ADD_64 0 -# define OPAL_HAVE_INLINE_ATOMIC_AND_64 0 -# define OPAL_HAVE_INLINE_ATOMIC_OR_64 0 -# define OPAL_HAVE_INLINE_ATOMIC_XOR_64 0 -# define OPAL_HAVE_INLINE_ATOMIC_SUB_64 0 -# define OPAL_HAVE_INLINE_ATOMIC_SWAP_32 0 -# define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 0 -# else -# define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 1 -# define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 1 -# define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_AND_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_OR_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_XOR_32 1 -# define 
OPAL_HAVE_INLINE_ATOMIC_SUB_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_ADD_64 1 -# define OPAL_HAVE_INLINE_ATOMIC_AND_64 1 -# define OPAL_HAVE_INLINE_ATOMIC_OR_64 1 -# define OPAL_HAVE_INLINE_ATOMIC_XOR_64 1 -# define OPAL_HAVE_INLINE_ATOMIC_SUB_64 1 -# define OPAL_HAVE_INLINE_ATOMIC_SWAP_32 1 -# define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 1 -# endif +#define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 1 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 1 +#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 1 +#define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1 +#define OPAL_HAVE_INLINE_ATOMIC_AND_32 1 +#define OPAL_HAVE_INLINE_ATOMIC_OR_32 1 +#define OPAL_HAVE_INLINE_ATOMIC_XOR_32 1 +#define OPAL_HAVE_INLINE_ATOMIC_SUB_32 1 +#define OPAL_HAVE_INLINE_ATOMIC_ADD_64 1 +#define OPAL_HAVE_INLINE_ATOMIC_AND_64 1 +#define OPAL_HAVE_INLINE_ATOMIC_OR_64 1 +#define OPAL_HAVE_INLINE_ATOMIC_XOR_64 1 +#define OPAL_HAVE_INLINE_ATOMIC_SUB_64 1 +#define OPAL_HAVE_INLINE_ATOMIC_SWAP_32 1 +#define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 1 /** * Enumeration of lock states diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 3df2058a987..ae7013f77ff 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -14,6 +14,8 @@ * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -63,7 +65,6 @@ * Memory Barriers * *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY static inline void opal_atomic_mb(void) { @@ -85,31 +86,29 @@ static inline void opal_atomic_isync(void) ISYNC(); } -#endif /* end OPAL_GCC_INLINE_ASSEMBLY */ /********************************************************************** * * Atomic math operations * *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY -# if defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__) || defined(__ibmxl__) +#if defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__) || defined(__ibmxl__) /* work-around bizzare xlc bug in which it sign-extends a pointer to a 32-bit signed integer */ -# define OPAL_ASM_ADDR(a) ((uintptr_t) a) -# else -# define OPAL_ASM_ADDR(a) (a) -# endif +# define OPAL_ASM_ADDR(a) ((uintptr_t) a) +#else +# define OPAL_ASM_ADDR(a) (a) +#endif -# if defined(__PGI) +#if defined(__PGI) /* work-around for bug in PGI 16.5-16.7 where the compiler fails to * correctly emit load instructions for 64-bit operands. without this * it will emit lwz instead of ld to load the 64-bit operand. */ -# define OPAL_ASM_VALUE64(x) (void *) (intptr_t)(x) -# else -# define OPAL_ASM_VALUE64(x) x -# endif +# define OPAL_ASM_VALUE64(x) (void *) (intptr_t)(x) +#else +# define OPAL_ASM_VALUE64(x) x +#endif static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) @@ -136,33 +135,28 @@ static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *a * is that even with an always_inline attribute the compiler may still emit instructions to store * then load the arguments to/from the stack. This sequence may cause the ll reservation to be * cancelled. 
*/ -# define opal_atomic_ll_32(addr, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - __asm__ __volatile__("lwarx %0, 0, %1 \n\t" : "=&r"(ret) : "r"(_addr)); \ - } while (0) - -# define opal_atomic_sc_32(addr, value, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - int32_t _ret, _foo, _newval = (int32_t) value; \ - \ - __asm__ __volatile__(" stwcx. %4, 0, %3 \n\t" \ - " li %0,0 \n\t" \ - " bne- 1f \n\t" \ - " ori %0,%0,1 \n\t" \ - "1:" \ - : "=r"(_ret), "=m"(*_addr), "=r"(_foo) \ - : "r"(_addr), "r"(_newval) \ - : "cc", "memory"); \ - ret = _ret; \ - } while (0) - -/* these two functions aren't inlined in the non-gcc case because then - there would be two function calls (since neither cmpset_32 nor - atomic_?mb can be inlined). Instead, we "inline" them by hand in - the assembly, meaning there is one function call overhead instead - of two */ +#define opal_atomic_ll_32(addr, ret) \ + do { \ + opal_atomic_int32_t *_addr = (addr); \ + __asm__ __volatile__("lwarx %0, 0, %1 \n\t" : "=&r"(ret) : "r"(_addr)); \ + } while (0) + +#define opal_atomic_sc_32(addr, value, ret) \ + do { \ + opal_atomic_int32_t *_addr = (addr); \ + int32_t _ret, _foo, _newval = (int32_t) value; \ + \ + __asm__ __volatile__(" stwcx. 
%4, 0, %3 \n\t" \ + " li %0,0 \n\t" \ + " bne- 1f \n\t" \ + " ori %0,%0,1 \n\t" \ + "1:" \ + : "=r"(_ret), "=m"(*_addr), "=r"(_foo) \ + : "r"(_addr), "r"(_newval) \ + : "cc", "memory"); \ + ret = _ret; \ + } while (0) + static inline bool opal_atomic_compare_exchange_strong_acq_32(opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { @@ -195,25 +189,22 @@ static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t new return ret; } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#if OPAL_GCC_INLINE_ASSEMBLY - -# define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ - static inline int64_t opal_atomic_fetch_##type##_64(opal_atomic_int64_t *v, int64_t val) \ - { \ - int64_t t, old; \ - \ - __asm__ __volatile__("1: ldarx %1, 0, %4 \n\t" \ - " " #instr " %0, %3, %1 \n\t" \ - " stdcx. %0, 0, %4 \n\t" \ - " bne- 1b \n\t" \ - : "=&r"(t), "=&r"(old), "=m"(*v) \ - : "r"(OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m"(*v) \ - : "cc"); \ - \ - return old; \ - } + +#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ + static inline int64_t opal_atomic_fetch_##type##_64(opal_atomic_int64_t *v, int64_t val) \ + { \ + int64_t t, old; \ + \ + __asm__ __volatile__("1: ldarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stdcx. 
%0, 0, %4 \n\t" \ + " bne- 1b \n\t" \ + : "=&r"(t), "=&r"(old), "=m"(*v) \ + : "r"(OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m"(*v) \ + : "cc"); \ + \ + return old; \ + } OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(and, and) @@ -243,28 +234,28 @@ static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *a return ret; } -# define opal_atomic_ll_64(addr, ret) \ - do { \ - opal_atomic_int64_t *_addr = (addr); \ - __asm__ __volatile__("ldarx %0, 0, %1 \n\t" : "=&r"(ret) : "r"(_addr)); \ - } while (0) - -# define opal_atomic_sc_64(addr, value, ret) \ - do { \ - opal_atomic_int64_t *_addr = (addr); \ - int64_t _newval = (int64_t) value; \ - int32_t _ret; \ - \ - __asm__ __volatile__(" stdcx. %2, 0, %1 \n\t" \ - " li %0,0 \n\t" \ - " bne- 1f \n\t" \ - " ori %0,%0,1 \n\t" \ - "1:" \ - : "=r"(_ret) \ - : "r"(_addr), "r"(OPAL_ASM_VALUE64(_newval)) \ - : "cc", "memory"); \ - ret = _ret; \ - } while (0) +#define opal_atomic_ll_64(addr, ret) \ + do { \ + opal_atomic_int64_t *_addr = (addr); \ + __asm__ __volatile__("ldarx %0, 0, %1 \n\t" : "=&r"(ret) : "r"(_addr)); \ + } while (0) + +#define opal_atomic_sc_64(addr, value, ret) \ + do { \ + opal_atomic_int64_t *_addr = (addr); \ + int64_t _newval = (int64_t) value; \ + int32_t _ret; \ + \ + __asm__ __volatile__(" stdcx. %2, 0, %1 \n\t" \ + " li %0,0 \n\t" \ + " bne- 1f \n\t" \ + " ori %0,%0,1 \n\t" \ + "1:" \ + : "=r"(_ret) \ + : "r"(_addr), "r"(OPAL_ASM_VALUE64(_newval)) \ + : "cc", "memory"); \ + ret = _ret; \ + } while (0) static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) { @@ -280,15 +271,6 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new return ret; } -# endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#if OPAL_GCC_INLINE_ASSEMBLY - -/* these two functions aren't inlined in the non-gcc case because then - there would be two function calls (since neither cmpset_64 nor - atomic_?mb can be inlined). 
Instead, we "inline" them by hand in - the assembly, meaning there is one function call overhead instead - of two */ static inline bool opal_atomic_compare_exchange_strong_acq_64(opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { @@ -307,21 +289,21 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_ return opal_atomic_compare_exchange_strong_64(addr, oldval, newval); } -# define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ - static inline int32_t opal_atomic_fetch_##type##_32(opal_atomic_int32_t *v, int val) \ - { \ - int32_t t, old; \ - \ - __asm__ __volatile__("1: lwarx %1, 0, %4 \n\t" \ - " " #instr " %0, %3, %1 \n\t" \ - " stwcx. %0, 0, %4 \n\t" \ - " bne- 1b \n\t" \ - : "=&r"(t), "=&r"(old), "=m"(*v) \ - : "r"(val), "r" OPAL_ASM_ADDR(v), "m"(*v) \ - : "cc"); \ - \ - return old; \ - } +#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ + static inline int32_t opal_atomic_fetch_##type##_32(opal_atomic_int32_t *v, int val) \ + { \ + int32_t t, old; \ + \ + __asm__ __volatile__("1: lwarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stwcx. %0, 0, %4 \n\t" \ + " bne- 1b \n\t" \ + : "=&r"(t), "=&r"(old), "=m"(*v) \ + : "r"(val), "r" OPAL_ASM_ADDR(v), "m"(*v) \ + : "cc"); \ + \ + return old; \ + } OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(add, add) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(and, and) @@ -329,6 +311,4 @@ OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(or, or) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(xor, xor) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(sub, subf) -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - #endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/powerpc/timer.h b/opal/include/opal/sys/powerpc/timer.h index 3dc165ce05d..216b7da01e7 100644 --- a/opal/include/opal/sys/powerpc/timer.h +++ b/opal/include/opal/sys/powerpc/timer.h @@ -9,6 +9,8 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
+ * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,7 +23,7 @@ typedef uint64_t opal_timer_t; -#if OPAL_GCC_INLINE_ASSEMBLY +#if OPAL_C_GCC_INLINE_ASSEMBLY static inline opal_timer_t opal_sys_timer_get_cycles(void) { @@ -42,6 +44,6 @@ static inline opal_timer_t opal_sys_timer_get_cycles(void) # define OPAL_HAVE_SYS_TIMER_GET_CYCLES 0 -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ +#endif /* OPAL_C_GCC_INLINE_ASSEMBLY */ #endif /* ! OPAL_SYS_ARCH_TIMER_H */ diff --git a/opal/include/opal/sys/powerpc/update.sh b/opal/include/opal/sys/powerpc/update.sh deleted file mode 100644 index 095868d4fb5..00000000000 --- a/opal/include/opal/sys/powerpc/update.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -CFILE=/tmp/opal_asm_$$.c - -trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 - -echo Updating asm.s from atomic.h and timer.h using gcc - -cat > $CFILE< -#include -#define static -#define inline -#define OPAL_GCC_INLINE_ASSEMBLY 1 -#include "../architecture.h" -#include "atomic.h" -#include "timer.h" -EOF - -gcc -O1 -mpowerpc64 -mcpu=970 -DOPAL_ASSEMBLY_ARCH=POWERPC32 -DOPAL_ASM_SUPPORT_64BIT=1 -I. -S $CFILE -o asm-32-64.s -gcc -O1 -DOPAL_ASSEMBLY_ARCH=OPAL_POWERPC32 -DOPAL_ASM_SUPPORT_64BIT=0 -I. -S $CFILE -o asm-32.s -gcc -m64 -O1 -finline-functions -DOPAL_ASSEMBLY_ARCH=OPAL_POWERPC64 -DOPAL_ASM_SUPPORT64BIT=1 -I. 
-S $CFILE -o asm-64.s diff --git a/opal/include/opal/sys/timer.h b/opal/include/opal/sys/timer.h index da59c6235f4..fd04b296f76 100644 --- a/opal/include/opal/sys/timer.h +++ b/opal/include/opal/sys/timer.h @@ -15,6 +15,8 @@ * reserved. * Copyright (c) 2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,21 +41,6 @@ # include #endif -/* do some quick #define cleanup in cases where we are doing - testing... */ -#ifdef OPAL_DISABLE_INLINE_ASM -# undef OPAL_C_GCC_INLINE_ASSEMBLY -# define OPAL_C_GCC_INLINE_ASSEMBLY 0 -#endif - -/* define OPAL_{GCC,DEC,XLC}_INLINE_ASSEMBLY based on the - OPAL_{C,CXX}_{GCC,DEC,XLC}_INLINE_ASSEMBLY defines and whether we - are in C or C++ */ -#if defined(c_plusplus) || defined(__cplusplus) -# define OPAL_GCC_INLINE_ASSEMBLY OPAL_CXX_GCC_INLINE_ASSEMBLY -#else -# define OPAL_GCC_INLINE_ASSEMBLY OPAL_C_GCC_INLINE_ASSEMBLY -#endif /********************************************************************** * diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h index 7787654f51c..532c89fae71 100644 --- a/opal/include/opal/sys/x86_64/atomic.h +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -48,7 +48,6 @@ * Memory Barriers * *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY static inline void opal_atomic_mb(void) { @@ -69,15 +68,11 @@ static inline void opal_atomic_isync(void) { } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - /********************************************************************** * * Atomic math operations * *********************************************************************/ -#if OPAL_GCC_INLINE_ASSEMBLY - static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { @@ -91,13 +86,9 @@ static inline bool 
opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *a return (bool) ret; } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - #define opal_atomic_compare_exchange_strong_acq_32 opal_atomic_compare_exchange_strong_32 #define opal_atomic_compare_exchange_strong_rel_32 opal_atomic_compare_exchange_strong_32 -#if OPAL_GCC_INLINE_ASSEMBLY - static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { @@ -111,12 +102,10 @@ static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *a return (bool) ret; } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - #define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64 #define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64 -#if OPAL_GCC_INLINE_ASSEMBLY && OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T +#if OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t *addr, opal_int128_t *oldval, @@ -138,10 +127,6 @@ static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t # define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#if OPAL_GCC_INLINE_ASSEMBLY - # define OPAL_HAVE_ATOMIC_SWAP_32 1 # define OPAL_HAVE_ATOMIC_SWAP_64 1 @@ -154,10 +139,6 @@ static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t new return oldval; } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#if OPAL_GCC_INLINE_ASSEMBLY - static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) { int64_t oldval; @@ -166,10 +147,6 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new return oldval; } -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ - -#if OPAL_GCC_INLINE_ASSEMBLY - # define OPAL_HAVE_ATOMIC_ADD_32 1 /** @@ -234,6 +211,4 @@ static inline int64_t opal_atomic_fetch_sub_64(opal_atomic_int64_t *v, int64_t i return ret; } -#endif /* 
OPAL_GCC_INLINE_ASSEMBLY */ - #endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/x86_64/timer.h b/opal/include/opal/sys/x86_64/timer.h index 9c884a993e1..03bb02387d0 100644 --- a/opal/include/opal/sys/x86_64/timer.h +++ b/opal/include/opal/sys/x86_64/timer.h @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. ALl rights * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,7 +30,7 @@ typedef uint64_t opal_timer_t; #undef OPAL_TIMER_MONOTONIC #define OPAL_TIMER_MONOTONIC 0 -#if OPAL_GCC_INLINE_ASSEMBLY +#if OPAL_C_GCC_INLINE_ASSEMBLY # if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 @@ -85,6 +87,6 @@ static inline opal_timer_t opal_sys_timer_get_cycles(void) # define OPAL_HAVE_SYS_TIMER_GET_CYCLES 0 -#endif /* OPAL_GCC_INLINE_ASSEMBLY */ +#endif /* OPAL_C_GCC_INLINE_ASSEMBLY */ #endif /* ! OPAL_SYS_ARCH_TIMER_H */ diff --git a/opal/include/opal/sys/x86_64/update.sh b/opal/include/opal/sys/x86_64/update.sh deleted file mode 100644 index dbef4d61cd0..00000000000 --- a/opal/include/opal/sys/x86_64/update.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -CFILE=/tmp/opal_atomic_$$.c - -trap "/bin/rm -f $CFILE; exit 0" 0 1 2 15 - -echo Updating asm.s from atomic.h and timer.h using gcc - -cat > $CFILE< -#include -#define static -#define inline -#define OPAL_GCC_INLINE_ASSEMBLY 1 -#include "atomic.h" -#include "timer.h" -EOF - -gcc -O3 -I. -S $CFILE -o asm.s From ca545d1b72119f47dec688201132d91a767df9f0 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Thu, 13 Jan 2022 23:23:17 +0000 Subject: [PATCH 03/11] Remove old MB() macro In the early days of OMPI, we used MB/WMB/RMB macros instead of the opal_atomic_* functions that most of the code uses today. Remove the last few places that the MB defines were used and then remove the defines themselves. Signed-off-by: Brian Barrett --- ompi/patterns/net/allreduce.c | 8 +++++--- opal/include/opal/sys/arm64/atomic.h | 10 +++------- opal/include/opal/sys/gcc_builtin/atomic.h | 1 - opal/include/opal/sys/powerpc/atomic.h | 13 ++++--------- opal/include/opal/sys/x86_64/atomic.h | 7 ++++--- 5 files changed, 16 insertions(+), 23 deletions(-) diff --git a/ompi/patterns/net/allreduce.c b/ompi/patterns/net/allreduce.c index be192255507..7442cd33f1e 100644 --- a/ompi/patterns/net/allreduce.c +++ b/ompi/patterns/net/allreduce.c @@ -5,6 +5,8 @@ * All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -34,7 +36,7 @@ void recv_completion(nt status, struct ompi_process_name_t* peer, struct iovec* int count, ompi_rml_tag_t tag, void* cbdata) { /* set receive completion flag */ - MB(); + opal_atomic_mb(); *(int *)cbdata=1; } @@ -232,7 +234,7 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, send_buffer^=1; } - MB(); + opal_atomic_mb(); /* * Signal parent that data is ready */ @@ -255,7 +257,7 @@ comm_allreduce(void *sbuf, void *rbuf, int count, opal_datatype_t *dtype, *recv_done=0; *send_done=0; - MB(); + opal_atomic_mb(); /* post non-blocking receive */ recv_iov.iov_base=scratch_bufers[send_buffer]; diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index 7675b36790e..e244688d743 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -46,10 +46,6 @@ #define OPAL_HAVE_ATOMIC_XOR_64 1 #define OPAL_HAVE_ATOMIC_SUB_64 1 -#define MB() __asm__ __volatile__("dmb sy" : : : "memory") -#define RMB() __asm__ __volatile__("dmb ld" : : : "memory") -#define WMB() __asm__ __volatile__("dmb st" : : : "memory") - /********************************************************************** * * Memory Barriers @@ -58,17 +54,17 @@ static inline void opal_atomic_mb(void) { - MB(); + __asm__ __volatile__("dmb sy" : : : "memory"); } static inline void opal_atomic_rmb(void) { - RMB(); + __asm__ __volatile__("dmb ld" : : : "memory"); } static inline void opal_atomic_wmb(void) { - WMB(); + __asm__ __volatile__("dmb st" : : : "memory"); } static inline void opal_atomic_isync(void) diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index 80f5254f144..2160b127453 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -77,7 +77,6 @@ static inline void opal_atomic_wmb(void) __atomic_thread_fence(__ATOMIC_RELEASE); } -#define MB() 
opal_atomic_mb() /********************************************************************** * diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index ae7013f77ff..9bacbdb4bb2 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -30,11 +30,6 @@ * On powerpc ... */ -#define MB() __asm__ __volatile__("sync" : : : "memory") -#define RMB() __asm__ __volatile__ ("isync" : : : "memory") -#define WMB() __asm__ __volatile__("lwsync" : : : "memory") -#define ISYNC() __asm__ __volatile__("isync" : : : "memory") - /********************************************************************** * * Define constants for PowerPC 64 @@ -68,22 +63,22 @@ static inline void opal_atomic_mb(void) { - MB(); + __asm__ __volatile__("sync" : : : "memory"); } static inline void opal_atomic_rmb(void) { - RMB(); + __asm__ __volatile__ ("isync" : : : "memory"); } static inline void opal_atomic_wmb(void) { - WMB(); + __asm__ __volatile__("lwsync" : : : "memory"); } static inline void opal_atomic_isync(void) { - ISYNC(); + __asm__ __volatile__("isync" : : : "memory"); } diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h index 532c89fae71..82aafa036d6 100644 --- a/opal/include/opal/sys/x86_64/atomic.h +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -16,6 +16,8 @@ * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -30,7 +32,6 @@ */ #define SMPLOCK "lock; " -#define MB() __asm__ __volatile__("" : : : "memory") /********************************************************************** * @@ -56,12 +57,12 @@ static inline void opal_atomic_mb(void) static inline void opal_atomic_rmb(void) { - MB(); + __asm__ __volatile__("" : : : "memory"); } static inline void opal_atomic_wmb(void) { - MB(); + __asm__ __volatile__("" : : : "memory"); } static inline void opal_atomic_isync(void) From 4ac505909574540d4278b509cf93a4c744f2d2e1 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 14 Jan 2022 00:01:26 +0000 Subject: [PATCH 04/11] Make atomic memory barriers mandatory The atomic memory barrier interface was de facto mandatory, as none of the code making opal_atomic_mb() calls bother with the ifdefs. Since all the supported platforms have atomic memory barrier implementations, just make them required. At the same time, clean up the code to reflect that there are no longer non-inline versions of the memory barrier assembly. 
Signed-off-by: Brian Barrett --- opal/include/opal/sys/arm64/atomic.h | 2 +- opal/include/opal/sys/atomic.h | 34 ++++------------------ opal/include/opal/sys/atomic_stdc.h | 11 +++++-- opal/include/opal/sys/gcc_builtin/atomic.h | 9 +++++- opal/include/opal/sys/powerpc/atomic.h | 2 +- opal/include/opal/sys/x86_64/atomic.h | 2 +- test/asm/atomic_barrier.c | 7 ++--- 7 files changed, 27 insertions(+), 40 deletions(-) diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index e244688d743..9b51924df58 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -30,7 +30,6 @@ #ifndef OPAL_SYS_ARCH_ATOMIC_H #define OPAL_SYS_ARCH_ATOMIC_H 1 -#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 @@ -46,6 +45,7 @@ #define OPAL_HAVE_ATOMIC_XOR_64 1 #define OPAL_HAVE_ATOMIC_SUB_64 1 + /********************************************************************** * * Memory Barriers diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 8713214a3a4..7993985c411 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -41,7 +41,6 @@ * The following #defines will be true / false based on * assembly support: * - * - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers * - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks * * Note that for the Atomic math, atomic add/sub may be implemented as @@ -95,7 +94,6 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; * files if we need to specify them as inline or non-inline * *********************************************************************/ -#define OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1 @@ -166,18 +164,12 @@ enum { OPAL_ATOMIC_LOCK_UNLOCKED = 0, 
OPAL_ATOMIC_LOCK_LOCKED = 1 }; # endif # endif /* DOXYGEN */ + /********************************************************************** * - * Memory Barriers - defined here if running doxygen or have barriers - * but can't inline + * Memory Barriers * *********************************************************************/ -# if !defined(OPAL_HAVE_ATOMIC_MEM_BARRIER) && !defined(DOXYGEN) -/* no way to emulate in C code */ -# define OPAL_HAVE_ATOMIC_MEM_BARRIER 0 -# endif - -# if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MEM_BARRIER /** * Memory barrier * @@ -191,12 +183,7 @@ enum { OPAL_ATOMIC_LOCK_UNLOCKED = 0, OPAL_ATOMIC_LOCK_LOCKED = 1 }; * generally grinding the memory controller's performance. Use only * if you need *both* read and write barriers. */ - -# if OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER -static inline -# endif - void - opal_atomic_mb(void); +static inline void opal_atomic_mb(void); /** * Read memory barrier @@ -207,12 +194,7 @@ static inline * next read. Nothing is said about the ordering of writes when using * \c opal_atomic_rmb(). */ - -# if OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER -static inline -# endif - void - opal_atomic_rmb(void); +static inline void opal_atomic_rmb(void); /** * Write memory barrier. @@ -223,14 +205,8 @@ static inline * next write. Nothing is said about the ordering of reads when using * \c opal_atomic_wmb(). */ +static inline void opal_atomic_wmb(void); -# if OPAL_HAVE_INLINE_ATOMIC_MEM_BARRIER -static inline -# endif - void - opal_atomic_wmb(void); - -# endif /* defined(DOXYGEN) || OPAL_HAVE_ATOMIC_MEM_BARRIER */ /********************************************************************** * diff --git a/opal/include/opal/sys/atomic_stdc.h b/opal/include/opal/sys/atomic_stdc.h index 57afe80134e..3a9fe526680 100644 --- a/opal/include/opal/sys/atomic_stdc.h +++ b/opal/include/opal/sys/atomic_stdc.h @@ -7,6 +7,8 @@ * Copyright (c) 2019-2021 Google, LLC. All rights reserved. * Copyright (c) 2019 Triad National Security, LLC. All rights * reserved. 
+ * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -30,8 +32,6 @@ # include # include -# define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 - # define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 # define OPAL_HAVE_ATOMIC_SWAP_32 1 @@ -58,6 +58,13 @@ # define OPAL_HAVE_ATOMIC_SPINLOCKS 1 + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ + static inline void opal_atomic_mb(void) { atomic_thread_fence(memory_order_seq_cst); diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index 2160b127453..7ead6e5b0ae 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -18,6 +18,8 @@ * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -33,7 +35,6 @@ * Memory Barriers * *********************************************************************/ -#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 @@ -50,6 +51,12 @@ #define OPAL_HAVE_ATOMIC_SUB_64 1 #define OPAL_HAVE_ATOMIC_SWAP_64 1 + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ #if (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) && defined (__GNUC__) && !defined(__llvm) && (__GNUC__ < 6) /* work around a bug in older gcc versions where ACQUIRE seems to get * treated as a no-op instead */ diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 9bacbdb4bb2..e533c0a8943 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -35,7 +35,6 @@ * Define constants for PowerPC 64 * *********************************************************************/ -#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_SWAP_32 1 @@ -55,6 +54,7 @@ #define OPAL_HAVE_ATOMIC_XOR_64 1 #define OPAL_HAVE_ATOMIC_SUB_64 1 + /********************************************************************** * * Memory Barriers diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h index 82aafa036d6..d2d2cd40991 100644 --- a/opal/include/opal/sys/x86_64/atomic.h +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -38,12 +38,12 @@ * Define constants for AMD64 / x86_64 / EM64T / ... 
* *********************************************************************/ -#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 + /********************************************************************** * * Memory Barriers diff --git a/test/asm/atomic_barrier.c b/test/asm/atomic_barrier.c index 065e116a283..264951b45dd 100644 --- a/test/asm/atomic_barrier.c +++ b/test/asm/atomic_barrier.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -24,8 +26,6 @@ int main(int argc, char *argv[]) { -#if OPAL_HAVE_ATOMIC_MEM_BARRIER - /* there really isn't a great way to test that the barriers actually barrier, but at least make sure they don't kill the machine.*/ @@ -35,7 +35,4 @@ int main(int argc, char *argv[]) opal_atomic_wmb(); return 0; -#else - return 77; -#endif } From 799aeddc60abfb9f9b1789d5950a4e3e0ea651ee Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 21 Jan 2022 03:30:04 +0000 Subject: [PATCH 05/11] Make atomic cswap and swap mandatory The compare and swap and swap atomic interfaces are de facto mandatory, as no code actually checks for their existence before using them (with the notable exception of the 128 bit cswap), so make those interfaces mandatory and remove all the complex code to detect implementations of them. At the same time, reorder the implementation files to all have consistent ordering of the cswap and swap implementations, to ease maintenance and move to a model in which each implementation includes the compatibility layer, instead of the previous overly complex all-in-one software implementation model. Finally, remove the untyped macro wrappers as 1) no one uses them and 2) they're a pain to maintain.
Signed-off-by: Brian Barrett --- opal/include/opal/sys/Makefile.am | 4 + opal/include/opal/sys/arm64/atomic.h | 83 +++-- opal/include/opal/sys/atomic.h | 315 ++++++++++-------- opal/include/opal/sys/atomic_impl.h | 122 ------- opal/include/opal/sys/atomic_impl_ptr_cswap.h | 73 ++++ opal/include/opal/sys/atomic_impl_ptr_swap.h | 36 ++ opal/include/opal/sys/atomic_stdc.h | 112 +++---- opal/include/opal/sys/gcc_builtin/atomic.h | 178 +++++----- opal/include/opal/sys/powerpc/atomic.h | 219 ++++++------ opal/include/opal/sys/x86_64/atomic.h | 34 +- test/asm/atomic_cmpset.c | 36 -- 11 files changed, 630 insertions(+), 582 deletions(-) create mode 100644 opal/include/opal/sys/atomic_impl_ptr_cswap.h create mode 100644 opal/include/opal/sys/atomic_impl_ptr_swap.h diff --git a/opal/include/opal/sys/Makefile.am b/opal/include/opal/sys/Makefile.am index cdceeb21fc0..510bf59a965 100644 --- a/opal/include/opal/sys/Makefile.am +++ b/opal/include/opal/sys/Makefile.am @@ -16,6 +16,8 @@ # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2020-2021 Google, LLC. All rights reserved. +# Copyright (c) 2022 Amazon.com, Inc. or its affiliates. +# All Rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -30,6 +32,8 @@ headers += \ opal/sys/atomic.h \ opal/sys/atomic_stdc.h \ opal/sys/atomic_impl.h \ + opal/sys/atomic_impl_ptr_cswap.h \ + opal/sys/atomic_impl_ptr_swap.h \ opal/sys/timer.h \ opal/sys/cma.h diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index 9b51924df58..72823616128 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -30,10 +30,6 @@ #ifndef OPAL_SYS_ARCH_ATOMIC_H #define OPAL_SYS_ARCH_ATOMIC_H 1 -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_ATOMIC_SWAP_32 1 -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -#define OPAL_HAVE_ATOMIC_SWAP_64 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_AND_32 1 #define OPAL_HAVE_ATOMIC_OR_32 1 @@ -72,9 +68,10 @@ static inline void opal_atomic_isync(void) __asm__ __volatile__("isb"); } + /********************************************************************** * - * Atomic math operations + * Compare and Swap * *********************************************************************/ @@ -99,20 +96,6 @@ static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *a return ret; } -static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) -{ - int32_t ret, tmp; - - __asm__ __volatile__("1: ldaxr %w0, [%2] \n" - " stlxr %w1, %w3, [%2] \n" - " cbnz %w1, 1b \n" - : "=&r"(ret), "=&r"(tmp) - : "r"(addr), "r"(newval) - : "cc", "memory"); - - return ret; -} - /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_32 nor atomic_?mb can be inlined). 
Instead, we "inline" them by hand in @@ -182,21 +165,6 @@ static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *a return ret; } -static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) -{ - int64_t ret; - int tmp; - - __asm__ __volatile__("1: ldaxr %0, [%2] \n" - " stlxr %w1, %3, [%2] \n" - " cbnz %w1, 1b \n" - : "=&r"(ret), "=&r"(tmp) - : "r"(addr), "r"(newval) - : "cc", "memory"); - - return ret; -} - /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_64 nor atomic_?mb can be inlined). Instead, we "inline" them by hand in @@ -246,6 +214,53 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_ return ret; } +#include "opal/sys/atomic_impl_ptr_cswap.h" + + +/********************************************************************** + * + * Swap + * + *********************************************************************/ + +static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) +{ + int32_t ret, tmp; + + __asm__ __volatile__("1: ldaxr %w0, [%2] \n" + " stlxr %w1, %w3, [%2] \n" + " cbnz %w1, 1b \n" + : "=&r"(ret), "=&r"(tmp) + : "r"(addr), "r"(newval) + : "cc", "memory"); + + return ret; +} + +static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) +{ + int64_t ret; + int tmp; + + __asm__ __volatile__("1: ldaxr %0, [%2] \n" + " stlxr %w1, %3, [%2] \n" + " cbnz %w1, 1b \n" + : "=&r"(ret), "=&r"(tmp) + : "r"(addr), "r"(newval) + : "cc", "memory"); + + return ret; +} + +#include "opal/sys/atomic_impl_ptr_swap.h" + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + #define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ static inline type opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, \ type value) \ diff --git 
a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 7993985c411..78f60a048eb 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -94,8 +94,6 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; * files if we need to specify them as inline or non-inline * *********************************************************************/ -#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1 #define OPAL_HAVE_INLINE_ATOMIC_AND_32 1 #define OPAL_HAVE_INLINE_ATOMIC_OR_32 1 @@ -106,8 +104,6 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; #define OPAL_HAVE_INLINE_ATOMIC_OR_64 1 #define OPAL_HAVE_INLINE_ATOMIC_XOR_64 1 #define OPAL_HAVE_INLINE_ATOMIC_SUB_64 1 -#define OPAL_HAVE_INLINE_ATOMIC_SWAP_32 1 -#define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 1 /** * Enumeration of lock states @@ -147,12 +143,6 @@ enum { OPAL_ATOMIC_LOCK_UNLOCKED = 0, OPAL_ATOMIC_LOCK_LOCKED = 1 }; /* compare and set operations can't really be emulated from software, so if these defines aren't already set, they should be set to 0 now */ -# ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 0 -# endif -# ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0 -# endif # ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 # define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 # endif @@ -208,6 +198,180 @@ static inline void opal_atomic_rmb(void); static inline void opal_atomic_wmb(void); +/********************************************************************** + * + * Compare and Swap + * + * Implementations must provide 32 and 64 bit compare-and-swap + * operations, but may provide the ptr implementation by including + * atomic_impl_ptr_cswap.h (which implements the ptr implementation + * over the 32 and 64 bit implementations). 
+ * + *********************************************************************/ +/** + * Atomic compare and set of 32 bit integers with acquire and release semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, int32_t *oldval, + int32_t newval); + +/** + * Atomic compare and set of 32 bit integers with acquire semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_acq_32(opal_atomic_int32_t *addr, int32_t *oldval, + int32_t newval); + +/** + * Atomic compare and set of 32 bit integers with release semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_rel_32(opal_atomic_int32_t *addr, int32_t *oldval, + int32_t newval); + +/** + * Atomic compare and set of 64 bit integers with acquire and release semantics. 
+ * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, int64_t *oldval, + int64_t newval); + +/** + * Atomic compare and set of 64 bit integers with acquire semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_acq_64(opal_atomic_int64_t *addr, int64_t *oldval, + int64_t newval); + +/** + * Atomic compare and set of 64 bit integers with release semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_t *addr, int64_t *oldval, + int64_t newval); + +/** + * Atomic compare and set of pointer-sized integers with acquire and release semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. 
Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_ptr(opal_atomic_intptr_t *addr, + intptr_t *oldval, intptr_t newval); + +/** + * Atomic compare and set of pointer-sized integers with acquire semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_acq_ptr(opal_atomic_intptr_t *addr, + intptr_t *oldval, intptr_t newval); + +/** + * Atomic compare and set of pointer-sized integers with release semantics. + * + * @param addr Address of value to be swapped + * @param oldval Comparison value + * @param newval New value to set if comparison is true + * + * @returns If newval was written into addr, the function returns + * true. Otherwise, the function returns false and the value of addr + * at the time of the comparison is returned in oldval. + */ +static inline bool opal_atomic_compare_exchange_strong_rel_ptr(opal_atomic_intptr_t *addr, + intptr_t *oldval, intptr_t newval); + + +/********************************************************************** + * + * Swap + * + * Implementations may provide a native implementation of these + * operations or include atomic_swap_impl.h, which provides + * implementations over compare-and-swap. 
+ * + *********************************************************************/ +/** + * Atomic swap of 32 bit value + * @param addr Address of value to be swapped + * @param newval New value to set in addr + * + * @returns Value in addr before swap + */ +static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval); + +/** + * Atomic swap of 64 bit value + * @param addr Address of value to be swapped + * @param newval New value to set in addr + * + * @returns Value in addr before swap + */ +static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval); + +/** + * Atomic swap of pointer-sized value + * @param addr Address of value to be swapped + * @param newval New value to set in addr + * + * @returns Value in addr before swap + */ +static inline intptr_t opal_atomic_swap_ptr(opal_atomic_intptr_t *addr, intptr_t newval); + + /********************************************************************** * * Atomic spinlocks - always inlined, if have atomic compare-and-swap @@ -283,60 +447,6 @@ static inline * Atomic math operations * *********************************************************************/ -# if !defined(OPAL_HAVE_ATOMIC_CMPSET_32) && !defined(DOXYGEN) -# define OPAL_HAVE_ATOMIC_CMPSET_32 0 -# endif -# if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_CMPSET_32 - -# if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 -static inline -# endif - bool - opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, int32_t *oldval, - int32_t newval); - -# if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 -static inline -# endif - bool - opal_atomic_compare_exchange_strong_acq_32(opal_atomic_int32_t *addr, int32_t *oldval, - int32_t newval); - -# if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_32 -static inline -# endif - bool - opal_atomic_compare_exchange_strong_rel_32(opal_atomic_int32_t *addr, int32_t *oldval, - int32_t newval); -# endif - -# if !defined(OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) && !defined(DOXYGEN) -# define 
OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 0 -# endif -# if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 - -# if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 -static inline -# endif - bool - opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, int64_t *oldval, - int64_t newval); - -# if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 -static inline -# endif - bool - opal_atomic_compare_exchange_strong_acq_64(opal_atomic_int64_t *addr, int64_t *oldval, - int64_t newval); - -# if OPAL_HAVE_INLINE_ATOMIC_COMPARE_EXCHANGE_64 -static inline -# endif - bool - opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_t *addr, int64_t *oldval, - int64_t newval); - -# endif static inline int32_t opal_atomic_add_fetch_32(opal_atomic_int32_t *addr, int delta); static inline int32_t opal_atomic_fetch_add_32(opal_atomic_int32_t *addr, int delta); @@ -443,83 +553,6 @@ static inline size_t opal_atomic_fetch_sub_size_t(opal_atomic_size_t *addr, size # endif # endif -# if defined(DOXYGEN) \ - || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) -/* these are always done with inline functions, so always mark as - static inline */ - -static inline bool opal_atomic_compare_exchange_strong_xx(opal_atomic_intptr_t *addr, - intptr_t *oldval, int64_t newval, - size_t length); -static inline bool opal_atomic_compare_exchange_strong_acq_xx(opal_atomic_intptr_t *addr, - intptr_t *oldval, int64_t newval, - size_t length); -static inline bool opal_atomic_compare_exchange_strong_rel_xx(opal_atomic_intptr_t *addr, - intptr_t *oldval, int64_t newval, - size_t length); - -static inline bool opal_atomic_compare_exchange_strong_ptr(opal_atomic_intptr_t *addr, - intptr_t *oldval, intptr_t newval); -static inline bool opal_atomic_compare_exchange_strong_acq_ptr(opal_atomic_intptr_t *addr, - intptr_t *oldval, intptr_t newval); -static inline bool opal_atomic_compare_exchange_strong_rel_ptr(opal_atomic_intptr_t *addr, - intptr_t *oldval, intptr_t newval); - 
-/** - * Atomic compare and set of generic type with relaxed semantics. This - * macro detect at compile time the type of the first argument and - * choose the correct function to be called. - * - * \note This macro should only be used for integer types. - * - * @param addr Address of . - * @param oldval Comparison value address of . - * @param newval New value to set if comparision is true . - * - * See opal_atomic_compare_exchange_* for pseudo-code. - */ -# define opal_atomic_compare_exchange_strong(ADDR, OLDVAL, NEWVAL) \ - opal_atomic_compare_exchange_strong_xx((opal_atomic_intptr_t *) (ADDR), \ - (intptr_t *) (OLDVAL), (intptr_t)(NEWVAL), \ - sizeof(*(ADDR))) - -/** - * Atomic compare and set of generic type with acquire semantics. This - * macro detect at compile time the type of the first argument and - * choose the correct function to be called. - * - * \note This macro should only be used for integer types. - * - * @param addr Address of . - * @param oldval Comparison value address of . - * @param newval New value to set if comparision is true . - * - * See opal_atomic_compare_exchange_acq_* for pseudo-code. - */ -# define opal_atomic_compare_exchange_strong_acq(ADDR, OLDVAL, NEWVAL) \ - opal_atomic_compare_exchange_strong_acq_xx((opal_atomic_intptr_t *) (ADDR), \ - (intptr_t *) (OLDVAL), (intptr_t)(NEWVAL), \ - sizeof(*(ADDR))) - -/** - * Atomic compare and set of generic type with release semantics. This - * macro detect at compile time the type of the first argument and - * choose the correct function to be called. - * - * \note This macro should only be used for integer types. - * - * @param addr Address of . - * @param oldval Comparison value address of . - * @param newval New value to set if comparision is true . - * - * See opal_atomic_compare_exchange_rel_* for pseudo-code. 
- */ -# define opal_atomic_compare_exchange_strong_rel(ADDR, OLDVAL, NEWVAL) \ - opal_atomic_compare_exchange_strong_rel_xx((opal_atomic_intptr_t *) (ADDR), \ - (intptr_t *) (OLDVAL), (intptr_t)(NEWVAL), \ - sizeof(*(ADDR))) - -# endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ static inline void opal_atomic_add_xx(opal_atomic_intptr_t *addr, int32_t value, size_t length); static inline void opal_atomic_sub_xx(opal_atomic_intptr_t *addr, int32_t value, size_t length); diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index ef522daad43..817e580c502 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -37,7 +37,6 @@ * undefine all those functions if there is no 64 bit compare-exchange * *********************************************************************/ -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 # if !defined(OPAL_HAVE_ATOMIC_MIN_32) static inline int32_t opal_atomic_fetch_min_32(opal_atomic_int32_t *addr, int32_t value) @@ -84,17 +83,6 @@ static inline int32_t opal_atomic_fetch_max_32(opal_atomic_int32_t *addr, int32_ return oldval; \ } -# if !defined(OPAL_HAVE_ATOMIC_SWAP_32) -# define OPAL_HAVE_ATOMIC_SWAP_32 1 -static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) -{ - int32_t old = *addr; - do { - } while (!opal_atomic_compare_exchange_strong_32(addr, &old, newval)); - - return old; -} -# endif /* OPAL_HAVE_ATOMIC_SWAP_32 */ # if !defined(OPAL_HAVE_ATOMIC_ADD_32) # define OPAL_HAVE_ATOMIC_ADD_32 1 @@ -131,10 +119,6 @@ OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, -, sub) # endif /* OPAL_HAVE_ATOMIC_SUB_32 */ -#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ - -#if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 - # if !defined(OPAL_HAVE_ATOMIC_MIN_64) static inline int64_t opal_atomic_fetch_min_64(opal_atomic_int64_t *addr, int64_t value) { @@ -168,17 +152,6 @@ static inline int64_t 
opal_atomic_fetch_max_64(opal_atomic_int64_t *addr, int64_ # define OPAL_HAVE_ATOMIC_MAX_64 1 # endif /* OPAL_HAVE_ATOMIC_MAX_64 */ -# if !defined(OPAL_HAVE_ATOMIC_SWAP_64) -# define OPAL_HAVE_ATOMIC_SWAP_64 1 -static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) -{ - int64_t old = *addr; - do { - } while (!opal_atomic_compare_exchange_strong_64(addr, &old, newval)); - - return old; -} -# endif /* OPAL_HAVE_ATOMIC_SWAP_64 */ # if !defined(OPAL_HAVE_ATOMIC_ADD_64) # define OPAL_HAVE_ATOMIC_ADD_64 1 @@ -215,101 +188,6 @@ OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, -, sub) # endif /* OPAL_HAVE_ATOMIC_SUB_64 */ -#else - -# if !defined(OPAL_HAVE_ATOMIC_ADD_64) -# define OPAL_HAVE_ATOMIC_ADD_64 0 -# endif - -# if !defined(OPAL_HAVE_ATOMIC_SUB_64) -# define OPAL_HAVE_ATOMIC_SUB_64 0 -# endif - -#endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 */ - -#if (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) - -# if OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -# define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics) \ - static inline bool opal_atomic_compare_exchange_strong##semantics##xx( \ - opal_atomic_intptr_t *addr, intptr_t *oldval, int64_t newval, const size_t length) \ - { \ - switch (length) { \ - case 4: \ - return opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t *) addr, \ - (int32_t *) oldval, \ - (int32_t) newval); \ - case 8: \ - return opal_atomic_compare_exchange_strong_64((opal_atomic_int64_t *) addr, \ - (int64_t *) oldval, \ - (int64_t) newval); \ - } \ - abort(); \ - } -# elif OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -# define OPAL_ATOMIC_DEFINE_CMPXCG_XX(semantics) \ - static inline bool opal_atomic_compare_exchange_strong##semantics##xx( \ - opal_atomic_intptr_t *addr, intptr_t *oldval, int64_t newval, const size_t length) \ - { \ - switch (length) { \ - case 4: \ - return opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t *) addr, \ - (int32_t *) oldval, \ - 
(int32_t) newval); \ - } \ - abort(); \ - } -# else -# error "Platform does not have required atomic compare-and-swap functionality" -# endif - -OPAL_ATOMIC_DEFINE_CMPXCG_XX(_) -OPAL_ATOMIC_DEFINE_CMPXCG_XX(_acq_) -OPAL_ATOMIC_DEFINE_CMPXCG_XX(_rel_) - -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 -# define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \ - static inline bool \ - opal_atomic_compare_exchange_strong##semantics##ptr(opal_atomic_intptr_t *addr, \ - intptr_t *oldval, \ - intptr_t newval) \ - { \ - return opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t *) addr, \ - (int32_t *) oldval, \ - (int32_t) newval); \ - } -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 -# define OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(semantics) \ - static inline bool \ - opal_atomic_compare_exchange_strong##semantics##ptr(opal_atomic_intptr_t *addr, \ - intptr_t *oldval, \ - intptr_t newval) \ - { \ - return opal_atomic_compare_exchange_strong_64((opal_atomic_int64_t *) addr, \ - (int64_t *) oldval, \ - (int64_t) newval); \ - } -# else -# error "Can not define opal_atomic_compare_exchange_strong_ptr with existing atomics" -# endif - -OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_) -OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_acq_) -OPAL_ATOMIC_DEFINE_CMPXCG_PTR_XX(_rel_) - -#endif /* (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) */ - -#if (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) - -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SWAP_32 -# define opal_atomic_swap_ptr(addr, value) \ - (intptr_t) opal_atomic_swap_32((opal_atomic_int32_t *) addr, (int32_t) value) -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SWAP_64 -# define opal_atomic_swap_ptr(addr, value) \ - (intptr_t) opal_atomic_swap_64((opal_atomic_int64_t *) addr, (int64_t) value) -# endif - -#endif /* (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) */ static inline void opal_atomic_add_xx(opal_atomic_intptr_t *addr, int32_t value, size_t length) { diff 
--git a/opal/include/opal/sys/atomic_impl_ptr_cswap.h b/opal/include/opal/sys/atomic_impl_ptr_cswap.h new file mode 100644 index 00000000000..a7246c04ee3 --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_ptr_cswap.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ATOMIC_IMPL_PTR_CSWAP_H +#define ATOMIC_IMPL_PTR_CSWAP_H 1 + +static inline bool opal_atomic_compare_exchange_strong_ptr(opal_atomic_intptr_t *addr, + intptr_t *oldval, intptr_t newval) +{ +#if SIZEOF_VOID_P == 4 + return opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t*)addr, + (int32_t*)oldval, + (int32_t)newval); +#elif SIZEOF_VOID_P == 8 + return opal_atomic_compare_exchange_strong_64((opal_atomic_int64_t*)addr, + (int64_t*)oldval, + (int64_t)newval); +#else +#error "No implementation of opal_atomic_compare_exchange_strong_ptr" +#endif +} + +static inline bool opal_atomic_compare_exchange_strong_acq_ptr(opal_atomic_intptr_t *addr, + intptr_t *oldval, intptr_t newval) +{ +#if SIZEOF_VOID_P == 4 + return opal_atomic_compare_exchange_strong_acq_32((opal_atomic_int32_t*)addr, + (int32_t*)oldval, + (int32_t)newval); +#elif SIZEOF_VOID_P == 8 + return opal_atomic_compare_exchange_strong_acq_64((opal_atomic_int64_t*)addr, + (int64_t*)oldval, + (int64_t)newval); +#else +#error 
"No implementation of opal_atomic_compare_exchange_strong_acq_ptr" +#endif +} + +static inline bool opal_atomic_compare_exchange_strong_rel_ptr(opal_atomic_intptr_t *addr, + intptr_t *oldval, intptr_t newval) +{ +#if SIZEOF_VOID_P == 4 + return opal_atomic_compare_exchange_strong_rel_32((opal_atomic_int32_t*)addr, + (int32_t*)oldval, + (int32_t)newval); +#elif SIZEOF_VOID_P == 8 + return opal_atomic_compare_exchange_strong_rel_64((opal_atomic_int64_t*)addr, + (int64_t*)oldval, + (int64_t)newval); +#else +#error "No implementation of opal_atomic_compare_exchange_strong_rel_ptr" +#endif +} + +#endif /* #ifndef ATOMIC_IMPL_PTR_CSWAP_H */ diff --git a/opal/include/opal/sys/atomic_impl_ptr_swap.h b/opal/include/opal/sys/atomic_impl_ptr_swap.h new file mode 100644 index 00000000000..97615961fc7 --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_ptr_swap.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ATOMIC_SWAP_PTR_IMPL_H +#define ATOMIC_SWAP_PTR_IMPL_H + +static inline intptr_t opal_atomic_swap_ptr(opal_atomic_intptr_t *addr, intptr_t newval) +{ +#if SIZEOF_VOID_P == 4 + return (intptr_t)opal_atomic_swap_32((opal_atomic_int32_t *) addr, (int32_t) newval); +#elif SIZEOF_VOID_P == 8 + return (intptr_t)opal_atomic_swap_64((opal_atomic_int64_t *) addr, (int64_t) newval); +#else +#error "No implementation of opal_atomic_swap_ptr" +#endif +} + +#endif /* #ifndef ATOMIC_SWAP_PTR_IMPL_H */ diff --git a/opal/include/opal/sys/atomic_stdc.h b/opal/include/opal/sys/atomic_stdc.h index 3a9fe526680..c572f7c1994 100644 --- a/opal/include/opal/sys/atomic_stdc.h +++ b/opal/include/opal/sys/atomic_stdc.h @@ -32,18 +32,12 @@ # include # include -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -# define OPAL_HAVE_ATOMIC_SWAP_32 1 - # define OPAL_HAVE_ATOMIC_ADD_32 1 # define OPAL_HAVE_ATOMIC_AND_32 1 # define OPAL_HAVE_ATOMIC_OR_32 1 # define OPAL_HAVE_ATOMIC_XOR_32 1 # define OPAL_HAVE_ATOMIC_SUB_32 1 -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -# define OPAL_HAVE_ATOMIC_SWAP_64 1 - # define OPAL_HAVE_ATOMIC_ADD_64 1 # define OPAL_HAVE_ATOMIC_AND_64 1 # define OPAL_HAVE_ATOMIC_OR_64 1 @@ -87,44 +81,78 @@ static inline void opal_atomic_rmb(void) # endif } + +/********************************************************************** + * + * Compare and Swap + * + *********************************************************************/ + # define opal_atomic_compare_exchange_strong_32(addr, compare, value) \ atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_relaxed, \ memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_64(addr, compare, value) \ - atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_relaxed, \ - memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_ptr(addr, compare, value) \ - 
atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_relaxed, \ - memory_order_relaxed) # define opal_atomic_compare_exchange_strong_acq_32(addr, compare, value) \ atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_acquire, \ memory_order_relaxed) +# define opal_atomic_compare_exchange_strong_rel_32(addr, compare, value) \ + atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_release, \ + memory_order_relaxed) + +# define opal_atomic_compare_exchange_strong_64(addr, compare, value) \ + atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_relaxed, \ + memory_order_relaxed) # define opal_atomic_compare_exchange_strong_acq_64(addr, compare, value) \ atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_acquire, \ memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_acq_ptr(addr, compare, value) \ - atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_acquire, \ +# define opal_atomic_compare_exchange_strong_rel_64(addr, compare, value) \ + atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_release, \ memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_rel_32(addr, compare, value) \ - atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_release, \ +# define opal_atomic_compare_exchange_strong_ptr(addr, compare, value) \ + atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_relaxed, \ memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_rel_64(addr, compare, value) \ - atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_release, \ +# define opal_atomic_compare_exchange_strong_acq_ptr(addr, compare, value) \ + atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_acquire, \ memory_order_relaxed) # define opal_atomic_compare_exchange_strong_rel_ptr(addr, compare, value) \ 
atomic_compare_exchange_strong_explicit(addr, compare, value, memory_order_release, \ memory_order_relaxed) -# define opal_atomic_compare_exchange_strong(addr, oldval, newval) \ - atomic_compare_exchange_strong_explicit(addr, oldval, newval, memory_order_relaxed, \ - memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_acq(addr, oldval, newval) \ - atomic_compare_exchange_strong_explicit(addr, oldval, newval, memory_order_acquire, \ - memory_order_relaxed) -# define opal_atomic_compare_exchange_strong_rel(addr, oldval, newval) \ - atomic_compare_exchange_strong_explicit(addr, oldval, newval, memory_order_release, \ - memory_order_relaxed) +# if OPAL_HAVE_C11_CSWAP_INT128 + +/* the C11 atomic compare-exchange is lock free so use it */ +# define opal_atomic_compare_exchange_strong_128 atomic_compare_exchange_strong + +# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 + +# elif OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 + +/* fall back on the __sync builtin if available since it will emit the expected instruction on + * x86_64 (cmpxchng16b) */ +__opal_attribute_always_inline__ static inline bool +opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t *addr, opal_int128_t *oldval, + opal_int128_t newval) +{ + opal_int128_t prev = __sync_val_compare_and_swap(addr, *oldval, newval); + bool ret = prev == *oldval; + *oldval = prev; + return ret; +} + +# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 + +# else + +# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 + +# endif + + +/********************************************************************** + * + * Swap + * + *********************************************************************/ # define opal_atomic_swap_32(addr, value) \ atomic_exchange_explicit((_Atomic unsigned int *) addr, value, memory_order_relaxed) @@ -133,6 +161,7 @@ static inline void opal_atomic_rmb(void) # define opal_atomic_swap_ptr(addr, value) \ atomic_exchange_explicit((_Atomic unsigned long *) addr, value, memory_order_relaxed) + # define 
OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(op, bits, type, operator) \ static inline type opal_atomic_fetch_##op##_##bits(opal_atomic_##type *addr, type value) \ { \ @@ -267,33 +296,4 @@ static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) atomic_flag_clear(lock); } -# if OPAL_HAVE_C11_CSWAP_INT128 - -/* the C11 atomic compare-exchange is lock free so use it */ -# define opal_atomic_compare_exchange_strong_128 atomic_compare_exchange_strong - -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 - -# elif OPAL_HAVE_SYNC_BUILTIN_CSWAP_INT128 - -/* fall back on the __sync builtin if available since it will emit the expected instruction on - * x86_64 (cmpxchng16b) */ -__opal_attribute_always_inline__ static inline bool -opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t *addr, opal_int128_t *oldval, - opal_int128_t newval) -{ - opal_int128_t prev = __sync_val_compare_and_swap(addr, *oldval, newval); - bool ret = prev == *oldval; - *oldval = prev; - return ret; -} - -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 - -# else - -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 - -# endif - #endif /* !defined(OPAL_ATOMIC_STDC_H) */ diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index 7ead6e5b0ae..2570147dab7 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -36,20 +36,16 @@ * *********************************************************************/ -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 #define OPAL_HAVE_ATOMIC_AND_32 1 #define OPAL_HAVE_ATOMIC_OR_32 1 #define OPAL_HAVE_ATOMIC_XOR_32 1 #define OPAL_HAVE_ATOMIC_SUB_32 1 -#define OPAL_HAVE_ATOMIC_SWAP_32 1 -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1 #define OPAL_HAVE_ATOMIC_AND_64 1 #define OPAL_HAVE_ATOMIC_OR_64 1 #define OPAL_HAVE_ATOMIC_XOR_64 1 #define OPAL_HAVE_ATOMIC_SUB_64 1 -#define OPAL_HAVE_ATOMIC_SWAP_64 1 
/********************************************************************** @@ -87,7 +83,7 @@ static inline void opal_atomic_wmb(void) /********************************************************************** * - * Atomic math operations + * Compare and Swap * *********************************************************************/ @@ -99,6 +95,13 @@ static inline void opal_atomic_wmb(void) # pragma error_messages(off, E_ARG_INCOMPATIBLE_WITH_ARG_L) #endif +static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, + int32_t *oldval, int32_t newval) +{ + return __atomic_compare_exchange_n(addr, oldval, newval, false, __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED); +} + static inline bool opal_atomic_compare_exchange_strong_acq_32(opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { @@ -113,45 +116,13 @@ static inline bool opal_atomic_compare_exchange_strong_rel_32(opal_atomic_int32_ __ATOMIC_RELAXED); } -static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, - int32_t *oldval, int32_t newval) +static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, + int64_t *oldval, int64_t newval) { return __atomic_compare_exchange_n(addr, oldval, newval, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } -static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) -{ - int32_t oldval; - __atomic_exchange(addr, &newval, &oldval, __ATOMIC_RELAXED); - return oldval; -} - -static inline int32_t opal_atomic_fetch_add_32(opal_atomic_int32_t *addr, int32_t delta) -{ - return __atomic_fetch_add(addr, delta, __ATOMIC_RELAXED); -} - -static inline int32_t opal_atomic_fetch_and_32(opal_atomic_int32_t *addr, int32_t value) -{ - return __atomic_fetch_and(addr, value, __ATOMIC_RELAXED); -} - -static inline int32_t opal_atomic_fetch_or_32(opal_atomic_int32_t *addr, int32_t value) -{ - return __atomic_fetch_or(addr, value, __ATOMIC_RELAXED); -} - -static inline int32_t 
opal_atomic_fetch_xor_32(opal_atomic_int32_t *addr, int32_t value) -{ - return __atomic_fetch_xor(addr, value, __ATOMIC_RELAXED); -} - -static inline int32_t opal_atomic_fetch_sub_32(opal_atomic_int32_t *addr, int32_t delta) -{ - return __atomic_fetch_sub(addr, delta, __ATOMIC_RELAXED); -} - static inline bool opal_atomic_compare_exchange_strong_acq_64(opal_atomic_int64_t *addr, int64_t *oldval, int64_t newval) { @@ -166,44 +137,7 @@ static inline bool opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_ __ATOMIC_RELAXED); } -static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, - int64_t *oldval, int64_t newval) -{ - return __atomic_compare_exchange_n(addr, oldval, newval, false, __ATOMIC_ACQUIRE, - __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) -{ - int64_t oldval; - __atomic_exchange(addr, &newval, &oldval, __ATOMIC_RELAXED); - return oldval; -} - -static inline int64_t opal_atomic_fetch_add_64(opal_atomic_int64_t *addr, int64_t delta) -{ - return __atomic_fetch_add(addr, delta, __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_fetch_and_64(opal_atomic_int64_t *addr, int64_t value) -{ - return __atomic_fetch_and(addr, value, __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_fetch_or_64(opal_atomic_int64_t *addr, int64_t value) -{ - return __atomic_fetch_or(addr, value, __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_fetch_xor_64(opal_atomic_int64_t *addr, int64_t value) -{ - return __atomic_fetch_xor(addr, value, __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_fetch_sub_64(opal_atomic_int64_t *addr, int64_t delta) -{ - return __atomic_fetch_sub(addr, delta, __ATOMIC_RELAXED); -} +#include "opal/sys/atomic_impl_ptr_cswap.h" #if OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128 @@ -223,7 +157,7 @@ static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t /* __atomic version is not lock-free so use legacy __sync 
version */ -static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_opal_int128_t *addr, +static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t *addr, opal_int128_t *oldval, opal_int128_t newval) { @@ -235,6 +169,35 @@ static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_opal_int1 #endif + +/********************************************************************** + * + * Swap + * + *********************************************************************/ + +static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) +{ + int32_t oldval; + __atomic_exchange(addr, &newval, &oldval, __ATOMIC_RELAXED); + return oldval; +} + +static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) +{ + int64_t oldval; + __atomic_exchange(addr, &newval, &oldval, __ATOMIC_RELAXED); + return oldval; +} + +static inline intptr_t opal_atomic_swap_ptr(opal_atomic_intptr_t *addr, intptr_t newval) +{ + intptr_t oldval; + __atomic_exchange(addr, &newval, &oldval, __ATOMIC_RELAXED); + return oldval; +} + + #if defined(__HLE__) # include @@ -277,6 +240,65 @@ static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) #endif + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + + +static inline int32_t opal_atomic_fetch_add_32(opal_atomic_int32_t *addr, int32_t delta) +{ + return __atomic_fetch_add(addr, delta, __ATOMIC_RELAXED); +} + +static inline int32_t opal_atomic_fetch_and_32(opal_atomic_int32_t *addr, int32_t value) +{ + return __atomic_fetch_and(addr, value, __ATOMIC_RELAXED); +} + +static inline int32_t opal_atomic_fetch_or_32(opal_atomic_int32_t *addr, int32_t value) +{ + return __atomic_fetch_or(addr, value, __ATOMIC_RELAXED); +} + +static inline int32_t opal_atomic_fetch_xor_32(opal_atomic_int32_t *addr, int32_t value) +{ + return 
__atomic_fetch_xor(addr, value, __ATOMIC_RELAXED); +} + +static inline int32_t opal_atomic_fetch_sub_32(opal_atomic_int32_t *addr, int32_t delta) +{ + return __atomic_fetch_sub(addr, delta, __ATOMIC_RELAXED); +} + +static inline int64_t opal_atomic_fetch_add_64(opal_atomic_int64_t *addr, int64_t delta) +{ + return __atomic_fetch_add(addr, delta, __ATOMIC_RELAXED); +} + +static inline int64_t opal_atomic_fetch_and_64(opal_atomic_int64_t *addr, int64_t value) +{ + return __atomic_fetch_and(addr, value, __ATOMIC_RELAXED); +} + +static inline int64_t opal_atomic_fetch_or_64(opal_atomic_int64_t *addr, int64_t value) +{ + return __atomic_fetch_or(addr, value, __ATOMIC_RELAXED); +} + +static inline int64_t opal_atomic_fetch_xor_64(opal_atomic_int64_t *addr, int64_t value) +{ + return __atomic_fetch_xor(addr, value, __ATOMIC_RELAXED); +} + +static inline int64_t opal_atomic_fetch_sub_64(opal_atomic_int64_t *addr, int64_t delta) +{ + return __atomic_fetch_sub(addr, delta, __ATOMIC_RELAXED); +} + + #if defined(__SUNPRO_C) || defined(__SUNPRO_CC) # pragma error_messages(default, E_ARG_INCOMPATIBLE_WITH_ARG_L) #endif diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index e533c0a8943..02a732add16 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -36,8 +36,6 @@ * *********************************************************************/ -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -#define OPAL_HAVE_ATOMIC_SWAP_32 1 #define OPAL_HAVE_ATOMIC_LLSC_32 1 #define OPAL_HAVE_ATOMIC_ADD_32 1 @@ -45,8 +43,6 @@ #define OPAL_HAVE_ATOMIC_OR_32 1 #define OPAL_HAVE_ATOMIC_XOR_32 1 #define OPAL_HAVE_ATOMIC_SUB_32 1 -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -#define OPAL_HAVE_ATOMIC_SWAP_64 1 #define OPAL_HAVE_ATOMIC_LLSC_64 1 #define OPAL_HAVE_ATOMIC_ADD_64 1 #define OPAL_HAVE_ATOMIC_AND_64 1 @@ -54,6 +50,23 @@ #define OPAL_HAVE_ATOMIC_XOR_64 1 #define OPAL_HAVE_ATOMIC_SUB_64 1 +#if 
defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__) || defined(__ibmxl__) +/* work-around bizarre xlc bug in which it sign-extends + a pointer to a 32-bit signed integer */ +# define OPAL_ASM_ADDR(a) ((uintptr_t) a) +#else +# define OPAL_ASM_ADDR(a) (a) +#endif + +#if defined(__PGI) +/* work-around for bug in PGI 16.5-16.7 where the compiler fails to + * correctly emit load instructions for 64-bit operands. without this + * it will emit lwz instead of ld to load the 64-bit operand. */ +# define OPAL_ASM_VALUE64(x) (void *) (intptr_t)(x) +#else +# define OPAL_ASM_VALUE64(x) x +#endif + /********************************************************************** * @@ -84,27 +97,10 @@ static inline void opal_atomic_isync(void) /********************************************************************** * - * Atomic math operations + * Compare and Swap * *********************************************************************/ -#if defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__) || defined(__ibmxl__) -/* work-around bizzare xlc bug in which it sign-extends - a pointer to a 32-bit signed integer */ -# define OPAL_ASM_ADDR(a) ((uintptr_t) a) -#else -# define OPAL_ASM_ADDR(a) (a) -#endif - -#if defined(__PGI) -/* work-around for bug in PGI 16.5-16.7 where the compiler fails to - * correctly emit load instructions for 64-bit operands. without this - * it will emit lwz instead of ld to load the 64-bit operand. */ -# define OPAL_ASM_VALUE64(x) (void *) (intptr_t)(x) -#else -# define OPAL_ASM_VALUE64(x) x -#endif - static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { @@ -126,32 +122,6 @@ static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *a return ret; } -/* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. 
The reason - * is that even with an always_inline attribute the compiler may still emit instructions to store - * then load the arguments to/from the stack. This sequence may cause the ll reservation to be - * cancelled. */ -#define opal_atomic_ll_32(addr, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - __asm__ __volatile__("lwarx %0, 0, %1 \n\t" : "=&r"(ret) : "r"(_addr)); \ - } while (0) - -#define opal_atomic_sc_32(addr, value, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - int32_t _ret, _foo, _newval = (int32_t) value; \ - \ - __asm__ __volatile__(" stwcx. %4, 0, %3 \n\t" \ - " li %0,0 \n\t" \ - " bne- 1f \n\t" \ - " ori %0,%0,1 \n\t" \ - "1:" \ - : "=r"(_ret), "=m"(*_addr), "=r"(_foo) \ - : "r"(_addr), "r"(_newval) \ - : "cc", "memory"); \ - ret = _ret; \ - } while (0) - static inline bool opal_atomic_compare_exchange_strong_acq_32(opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { @@ -170,6 +140,56 @@ static inline bool opal_atomic_compare_exchange_strong_rel_32(opal_atomic_int32_ return opal_atomic_compare_exchange_strong_32(addr, oldval, newval); } + +static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, + int64_t *oldval, int64_t newval) +{ + int64_t prev; + bool ret; + + __asm__ __volatile__("1: ldarx %0, 0, %2 \n\t" + " cmpd 0, %0, %3 \n\t" + " bne- 2f \n\t" + " stdcx. 
%4, 0, %2 \n\t" + " bne- 1b \n\t" + "2:" + : "=&r"(prev), "=m"(*addr) + : "r"(addr), "r"(OPAL_ASM_VALUE64(*oldval)), "r"(OPAL_ASM_VALUE64(newval)), + "m"(*addr) + : "cc", "memory"); + + ret = (prev == *oldval); + *oldval = prev; + return ret; +} + +static inline bool opal_atomic_compare_exchange_strong_acq_64(opal_atomic_int64_t *addr, + int64_t *oldval, int64_t newval) +{ + bool rc; + + rc = opal_atomic_compare_exchange_strong_64(addr, oldval, newval); + opal_atomic_rmb(); + + return rc; +} + +static inline bool opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_t *addr, + int64_t *oldval, int64_t newval) +{ + opal_atomic_wmb(); + return opal_atomic_compare_exchange_strong_64(addr, oldval, newval); +} + +#include "opal/sys/atomic_impl_ptr_cswap.h" + + +/********************************************************************** + * + * Swap + * + *********************************************************************/ + static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) { int32_t ret; @@ -184,6 +204,56 @@ static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t new return ret; } +static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) +{ + int64_t ret; + + __asm__ __volatile__("1: ldarx %0, 0, %2 \n\t" + " stdcx. %3, 0, %2 \n\t" + " bne- 1b \n\t" + : "=&r"(ret), "=m"(*addr) + : "r"(addr), "r"(OPAL_ASM_VALUE64(newval)) + : "cc", "memory"); + + return ret; +} + +#include "opal/sys/atomic_impl_ptr_swap.h" + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + + +/* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. The reason + * is that even with an always_inline attribute the compiler may still emit instructions to store + * then load the arguments to/from the stack. 
This sequence may cause the ll reservation to be + * cancelled. */ +#define opal_atomic_ll_32(addr, ret) \ + do { \ + opal_atomic_int32_t *_addr = (addr); \ + __asm__ __volatile__("lwarx %0, 0, %1 \n\t" : "=&r"(ret) : "r"(_addr)); \ + } while (0) + +#define opal_atomic_sc_32(addr, value, ret) \ + do { \ + opal_atomic_int32_t *_addr = (addr); \ + int32_t _ret, _foo, _newval = (int32_t) value; \ + \ + __asm__ __volatile__(" stwcx. %4, 0, %3 \n\t" \ + " li %0,0 \n\t" \ + " bne- 1f \n\t" \ + " ori %0,%0,1 \n\t" \ + "1:" \ + : "=r"(_ret), "=m"(*_addr), "=r"(_foo) \ + : "r"(_addr), "r"(_newval) \ + : "cc", "memory"); \ + ret = _ret; \ + } while (0) + #define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ static inline int64_t opal_atomic_fetch_##type##_64(opal_atomic_int64_t *v, int64_t val) \ @@ -207,28 +277,6 @@ OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor) OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) -static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *addr, - int64_t *oldval, int64_t newval) -{ - int64_t prev; - bool ret; - - __asm__ __volatile__("1: ldarx %0, 0, %2 \n\t" - " cmpd 0, %0, %3 \n\t" - " bne- 2f \n\t" - " stdcx. %4, 0, %2 \n\t" - " bne- 1b \n\t" - "2:" - : "=&r"(prev), "=m"(*addr) - : "r"(addr), "r"(OPAL_ASM_VALUE64(*oldval)), "r"(OPAL_ASM_VALUE64(newval)), - "m"(*addr) - : "cc", "memory"); - - ret = (prev == *oldval); - *oldval = prev; - return ret; -} - #define opal_atomic_ll_64(addr, ret) \ do { \ opal_atomic_int64_t *_addr = (addr); \ @@ -252,37 +300,6 @@ static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *a ret = _ret; \ } while (0) -static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t newval) -{ - int64_t ret; - - __asm__ __volatile__("1: ldarx %0, 0, %2 \n\t" - " stdcx. 
%3, 0, %2 \n\t" - " bne- 1b \n\t" - : "=&r"(ret), "=m"(*addr) - : "r"(addr), "r"(OPAL_ASM_VALUE64(newval)) - : "cc", "memory"); - - return ret; -} - -static inline bool opal_atomic_compare_exchange_strong_acq_64(opal_atomic_int64_t *addr, - int64_t *oldval, int64_t newval) -{ - bool rc; - - rc = opal_atomic_compare_exchange_strong_64(addr, oldval, newval); - opal_atomic_rmb(); - - return rc; -} - -static inline bool opal_atomic_compare_exchange_strong_rel_64(opal_atomic_int64_t *addr, - int64_t *oldval, int64_t newval) -{ - opal_atomic_wmb(); - return opal_atomic_compare_exchange_strong_64(addr, oldval, newval); -} #define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ static inline int32_t opal_atomic_fetch_##type##_32(opal_atomic_int32_t *v, int val) \ diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h index d2d2cd40991..569490e753e 100644 --- a/opal/include/opal/sys/x86_64/atomic.h +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -33,16 +33,6 @@ #define SMPLOCK "lock; " -/********************************************************************** - * - * Define constants for AMD64 / x86_64 / EM64T / ... 
- * - *********************************************************************/ - -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 - -#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 - /********************************************************************** * @@ -69,11 +59,13 @@ static inline void opal_atomic_isync(void) { } + /********************************************************************** * - * Atomic math operations + * Compare and Swap * *********************************************************************/ + static inline bool opal_atomic_compare_exchange_strong_32(opal_atomic_int32_t *addr, int32_t *oldval, int32_t newval) { @@ -106,6 +98,8 @@ static inline bool opal_atomic_compare_exchange_strong_64(opal_atomic_int64_t *a #define opal_atomic_compare_exchange_strong_acq_64 opal_atomic_compare_exchange_strong_64 #define opal_atomic_compare_exchange_strong_rel_64 opal_atomic_compare_exchange_strong_64 +#include "opal/sys/atomic_impl_ptr_cswap.h" + #if OPAL_HAVE_CMPXCHG16B && HAVE_OPAL_INT128_T static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t *addr, @@ -128,9 +122,14 @@ static inline bool opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t # define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 1 -# define OPAL_HAVE_ATOMIC_SWAP_32 1 +#endif -# define OPAL_HAVE_ATOMIC_SWAP_64 1 + +/********************************************************************** + * + * Swap + * + *********************************************************************/ static inline int32_t opal_atomic_swap_32(opal_atomic_int32_t *addr, int32_t newval) { @@ -148,7 +147,14 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new return oldval; } -# define OPAL_HAVE_ATOMIC_ADD_32 1 +#include "opal/sys/atomic_impl_ptr_swap.h" + + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ /** * atomic_add - add integer 
to atomic variable diff --git a/test/asm/atomic_cmpset.c b/test/asm/atomic_cmpset.c index 8a948608bf0..9f6d84588da 100644 --- a/test/asm/atomic_cmpset.c +++ b/test/asm/atomic_cmpset.c @@ -187,42 +187,6 @@ int main(int argc, char *argv[]) assert(old128 == 42); #endif - /* -- cmpset int tests -- */ - - volint = 42, oldint = 42, newint = 50; - assert(opal_atomic_compare_exchange_strong(&volint, &oldint, newint) == true); - opal_atomic_rmb(); - assert(volint == newint); - assert(oldint == 42); - - volint = 42, oldint = 420, newint = 50; - assert(opal_atomic_compare_exchange_strong(&volint, &oldint, newint) == false); - opal_atomic_rmb(); - assert(volint == 42); - assert(oldint == 42); - - volint = 42, oldint = 42, newint = 50; - assert(opal_atomic_compare_exchange_strong_acq(&volint, &oldint, newint) == true); - assert(volint == newint); - assert(oldint == 42); - - volint = 42, oldint = 420, newint = 50; - assert(opal_atomic_compare_exchange_strong_acq(&volint, &oldint, newint) == false); - assert(volint == 42); - assert(oldint == 42); - - volint = 42, oldint = 42, newint = 50; - assert(opal_atomic_compare_exchange_strong_rel(&volint, &oldint, newint) == true); - opal_atomic_rmb(); - assert(volint == newint); - assert(oldint == 42); - - volint = 42, oldint = 420, newint = 50; - assert(opal_atomic_compare_exchange_strong_rel(&volint, &oldint, newint) == false); - opal_atomic_rmb(); - assert(volint == 42); - assert(oldint == 42); - /* -- cmpset ptr tests -- */ volptr = 42, oldptr = 42, newptr = 50; From e9d7bb9d538afbdfd6f8412f926ab1ae8f50a73f Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 14 Jan 2022 16:51:30 +0000 Subject: [PATCH 06/11] Make spinlocks mandatory With the change to make compare and swap mandatory, we can always implement spinlocks, so make them mandatory and simplify the interface. 
Signed-off-by: Brian Barrett --- opal/include/opal/sys/Makefile.am | 1 + opal/include/opal/sys/arm64/atomic.h | 9 +++ opal/include/opal/sys/atomic.h | 77 ++------------------ opal/include/opal/sys/atomic_impl.h | 41 ----------- opal/include/opal/sys/atomic_impl_spinlock.h | 58 +++++++++++++++ opal/include/opal/sys/atomic_stdc.h | 64 ++++++++-------- opal/include/opal/sys/gcc_builtin/atomic.h | 20 +++-- opal/include/opal/sys/powerpc/atomic.h | 9 +++ opal/include/opal/sys/x86_64/atomic.h | 9 +++ opal/include/opal_stdatomic.h | 16 ++++ opal/mca/threads/mutex.h | 14 +--- 11 files changed, 154 insertions(+), 164 deletions(-) create mode 100644 opal/include/opal/sys/atomic_impl_spinlock.h diff --git a/opal/include/opal/sys/Makefile.am b/opal/include/opal/sys/Makefile.am index 510bf59a965..a12a2b0af74 100644 --- a/opal/include/opal/sys/Makefile.am +++ b/opal/include/opal/sys/Makefile.am @@ -34,6 +34,7 @@ headers += \ opal/sys/atomic_impl.h \ opal/sys/atomic_impl_ptr_cswap.h \ opal/sys/atomic_impl_ptr_swap.h \ + opal/sys/atomic_impl_spinlock.h \ opal/sys/timer.h \ opal/sys/cma.h diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index 72823616128..f3088d56172 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -255,6 +255,15 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new #include "opal/sys/atomic_impl_ptr_swap.h" +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ + +#include "opal/sys/atomic_impl_spinlock.h" + + /********************************************************************** * * Atomic math operations diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 78f60a048eb..79033704d1e 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -41,8 +41,6 @@ * The following 
#defines will be true / false based on * assembly support: * - * - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks - * * Note that for the Atomic math, atomic add/sub may be implemented as * C code using opal_atomic_compare_exchange. The appearance of atomic * operation will be upheld in these cases. @@ -66,27 +64,6 @@ BEGIN_C_DECLS -/********************************************************************** - * - * Data structures for atomic ops - * - *********************************************************************/ -/** - * Volatile lock object (with optional padding). - * - * \note The internals of the lock are included here, but should be - * considered private. The implementation currently in use may choose - * to use an int or unsigned char as the lock value - the user is not - * informed either way. - */ -struct opal_atomic_lock_t { - union { - opal_atomic_int32_t lock; /**< The lock address (an integer) */ - volatile unsigned char sparc_lock; /**< The lock address on sparc */ - char padding[sizeof(int)]; /**< Array for optional padding */ - } u; -}; -typedef struct opal_atomic_lock_t opal_atomic_lock_t; /********************************************************************** * @@ -105,16 +82,6 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; #define OPAL_HAVE_INLINE_ATOMIC_XOR_64 1 #define OPAL_HAVE_INLINE_ATOMIC_SUB_64 1 -/** - * Enumeration of lock states - */ -enum { OPAL_ATOMIC_LOCK_UNLOCKED = 0, OPAL_ATOMIC_LOCK_LOCKED = 1 }; - -# define OPAL_ATOMIC_LOCK_INIT \ - { \ - .u = {.lock = OPAL_ATOMIC_LOCK_UNLOCKED } \ - } - /********************************************************************** * * Load the appropriate architecture files and set some reasonable @@ -374,30 +341,16 @@ static inline intptr_t opal_atomic_swap_ptr(opal_atomic_intptr_t *addr, intptr_t /********************************************************************** * - * Atomic spinlocks - always inlined, if have atomic compare-and-swap + * Atomic spinlocks * 
*********************************************************************/ - -# if !defined(OPAL_HAVE_ATOMIC_SPINLOCKS) && !defined(DOXYGEN) -/* 0 is more like "pending" - we'll fix up at the end after all - the static inline functions are declared */ -# define OPAL_HAVE_ATOMIC_SPINLOCKS 0 -# endif - -# if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS \ - || (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) - /** * Initialize a lock to value * * @param lock Address of the lock * @param value Initial value to set lock to */ -# if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 -static inline -# endif - void - opal_atomic_lock_init(opal_atomic_lock_t *lock, int32_t value); +static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, int32_t value); /** * Try to acquire a lock. @@ -405,42 +358,22 @@ static inline * @param lock Address of the lock. * @return 0 if the lock was acquired, 1 otherwise. */ -# if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 -static inline -# endif - int - opal_atomic_trylock(opal_atomic_lock_t *lock); +static inline int opal_atomic_trylock(opal_atomic_lock_t *lock); /** * Acquire a lock by spinning. * * @param lock Address of the lock. */ -# if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 -static inline -# endif - void - opal_atomic_lock(opal_atomic_lock_t *lock); +static inline void opal_atomic_lock(opal_atomic_lock_t *lock); /** * Release a lock. * * @param lock Address of the lock. 
*/ -# if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 -static inline -# endif - void - opal_atomic_unlock(opal_atomic_lock_t *lock); - -# if OPAL_HAVE_ATOMIC_SPINLOCKS == 0 -# undef OPAL_HAVE_ATOMIC_SPINLOCKS -# define OPAL_HAVE_ATOMIC_SPINLOCKS \ - (OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 || OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64) -# define OPAL_NEED_INLINE_ATOMIC_SPINLOCKS 1 -# endif +static inline void opal_atomic_unlock(opal_atomic_lock_t *lock); -# endif /* OPAL_HAVE_ATOMIC_SPINLOCKS */ /********************************************************************** * diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index 817e580c502..211d51a576d 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -321,44 +321,3 @@ static inline intptr_t opal_atomic_sub_fetch_ptr(opal_atomic_intptr_t *addr, voi return 0; # endif } - - -/********************************************************************** - * - * Atomic spinlocks - * - *********************************************************************/ -#ifdef OPAL_NEED_INLINE_ATOMIC_SPINLOCKS - -/* - * Lock initialization function. It set the lock to UNLOCKED. - */ -static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, int32_t value) -{ - lock->u.lock = value; -} - -static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) -{ - int32_t unlocked = OPAL_ATOMIC_LOCK_UNLOCKED; - bool ret = opal_atomic_compare_exchange_strong_acq_32(&lock->u.lock, &unlocked, - OPAL_ATOMIC_LOCK_LOCKED); - return (ret == false) ? 
1 : 0; -} - -static inline void opal_atomic_lock(opal_atomic_lock_t *lock) -{ - while (opal_atomic_trylock(lock)) { - while (lock->u.lock == OPAL_ATOMIC_LOCK_LOCKED) { - /* spin */; - } - } -} - -static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) -{ - opal_atomic_wmb(); - lock->u.lock = OPAL_ATOMIC_LOCK_UNLOCKED; -} - -#endif /* OPAL_HAVE_ATOMIC_SPINLOCKS */ diff --git a/opal/include/opal/sys/atomic_impl_spinlock.h b/opal/include/opal/sys/atomic_impl_spinlock.h new file mode 100644 index 00000000000..1f92f5ae72b --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_spinlock.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * Compare-and-swap based implementation of the atomic interface + */ + +#ifndef ATOMIC_SPINLOCK_IMPL_H +#define ATOMIC_SPINLOCK_IMPL_H + +static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, int32_t value) +{ + *lock = value; + opal_atomic_wmb(); +} + +static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) +{ + int32_t unlocked = OPAL_ATOMIC_LOCK_UNLOCKED; + bool ret = opal_atomic_compare_exchange_strong_acq_32(lock, &unlocked, + OPAL_ATOMIC_LOCK_LOCKED); + return (ret == false) ? 
1 : 0; +} + +static inline void opal_atomic_lock(opal_atomic_lock_t *lock) +{ + while (opal_atomic_trylock(lock)) { + while (*lock == OPAL_ATOMIC_LOCK_LOCKED) { + /* spin */; + } + } +} + +static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) +{ + opal_atomic_wmb(); + *lock = OPAL_ATOMIC_LOCK_UNLOCKED; +} + +#endif /* #ifndef ATOMIC_SPINLOCK_IMPL_H */ diff --git a/opal/include/opal/sys/atomic_stdc.h b/opal/include/opal/sys/atomic_stdc.h index c572f7c1994..5d651748bd7 100644 --- a/opal/include/opal/sys/atomic_stdc.h +++ b/opal/include/opal/sys/atomic_stdc.h @@ -50,8 +50,6 @@ # define OPAL_HAVE_ATOMIC_MIN_64 1 # define OPAL_HAVE_ATOMIC_MAX_64 1 -# define OPAL_HAVE_ATOMIC_SPINLOCKS 1 - /********************************************************************** * @@ -162,6 +160,36 @@ opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t *addr, opal_int128_ atomic_exchange_explicit((_Atomic unsigned long *) addr, value, memory_order_relaxed) +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ +/* + * Lock initialization function. It set the lock to UNLOCKED. 
+ */ +static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, bool value) +{ + atomic_flag_clear_explicit(lock, memory_order_relaxed); +} + +static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) +{ + return (int) atomic_flag_test_and_set(lock); +} + +static inline void opal_atomic_lock(opal_atomic_lock_t *lock) +{ + while (opal_atomic_trylock(lock)) { + } +} + +static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) +{ + atomic_flag_clear(lock); +} + + # define OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(op, bits, type, operator) \ static inline type opal_atomic_fetch_##op##_##bits(opal_atomic_##type *addr, type value) \ { \ @@ -173,6 +201,7 @@ opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t *addr, opal_int128_ return atomic_fetch_##op##_explicit(addr, value, memory_order_relaxed) operator value; \ } + OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 32, int32_t, +) OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 64, int64_t, +) OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, size_t, size_t, +) @@ -265,35 +294,4 @@ static inline int64_t opal_atomic_max_fetch_64(opal_atomic_int64_t *addr, int64_ return old >= value ? old : value; } -# define OPAL_ATOMIC_LOCK_UNLOCKED false -# define OPAL_ATOMIC_LOCK_LOCKED true - -# define OPAL_ATOMIC_LOCK_INIT ATOMIC_FLAG_INIT - -typedef atomic_flag opal_atomic_lock_t; - -/* - * Lock initialization function. It set the lock to UNLOCKED. 
- */ -static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, bool value) -{ - atomic_flag_clear_explicit(lock, memory_order_relaxed); -} - -static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) -{ - return (int) atomic_flag_test_and_set(lock); -} - -static inline void opal_atomic_lock(opal_atomic_lock_t *lock) -{ - while (opal_atomic_trylock(lock)) { - } -} - -static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) -{ - atomic_flag_clear(lock); -} - #endif /* !defined(OPAL_ATOMIC_STDC_H) */ diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index 2570147dab7..a30e9c329b3 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -198,20 +198,24 @@ static inline intptr_t opal_atomic_swap_ptr(opal_atomic_intptr_t *addr, intptr_t } +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ + #if defined(__HLE__) # include <immintrin.h> -# define OPAL_HAVE_ATOMIC_SPINLOCKS 1 - static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, int32_t value) { - lock->u.lock = value; + *lock = value; } static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) { - int ret = __atomic_exchange_n(&lock->u.lock, OPAL_ATOMIC_LOCK_LOCKED, + int ret = __atomic_exchange_n(lock, OPAL_ATOMIC_LOCK_LOCKED, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE); if (OPAL_ATOMIC_LOCK_LOCKED == ret) { /* abort the transaction */ @@ -225,7 +229,7 @@ static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) static inline void opal_atomic_lock(opal_atomic_lock_t *lock) { while (OPAL_ATOMIC_LOCK_LOCKED - == __atomic_exchange_n(&lock->u.lock, OPAL_ATOMIC_LOCK_LOCKED, + == __atomic_exchange_n(lock, OPAL_ATOMIC_LOCK_LOCKED, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE)) { /* abort the transaction */ _mm_pause(); @@ -234,10 +238,14 @@ static inline void
opal_atomic_lock(opal_atomic_lock_t *lock) static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) { - __atomic_store_n(&lock->u.lock, OPAL_ATOMIC_LOCK_UNLOCKED, + __atomic_store_n(lock, OPAL_ATOMIC_LOCK_UNLOCKED, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE); } +#else /* #if defined(__HLE__) */ + +#include "opal/sys/atomic_impl_spinlock.h" + #endif diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 02a732add16..56c8ce648f9 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -221,6 +221,15 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new #include "opal/sys/atomic_impl_ptr_swap.h" +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ + +#include "opal/sys/atomic_impl_spinlock.h" + + /********************************************************************** * * Atomic math operations diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h index 569490e753e..3a7d49acae4 100644 --- a/opal/include/opal/sys/x86_64/atomic.h +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -150,6 +150,15 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new #include "opal/sys/atomic_impl_ptr_swap.h" +/********************************************************************** + * + * Atomic spinlocks + * + *********************************************************************/ + +#include "opal/sys/atomic_impl_spinlock.h" + + /********************************************************************** * * Atomic math operations diff --git a/opal/include/opal_stdatomic.h b/opal/include/opal_stdatomic.h index 4af17bc2b42..f32b2de9a7d 100644 --- a/opal/include/opal_stdatomic.h +++ b/opal/include/opal_stdatomic.h @@ -2,6 +2,8 @@ /* * Copyright (c) 2018 Los Alamos National Security, LLC.
All rights * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,6 +31,13 @@ typedef volatile ssize_t opal_atomic_ssize_t; typedef volatile intptr_t opal_atomic_intptr_t; typedef volatile uintptr_t opal_atomic_uintptr_t; +typedef opal_atomic_int32_t opal_atomic_lock_t; + +enum { OPAL_ATOMIC_LOCK_UNLOCKED = 0, + OPAL_ATOMIC_LOCK_LOCKED = 1 }; + +# define OPAL_ATOMIC_LOCK_INIT OPAL_ATOMIC_LOCK_UNLOCKED + # else /* OPAL_HAVE_C__ATOMIC */ # include @@ -52,6 +61,13 @@ typedef _Atomic ssize_t opal_atomic_ssize_t; typedef _Atomic intptr_t opal_atomic_intptr_t; typedef _Atomic uintptr_t opal_atomic_uintptr_t; +typedef atomic_flag opal_atomic_lock_t; + +# define OPAL_ATOMIC_LOCK_UNLOCKED false +# define OPAL_ATOMIC_LOCK_LOCKED true + +# define OPAL_ATOMIC_LOCK_INIT ATOMIC_FLAG_INIT + # endif /* OPAL_HAVE_C__ATOMIC */ # if HAVE_OPAL_INT128_T diff --git a/opal/mca/threads/mutex.h b/opal/mca/threads/mutex.h index e6c9dc3f5bf..94846e4d3c7 100644 --- a/opal/mca/threads/mutex.h +++ b/opal/mca/threads/mutex.h @@ -18,6 +18,8 @@ * Copyright (c) 2019 Sandia National Laboratories. All rights reserved. * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. * Copyright (c) 2021 Argonne National Laboratory. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* * $COPYRIGHT$ * @@ -138,11 +140,7 @@ static inline void opal_mutex_unlock(opal_mutex_t *mutex) */ static inline int opal_mutex_atomic_trylock(opal_mutex_t *mutex) { -#if OPAL_HAVE_ATOMIC_SPINLOCKS return opal_atomic_trylock(&mutex->m_lock_atomic); -#else - return opal_mutex_trylock(mutex); -#endif } /** @@ -152,11 +150,7 @@ static inline int opal_mutex_atomic_trylock(opal_mutex_t *mutex) */ static inline void opal_mutex_atomic_lock(opal_mutex_t *mutex) { -#if OPAL_HAVE_ATOMIC_SPINLOCKS opal_atomic_lock(&mutex->m_lock_atomic); -#else - opal_mutex_lock(mutex); -#endif } /** @@ -166,11 +160,7 @@ static inline void opal_mutex_atomic_lock(opal_mutex_t *mutex) */ static inline void opal_mutex_atomic_unlock(opal_mutex_t *mutex) { -#if OPAL_HAVE_ATOMIC_SPINLOCKS opal_atomic_unlock(&mutex->m_lock_atomic); -#else - opal_mutex_unlock(mutex); -#endif } /** From 06e49b98aaef8af6d8b18eb2f9168ef89c76b2e6 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Thu, 20 Jan 2022 18:56:05 +0000 Subject: [PATCH 07/11] Refactor atomic math interface The atomic math interface has become required based on usage, so embrace the requirement and remove all the gorp to make the interface optional. Split out the compatibility implementation into headers that can be included as required by implementations, saving us from a mess of #defines in atomic_impl.h. Reorganize the implementation headers so that the code is in the same order across implementations, for readability. 
Signed-off-by: Brian Barrett --- opal/include/opal/sys/Makefile.am | 3 +- opal/include/opal/sys/arm64/atomic.h | 32 +- opal/include/opal/sys/atomic.h | 140 +------- opal/include/opal/sys/atomic_impl.h | 323 ------------------ opal/include/opal/sys/atomic_impl_math.h | 67 ++++ .../opal/sys/atomic_impl_minmax_math.h | 104 ++++++ .../opal/sys/atomic_impl_size_t_math.h | 103 ++++++ opal/include/opal/sys/atomic_stdc.h | 116 +------ opal/include/opal/sys/gcc_builtin/atomic.h | 99 ++---- opal/include/opal/sys/powerpc/atomic.h | 140 ++++---- opal/include/opal/sys/x86_64/atomic.h | 82 +++-- 11 files changed, 478 insertions(+), 731 deletions(-) delete mode 100644 opal/include/opal/sys/atomic_impl.h create mode 100644 opal/include/opal/sys/atomic_impl_math.h create mode 100644 opal/include/opal/sys/atomic_impl_minmax_math.h create mode 100644 opal/include/opal/sys/atomic_impl_size_t_math.h diff --git a/opal/include/opal/sys/Makefile.am b/opal/include/opal/sys/Makefile.am index a12a2b0af74..fd35f33fab9 100644 --- a/opal/include/opal/sys/Makefile.am +++ b/opal/include/opal/sys/Makefile.am @@ -31,9 +31,10 @@ headers += \ opal/sys/architecture.h \ opal/sys/atomic.h \ opal/sys/atomic_stdc.h \ - opal/sys/atomic_impl.h \ + opal/sys/atomic_impl_minmax_math.h \ opal/sys/atomic_impl_ptr_cswap.h \ opal/sys/atomic_impl_ptr_swap.h \ + opal/sys/atomic_impl_size_t_math.h \ opal/sys/atomic_impl_spinlock.h \ opal/sys/timer.h \ opal/sys/cma.h diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index f3088d56172..6fb7b6db268 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -30,17 +30,6 @@ #ifndef OPAL_SYS_ARCH_ATOMIC_H #define OPAL_SYS_ARCH_ATOMIC_H 1 -#define OPAL_HAVE_ATOMIC_ADD_32 1 -#define OPAL_HAVE_ATOMIC_AND_32 1 -#define OPAL_HAVE_ATOMIC_OR_32 1 -#define OPAL_HAVE_ATOMIC_XOR_32 1 -#define OPAL_HAVE_ATOMIC_SUB_32 1 -#define OPAL_HAVE_ATOMIC_ADD_64 1 -#define OPAL_HAVE_ATOMIC_AND_64 1 -#define 
OPAL_HAVE_ATOMIC_OR_64 1 -#define OPAL_HAVE_ATOMIC_XOR_64 1 -#define OPAL_HAVE_ATOMIC_SUB_64 1 - /********************************************************************** * @@ -286,6 +275,22 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new : "cc", "memory"); \ \ return old; \ + } \ + static inline type opal_atomic_##name##_fetch_##bits(opal_atomic_##type *addr, \ + type value) \ + { \ + type newval, old; \ + int32_t tmp; \ + \ + __asm__ __volatile__("1: ldxr %" reg "1, [%3] \n" \ + " " inst " %" reg "0, %" reg "1, %" reg "4 \n" \ + " stxr %w2, %" reg "0, [%3] \n" \ + " cbnz %w2, 1b \n" \ + : "=&r"(newval), "=&r"(old), "=&r"(tmp) \ + : "r"(addr), "r"(value) \ + : "cc", "memory"); \ + \ + return newval; \ } OPAL_ASM_MAKE_ATOMIC(int32_t, 32, add, "add", "w") @@ -293,10 +298,15 @@ OPAL_ASM_MAKE_ATOMIC(int32_t, 32, and, "and", "w") OPAL_ASM_MAKE_ATOMIC(int32_t, 32, or, "orr", "w") OPAL_ASM_MAKE_ATOMIC(int32_t, 32, xor, "eor", "w") OPAL_ASM_MAKE_ATOMIC(int32_t, 32, sub, "sub", "w") + OPAL_ASM_MAKE_ATOMIC(int64_t, 64, add, "add", "") OPAL_ASM_MAKE_ATOMIC(int64_t, 64, and, "and", "") OPAL_ASM_MAKE_ATOMIC(int64_t, 64, or, "orr", "") OPAL_ASM_MAKE_ATOMIC(int64_t, 64, xor, "eor", "") OPAL_ASM_MAKE_ATOMIC(int64_t, 64, sub, "sub", "") +#include "opal/sys/atomic_impl_minmax_math.h" +#include "opal/sys/atomic_impl_size_t_math.h" + + #endif /* ! 
OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 79033704d1e..279e5555ac3 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -65,23 +65,6 @@ BEGIN_C_DECLS -/********************************************************************** - * - * Set or unset these macros in the architecture-specific atomic.h - * files if we need to specify them as inline or non-inline - * - *********************************************************************/ -#define OPAL_HAVE_INLINE_ATOMIC_ADD_32 1 -#define OPAL_HAVE_INLINE_ATOMIC_AND_32 1 -#define OPAL_HAVE_INLINE_ATOMIC_OR_32 1 -#define OPAL_HAVE_INLINE_ATOMIC_XOR_32 1 -#define OPAL_HAVE_INLINE_ATOMIC_SUB_32 1 -#define OPAL_HAVE_INLINE_ATOMIC_ADD_64 1 -#define OPAL_HAVE_INLINE_ATOMIC_AND_64 1 -#define OPAL_HAVE_INLINE_ATOMIC_OR_64 1 -#define OPAL_HAVE_INLINE_ATOMIC_XOR_64 1 -#define OPAL_HAVE_INLINE_ATOMIC_SUB_64 1 - /********************************************************************** * * Load the appropriate architecture files and set some reasonable @@ -396,7 +379,6 @@ static inline int32_t opal_atomic_fetch_min_32(opal_atomic_int32_t *addr, int32_ static inline int32_t opal_atomic_max_fetch_32(opal_atomic_int32_t *addr, int32_t value); static inline int32_t opal_atomic_fetch_max_32(opal_atomic_int32_t *addr, int32_t value); - static inline int64_t opal_atomic_add_fetch_64(opal_atomic_int64_t *addr, int64_t delta); static inline int64_t opal_atomic_fetch_add_64(opal_atomic_int64_t *addr, int64_t delta); static inline int64_t opal_atomic_and_fetch_64(opal_atomic_int64_t *addr, int64_t value); @@ -411,122 +393,22 @@ static inline int64_t opal_atomic_fetch_min_64(opal_atomic_int64_t *addr, int64_ static inline int64_t opal_atomic_max_fetch_64(opal_atomic_int64_t *addr, int64_t value); static inline int64_t opal_atomic_fetch_max_64(opal_atomic_int64_t *addr, int64_t value); +static inline size_t opal_atomic_add_fetch_size_t(opal_atomic_size_t 
*addr, size_t delta); +static inline size_t opal_atomic_fetch_add_size_t(opal_atomic_size_t *addr, size_t delta); -/* provide a size_t add/subtract. When in debug mode, make it an - * inline function so that we don't have any casts in the - * interface and can catch type errors. When not in debug mode, - * just make it a macro, so that there's no performance penalty - */ -# if defined(DOXYGEN) || OPAL_ENABLE_DEBUG -static inline size_t opal_atomic_add_fetch_size_t(opal_atomic_size_t *addr, size_t delta) -{ -# if SIZEOF_SIZE_T == 4 - return (size_t) opal_atomic_add_fetch_32((int32_t *) addr, delta); -# elif SIZEOF_SIZE_T == 8 - return (size_t) opal_atomic_add_fetch_64((int64_t *) addr, delta); -# else -# error "Unknown size_t size" -# endif -} - -static inline size_t opal_atomic_fetch_add_size_t(opal_atomic_size_t *addr, size_t delta) -{ -# if SIZEOF_SIZE_T == 4 - return (size_t) opal_atomic_fetch_add_32((int32_t *) addr, delta); -# elif SIZEOF_SIZE_T == 8 - return (size_t) opal_atomic_fetch_add_64((int64_t *) addr, delta); -# else -# error "Unknown size_t size" -# endif -} - -static inline size_t opal_atomic_sub_fetch_size_t(opal_atomic_size_t *addr, size_t delta) -{ -# if SIZEOF_SIZE_T == 4 - return (size_t) opal_atomic_sub_fetch_32((int32_t *) addr, delta); -# elif SIZEOF_SIZE_T == 8 - return (size_t) opal_atomic_sub_fetch_64((int64_t *) addr, delta); -# else -# error "Unknown size_t size" -# endif -} - -static inline size_t opal_atomic_fetch_sub_size_t(opal_atomic_size_t *addr, size_t delta) -{ -# if SIZEOF_SIZE_T == 4 - return (size_t) opal_atomic_fetch_sub_32((int32_t *) addr, delta); -# elif SIZEOF_SIZE_T == 8 - return (size_t) opal_atomic_fetch_sub_64((int64_t *) addr, delta); -# else -# error "Unknown size_t size" -# endif -} - -# else -# if SIZEOF_SIZE_T == 4 -# define opal_atomic_add_fetch_size_t(addr, delta) \ - ((size_t) opal_atomic_add_fetch_32((opal_atomic_int32_t *) addr, delta)) -# define opal_atomic_fetch_add_size_t(addr, delta) \ - ((size_t) 
opal_atomic_fetch_add_32((opal_atomic_int32_t *) addr, delta)) -# define opal_atomic_sub_fetch_size_t(addr, delta) \ - ((size_t) opal_atomic_sub_fetch_32((opal_atomic_int32_t *) addr, delta)) -# define opal_atomic_fetch_sub_size_t(addr, delta) \ - ((size_t) opal_atomic_fetch_sub_32((opal_atomic_int32_t *) addr, delta)) -# elif SIZEOF_SIZE_T == 8 -# define opal_atomic_add_fetch_size_t(addr, delta) \ - ((size_t) opal_atomic_add_fetch_64((opal_atomic_int64_t *) addr, delta)) -# define opal_atomic_fetch_add_size_t(addr, delta) \ - ((size_t) opal_atomic_fetch_add_64((opal_atomic_int64_t *) addr, delta)) -# define opal_atomic_sub_fetch_size_t(addr, delta) \ - ((size_t) opal_atomic_sub_fetch_64((opal_atomic_int64_t *) addr, delta)) -# define opal_atomic_fetch_sub_size_t(addr, delta) \ - ((size_t) opal_atomic_fetch_sub_64((opal_atomic_int64_t *) addr, delta)) -# else -# error "Unknown size_t size" -# endif -# endif - - -static inline void opal_atomic_add_xx(opal_atomic_intptr_t *addr, int32_t value, size_t length); -static inline void opal_atomic_sub_xx(opal_atomic_intptr_t *addr, int32_t value, size_t length); - -static inline intptr_t opal_atomic_add_fetch_ptr(opal_atomic_intptr_t *addr, void *delta); -static inline intptr_t opal_atomic_fetch_add_ptr(opal_atomic_intptr_t *addr, void *delta); -static inline intptr_t opal_atomic_sub_fetch_ptr(opal_atomic_intptr_t *addr, void *delta); -static inline intptr_t opal_atomic_fetch_sub_ptr(opal_atomic_intptr_t *addr, void *delta); - +#ifdef DOXYGEN /* because this isn't a proper C prototype */ /** - * Atomically increment the content depending on the type. This - * macro detect at compile time the type of the first argument - * and choose the correct function to be called. + * Atomically add delta to addr, type independent * - * \note This macro should only be used for integer types. 
+ * @param addr Address of value to update + * @param delta Value by which to change the value in addr * - * @param addr Address of - * @param delta Value to add (converted to ). + * Generally implemented as a macro (except for when implemented as a + * compiler built-in), this function provides a type-independent math + * operator. */ -# define opal_atomic_add(ADDR, VALUE) \ - opal_atomic_add_xx((opal_atomic_intptr_t *) (ADDR), (int32_t)(VALUE), sizeof(*(ADDR))) - -/** - * Atomically decrement the content depending on the type. This - * macro detect at compile time the type of the first argument - * and choose the correct function to be called. - * - * \note This macro should only be used for integer types. - * - * @param addr Address of - * @param delta Value to substract (converted to ). - */ -# define opal_atomic_sub(ADDR, VALUE) \ - opal_atomic_sub_xx((opal_atomic_intptr_t *) (ADDR), (int32_t)(VALUE), sizeof(*(ADDR))) - - -/* - * Include inline implementations of everything not defined directly - * in assembly - */ -# include "opal/sys/atomic_impl.h" +static inline void opal_atomic_add(type *addr, type delta); +#endif #endif /* !OPAL_C_HAVE__ATOMIC */ diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h deleted file mode 100644 index 211d51a576d..00000000000 --- a/opal/include/opal/sys/atomic_impl.h +++ /dev/null @@ -1,323 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* Inline C implementation of the functions defined in atomic.h */ - -#include - -/********************************************************************** - * - * Atomic math operations - * - * All the architectures provide a compare_and_set atomic operations. If - * they dont provide atomic additions and/or substractions then we can - * define these operations using the atomic compare_and_set. - * - * Some architectures do not provide support for the 64 bits - * atomic operations. Until we find a better solution let's just - * undefine all those functions if there is no 64 bit compare-exchange - * - *********************************************************************/ - -# if !defined(OPAL_HAVE_ATOMIC_MIN_32) -static inline int32_t opal_atomic_fetch_min_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = *addr; - do { - if (old <= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_32(addr, &old, value)); - - return old; -} - -# define OPAL_HAVE_ATOMIC_MIN_32 1 - -# endif /* OPAL_HAVE_ATOMIC_MIN_32 */ - -# if !defined(OPAL_HAVE_ATOMIC_MAX_32) -static inline int32_t opal_atomic_fetch_max_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = *addr; - do { - if (old >= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_32(addr, &old, value)); - - return old; -} - -# define OPAL_HAVE_ATOMIC_MAX_32 1 -# endif /* OPAL_HAVE_ATOMIC_MAX_32 */ - -# define OPAL_ATOMIC_DEFINE_CMPXCG_OP(type, bits, operation, name) \ - static inline type opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, type value) \ - { \ - type oldval; \ - do { \ - oldval = *addr; \ - } while (!opal_atomic_compare_exchange_strong_##bits(addr, &oldval, \ - oldval operation value)); \ - \ - return oldval; \ 
- } - - -# if !defined(OPAL_HAVE_ATOMIC_ADD_32) -# define OPAL_HAVE_ATOMIC_ADD_32 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, +, add) - -# endif /* OPAL_HAVE_ATOMIC_ADD_32 */ - -# if !defined(OPAL_HAVE_ATOMIC_AND_32) -# define OPAL_HAVE_ATOMIC_AND_32 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, &, and) - -# endif /* OPAL_HAVE_ATOMIC_AND_32 */ - -# if !defined(OPAL_HAVE_ATOMIC_OR_32) -# define OPAL_HAVE_ATOMIC_OR_32 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, |, or) - -# endif /* OPAL_HAVE_ATOMIC_OR_32 */ - -# if !defined(OPAL_HAVE_ATOMIC_XOR_32) -# define OPAL_HAVE_ATOMIC_XOR_32 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, ^, xor) - -# endif /* OPAL_HAVE_ATOMIC_XOR_32 */ - -# if !defined(OPAL_HAVE_ATOMIC_SUB_32) -# define OPAL_HAVE_ATOMIC_SUB_32 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int32_t, 32, -, sub) - -# endif /* OPAL_HAVE_ATOMIC_SUB_32 */ - -# if !defined(OPAL_HAVE_ATOMIC_MIN_64) -static inline int64_t opal_atomic_fetch_min_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = *addr; - do { - if (old <= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_64(addr, &old, value)); - - return old; -} - -# define OPAL_HAVE_ATOMIC_MIN_64 1 - -# endif /* OPAL_HAVE_ATOMIC_MIN_64 */ - -# if !defined(OPAL_HAVE_ATOMIC_MAX_64) -static inline int64_t opal_atomic_fetch_max_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = *addr; - do { - if (old >= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_64(addr, &old, value)); - - return old; -} - -# define OPAL_HAVE_ATOMIC_MAX_64 1 -# endif /* OPAL_HAVE_ATOMIC_MAX_64 */ - - -# if !defined(OPAL_HAVE_ATOMIC_ADD_64) -# define OPAL_HAVE_ATOMIC_ADD_64 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, +, add) - -# endif /* OPAL_HAVE_ATOMIC_ADD_64 */ - -# if !defined(OPAL_HAVE_ATOMIC_AND_64) -# define OPAL_HAVE_ATOMIC_AND_64 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, &, and) - -# endif /* OPAL_HAVE_ATOMIC_AND_64 */ - -# if !defined(OPAL_HAVE_ATOMIC_OR_64) -# define 
OPAL_HAVE_ATOMIC_OR_64 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, |, or) - -# endif /* OPAL_HAVE_ATOMIC_OR_64 */ - -# if !defined(OPAL_HAVE_ATOMIC_XOR_64) -# define OPAL_HAVE_ATOMIC_XOR_64 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, ^, xor) - -# endif /* OPAL_HAVE_ATOMIC_XOR_64 */ - -# if !defined(OPAL_HAVE_ATOMIC_SUB_64) -# define OPAL_HAVE_ATOMIC_SUB_64 1 - -OPAL_ATOMIC_DEFINE_CMPXCG_OP(int64_t, 64, -, sub) - -# endif /* OPAL_HAVE_ATOMIC_SUB_64 */ - - -static inline void opal_atomic_add_xx(opal_atomic_intptr_t *addr, int32_t value, size_t length) -{ - switch (length) { -# if OPAL_HAVE_ATOMIC_ADD_32 - case 4: - (void) opal_atomic_fetch_add_32((opal_atomic_int32_t *) addr, (int32_t) value); - break; -# endif /* OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 */ - -# if OPAL_HAVE_ATOMIC_ADD_64 - case 8: - (void) opal_atomic_fetch_add_64((opal_atomic_int64_t *) addr, (int64_t) value); - break; -# endif /* OPAL_HAVE_ATOMIC_ADD_64 */ - default: - /* This should never happen, so deliberately abort (hopefully - leaving a corefile for analysis) */ - abort(); - } -} - -static inline void opal_atomic_sub_xx(opal_atomic_intptr_t *addr, int32_t value, size_t length) -{ - switch (length) { -# if OPAL_HAVE_ATOMIC_SUB_32 - case 4: - (void) opal_atomic_fetch_sub_32((opal_atomic_int32_t *) addr, (int32_t) value); - break; -# endif /* OPAL_HAVE_ATOMIC_SUB_32 */ - -# if OPAL_HAVE_ATOMIC_SUB_64 - case 8: - (void) opal_atomic_fetch_sub_64((opal_atomic_int64_t *) addr, (int64_t) value); - break; -# endif /* OPAL_HAVE_ATOMIC_SUB_64 */ - default: - /* This should never happen, so deliberately abort (hopefully - leaving a corefile for analysis) */ - abort(); - } -} - -# define OPAL_ATOMIC_DEFINE_OP_FETCH(op, operation, type, ptr_type, suffix) \ - static inline type opal_atomic_##op##_fetch_##suffix(opal_atomic_##ptr_type *addr, \ - type value) \ - { \ - return opal_atomic_fetch_##op##_##suffix(addr, value) operation value; \ - } - -OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int32_t, int32_t, 32) 
-OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int32_t, int32_t, 32) -OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int32_t, int32_t, 32) -OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int32_t, int32_t, 32) -OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int32_t, int32_t, 32) - -static inline int32_t opal_atomic_min_fetch_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = opal_atomic_fetch_min_32(addr, value); - return old <= value ? old : value; -} - -static inline int32_t opal_atomic_max_fetch_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = opal_atomic_fetch_max_32(addr, value); - return old >= value ? old : value; -} - -OPAL_ATOMIC_DEFINE_OP_FETCH(add, +, int64_t, int64_t, 64) -OPAL_ATOMIC_DEFINE_OP_FETCH(and, &, int64_t, int64_t, 64) -OPAL_ATOMIC_DEFINE_OP_FETCH(or, |, int64_t, int64_t, 64) -OPAL_ATOMIC_DEFINE_OP_FETCH(xor, ^, int64_t, int64_t, 64) -OPAL_ATOMIC_DEFINE_OP_FETCH(sub, -, int64_t, int64_t, 64) - -static inline int64_t opal_atomic_min_fetch_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = opal_atomic_fetch_min_64(addr, value); - return old <= value ? old : value; -} - -static inline int64_t opal_atomic_max_fetch_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = opal_atomic_fetch_max_64(addr, value); - return old >= value ? 
old : value; -} - -static inline intptr_t opal_atomic_fetch_add_ptr(opal_atomic_intptr_t *addr, void *delta) -{ -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 - return opal_atomic_fetch_add_32((opal_atomic_int32_t *) addr, (unsigned long) delta); -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64 - return opal_atomic_fetch_add_64((opal_atomic_int64_t *) addr, (unsigned long) delta); -# else - abort(); - return 0; -# endif -} - -static inline intptr_t opal_atomic_add_fetch_ptr(opal_atomic_intptr_t *addr, void *delta) -{ -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_ADD_32 - return opal_atomic_add_fetch_32((opal_atomic_int32_t *) addr, (unsigned long) delta); -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_ADD_64 - return opal_atomic_add_fetch_64((opal_atomic_int64_t *) addr, (unsigned long) delta); -# else - abort(); - return 0; -# endif -} - -static inline intptr_t opal_atomic_fetch_sub_ptr(opal_atomic_intptr_t *addr, void *delta) -{ -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 - return opal_atomic_fetch_sub_32((opal_atomic_int32_t *) addr, (unsigned long) delta); -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32 - return opal_atomic_fetch_sub_64((opal_atomic_int64_t *) addr, (unsigned long) delta); -# else - abort(); - return 0; -# endif -} - -static inline intptr_t opal_atomic_sub_fetch_ptr(opal_atomic_intptr_t *addr, void *delta) -{ -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_SUB_32 - return opal_atomic_sub_fetch_32((opal_atomic_int32_t *) addr, (unsigned long) delta); -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_SUB_32 - return opal_atomic_sub_fetch_64((opal_atomic_int64_t *) addr, (unsigned long) delta); -# else - abort(); - return 0; -# endif -} diff --git a/opal/include/opal/sys/atomic_impl_math.h b/opal/include/opal/sys/atomic_impl_math.h new file mode 100644 index 00000000000..7f48e50a23b --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_math.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and 
Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * Implementation of the required atomic math functions in terms of + * compare and swap operators. + */ + +#ifndef ATOMIC_IMPL_MATH_H +#define ATOMIC_IMPL_MATH_H 1 + +#define OPAL_ATOMIC_DEFINE_OP(type, bits, operation, name) \ + static inline type opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, type value) \ + { \ + type oldval; \ + do { \ + oldval = *addr; \ + } while (!opal_atomic_compare_exchange_strong_##bits(addr, &oldval, \ + oldval operation value)); \ + \ + return oldval; \ + } \ + \ + static inline type opal_atomic_##name##_fetch_##bits(opal_atomic_##type *addr, type value) \ + { \ + type oldval, newval; \ + do { \ + oldval = *addr; \ + newval = oldval operation value; \ + } while (!opal_atomic_compare_exchange_strong_##bits(addr, &oldval, newval)); \ + \ + return newval; \ + } + +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, +, add) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, &, and) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, |, or) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, ^, xor) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, -, sub) + +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, +, add) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, &, and) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, |, or) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, ^, xor)
+OPAL_ATOMIC_DEFINE_OP(int64_t, 64, -, sub) + +#include "opal/sys/atomic_impl_minmax_math.h" + +#endif /* #ifndef ATOMIC_IMPL_MATH_H */ diff --git a/opal/include/opal/sys/atomic_impl_minmax_math.h b/opal/include/opal/sys/atomic_impl_minmax_math.h new file mode 100644 index 00000000000..7ec5920ad46 --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_minmax_math.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * Implementation of the min/max atomic functions in terms of compare + * and swap. These are broken out from the basic atomic_impl_math.h + * functions because most atomic implementations do not provide native + * min/max interfaces. + */ + +#ifndef ATOMIC_IMPL_MINMAX_MATH_H +#define ATOMIC_IMPL_MINMAX_MATH_H 1 + +static inline int32_t opal_atomic_fetch_min_32(opal_atomic_int32_t *addr, int32_t value) +{ + int32_t old = *addr; + do { + if (old <= value) { + break; + } + } while (!opal_atomic_compare_exchange_strong_32(addr, &old, value)); + + return old; +} + +static inline int32_t opal_atomic_min_fetch_32(opal_atomic_int32_t *addr, int32_t value) +{ + int32_t old = opal_atomic_fetch_min_32(addr, value); + return old <= value ?
old : value; +} + +static inline int32_t opal_atomic_fetch_max_32(opal_atomic_int32_t *addr, int32_t value) +{ + int32_t old = *addr; + do { + if (old >= value) { + break; + } + } while (!opal_atomic_compare_exchange_strong_32(addr, &old, value)); + + return old; +} + +static inline int32_t opal_atomic_max_fetch_32(opal_atomic_int32_t *addr, int32_t value) +{ + int32_t old = opal_atomic_fetch_max_32(addr, value); + return old >= value ? old : value; +} + +static inline int64_t opal_atomic_fetch_min_64(opal_atomic_int64_t *addr, int64_t value) +{ + int64_t old = *addr; + do { + if (old <= value) { + break; + } + } while (!opal_atomic_compare_exchange_strong_64(addr, &old, value)); + + return old; +} + +static inline int64_t opal_atomic_fetch_max_64(opal_atomic_int64_t *addr, int64_t value) +{ + int64_t old = *addr; + do { + if (old >= value) { + break; + } + } while (!opal_atomic_compare_exchange_strong_64(addr, &old, value)); + + return old; +} + +static inline int64_t opal_atomic_min_fetch_64(opal_atomic_int64_t *addr, int64_t value) +{ + int64_t old = opal_atomic_fetch_min_64(addr, value); + return old <= value ? old : value; +} + +static inline int64_t opal_atomic_max_fetch_64(opal_atomic_int64_t *addr, int64_t value) +{ + int64_t old = opal_atomic_fetch_max_64(addr, value); + return old >= value ? old : value; +} + +#endif /* #ifndef ATOMIC_MATH_MINMAX_IMPL_H */ diff --git a/opal/include/opal/sys/atomic_impl_size_t_math.h b/opal/include/opal/sys/atomic_impl_size_t_math.h new file mode 100644 index 00000000000..4316bd9a52c --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_size_t_math.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2014 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * Implementation of size_t atomic add/sub functions as wrappers around + * sized implementations. + */ + +#ifndef ATOMIC_IMPL_SIZE_T_MATH_H +#define ATOMIC_IMPL_SIZE_T_MATH_H 1 + +#include <stdlib.h> + +static inline size_t opal_atomic_add_fetch_size_t(opal_atomic_size_t *addr, size_t delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t)opal_atomic_add_fetch_32((opal_atomic_int32_t *) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t)opal_atomic_add_fetch_64((opal_atomic_int64_t *) addr, delta); +#else +#error "No implementation of opal_atomic_add_fetch_size_t" +#endif +} + +static inline size_t opal_atomic_fetch_add_size_t(opal_atomic_size_t *addr, size_t delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t)opal_atomic_fetch_add_32((opal_atomic_int32_t *) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t)opal_atomic_fetch_add_64((opal_atomic_int64_t *) addr, delta); +#else +#error "No implementation of opal_atomic_fetch_add_size_t" +#endif +} + +static inline size_t opal_atomic_sub_fetch_size_t(opal_atomic_size_t *addr, size_t delta) +{ +#if SIZEOF_SIZE_T == 4 + return (size_t)opal_atomic_sub_fetch_32((opal_atomic_int32_t *) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t)opal_atomic_sub_fetch_64((opal_atomic_int64_t *) addr, delta); +#else +#error "No implementation of opal_atomic_sub_fetch_size_t" +#endif +} + +static inline size_t opal_atomic_fetch_sub_size_t(opal_atomic_size_t *addr, size_t delta) +{ +#if SIZEOF_SIZE_T == 4 
+ return (size_t)opal_atomic_fetch_sub_32((opal_atomic_int32_t *) addr, delta); +#elif SIZEOF_SIZE_T == 8 + return (size_t)opal_atomic_fetch_sub_64((opal_atomic_int64_t *) addr, delta); +#else +#error "No implementation of opal_atomic_fetch_sub_size_t" +#endif +} + +/** + * Atomically increment the content depending on the type. This + * macro detects at compile time the type of the first argument + * and chooses the correct function to be called. + * + * \note This macro should only be used for integer types. + * + * @param ADDR Address of <type> + * @param VALUE Value to add (converted to <type>). + */ +#define opal_atomic_add(ADDR, VALUE) \ + opal_atomic_add_xx((opal_atomic_intptr_t *) (ADDR), (int64_t)(VALUE), sizeof(*(ADDR))) + +static inline void opal_atomic_add_xx(opal_atomic_intptr_t *addr, int64_t value, size_t length) +{ + switch (length) { + case 4: + (void)opal_atomic_fetch_add_32((opal_atomic_int32_t*)addr, (int32_t) value); + break; + case 8: + (void)opal_atomic_fetch_add_64((opal_atomic_int64_t*)addr, value); + break; + default: + abort(); + } +} + +#endif diff --git a/opal/include/opal/sys/atomic_stdc.h b/opal/include/opal/sys/atomic_stdc.h index 5d651748bd7..622efae5a0e 100644 --- a/opal/include/opal/sys/atomic_stdc.h +++ b/opal/include/opal/sys/atomic_stdc.h @@ -32,25 +32,6 @@ # include # include -# define OPAL_HAVE_ATOMIC_ADD_32 1 -# define OPAL_HAVE_ATOMIC_AND_32 1 -# define OPAL_HAVE_ATOMIC_OR_32 1 -# define OPAL_HAVE_ATOMIC_XOR_32 1 -# define OPAL_HAVE_ATOMIC_SUB_32 1 - -# define OPAL_HAVE_ATOMIC_ADD_64 1 -# define OPAL_HAVE_ATOMIC_AND_64 1 -# define OPAL_HAVE_ATOMIC_OR_64 1 -# define OPAL_HAVE_ATOMIC_XOR_64 1 -# define OPAL_HAVE_ATOMIC_SUB_64 1 - -# define OPAL_HAVE_ATOMIC_MIN_32 1 -# define OPAL_HAVE_ATOMIC_MAX_32 1 - -# define OPAL_HAVE_ATOMIC_MIN_64 1 -# define OPAL_HAVE_ATOMIC_MAX_64 1 - - /********************************************************************** * * Memory Barriers @@ -190,6 +171,12 @@ static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) 
} +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + # define OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(op, bits, type, operator) \ static inline type opal_atomic_fetch_##op##_##bits(opal_atomic_##type *addr, type value) \ { \ @@ -201,97 +188,24 @@ static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) return atomic_fetch_##op##_explicit(addr, value, memory_order_relaxed) operator value; \ } - OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 32, int32_t, +) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 64, int64_t, +) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, size_t, size_t, +) - +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(and, 32, int32_t, &) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(or, 32, int32_t, |) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(xor, 32, int32_t, ^) OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(sub, 32, int32_t, -) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(sub, 64, int64_t, -) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(sub, size_t, size_t, -) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(or, 32, int32_t, |) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, 64, int64_t, +) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(and, 64, int64_t, &) OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(or, 64, int64_t, |) - -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(xor, 32, int32_t, ^) OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(xor, 64, int64_t, ^) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(sub, 64, int64_t, -) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(and, 32, int32_t, &) -OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(and, 64, int64_t, &) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(add, size_t, size_t, +) +OPAL_ATOMIC_STDC_DEFINE_FETCH_OP(sub, size_t, size_t, -) # define opal_atomic_add(addr, value) \ (void) atomic_fetch_add_explicit(addr, value, memory_order_relaxed) -static inline int32_t opal_atomic_fetch_min_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = *addr; - do { - if (old <= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_32(addr, &old, value)); - - return old; -} - 
-static inline int32_t opal_atomic_fetch_max_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = *addr; - do { - if (old >= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_32(addr, &old, value)); - - return old; -} - -static inline int64_t opal_atomic_fetch_min_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = *addr; - do { - if (old <= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_64(addr, &old, value)); - - return old; -} - -static inline int64_t opal_atomic_fetch_max_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = *addr; - do { - if (old >= value) { - break; - } - } while (!opal_atomic_compare_exchange_strong_64(addr, &old, value)); - - return old; -} - -static inline int32_t opal_atomic_min_fetch_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = opal_atomic_fetch_min_32(addr, value); - return old <= value ? old : value; -} - -static inline int32_t opal_atomic_max_fetch_32(opal_atomic_int32_t *addr, int32_t value) -{ - int32_t old = opal_atomic_fetch_max_32(addr, value); - return old >= value ? old : value; -} - -static inline int64_t opal_atomic_min_fetch_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = opal_atomic_fetch_min_64(addr, value); - return old <= value ? old : value; -} - -static inline int64_t opal_atomic_max_fetch_64(opal_atomic_int64_t *addr, int64_t value) -{ - int64_t old = opal_atomic_fetch_max_64(addr, value); - return old >= value ? 
old : value; -} +#include "opal/sys/atomic_impl_minmax_math.h" #endif /* !defined(OPAL_ATOMIC_STDC_H) */ diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index a30e9c329b3..e4b9cc7a0e1 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -30,24 +30,6 @@ #ifndef OPAL_SYS_ARCH_ATOMIC_H #define OPAL_SYS_ARCH_ATOMIC_H 1 -/********************************************************************** - * - * Memory Barriers - * - *********************************************************************/ - -#define OPAL_HAVE_ATOMIC_ADD_32 1 -#define OPAL_HAVE_ATOMIC_AND_32 1 -#define OPAL_HAVE_ATOMIC_OR_32 1 -#define OPAL_HAVE_ATOMIC_XOR_32 1 -#define OPAL_HAVE_ATOMIC_SUB_32 1 -#define OPAL_HAVE_ATOMIC_ADD_64 1 -#define OPAL_HAVE_ATOMIC_AND_64 1 -#define OPAL_HAVE_ATOMIC_OR_64 1 -#define OPAL_HAVE_ATOMIC_XOR_64 1 -#define OPAL_HAVE_ATOMIC_SUB_64 1 - - /********************************************************************** * * Memory Barriers @@ -255,57 +237,36 @@ static inline void opal_atomic_unlock(opal_atomic_lock_t *lock) * *********************************************************************/ - -static inline int32_t opal_atomic_fetch_add_32(opal_atomic_int32_t *addr, int32_t delta) -{ - return __atomic_fetch_add(addr, delta, __ATOMIC_RELAXED); -} - -static inline int32_t opal_atomic_fetch_and_32(opal_atomic_int32_t *addr, int32_t value) -{ - return __atomic_fetch_and(addr, value, __ATOMIC_RELAXED); -} - -static inline int32_t opal_atomic_fetch_or_32(opal_atomic_int32_t *addr, int32_t value) -{ - return __atomic_fetch_or(addr, value, __ATOMIC_RELAXED); -} - -static inline int32_t opal_atomic_fetch_xor_32(opal_atomic_int32_t *addr, int32_t value) -{ - return __atomic_fetch_xor(addr, value, __ATOMIC_RELAXED); -} - -static inline int32_t opal_atomic_fetch_sub_32(opal_atomic_int32_t *addr, int32_t delta) -{ - return __atomic_fetch_sub(addr, delta, __ATOMIC_RELAXED); -} - 
-static inline int64_t opal_atomic_fetch_add_64(opal_atomic_int64_t *addr, int64_t delta) -{ - return __atomic_fetch_add(addr, delta, __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_fetch_and_64(opal_atomic_int64_t *addr, int64_t value) -{ - return __atomic_fetch_and(addr, value, __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_fetch_or_64(opal_atomic_int64_t *addr, int64_t value) -{ - return __atomic_fetch_or(addr, value, __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_fetch_xor_64(opal_atomic_int64_t *addr, int64_t value) -{ - return __atomic_fetch_xor(addr, value, __ATOMIC_RELAXED); -} - -static inline int64_t opal_atomic_fetch_sub_64(opal_atomic_int64_t *addr, int64_t delta) -{ - return __atomic_fetch_sub(addr, delta, __ATOMIC_RELAXED); -} - +#define OPAL_ATOMIC_DEFINE_OP(type, bits, operator, name) \ + static inline type opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, type value) \ + { \ + return __atomic_fetch_##name(addr, value, __ATOMIC_RELAXED); \ + } \ + \ + static inline type opal_atomic_##name##_fetch_##bits(opal_atomic_##type *addr, type value) \ + { \ + return __atomic_##name##_fetch(addr, value, __ATOMIC_RELAXED); \ + } + +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, +, add) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, &, and) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, |, or) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, ^, xor) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, -, sub) + +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, +, add) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, &, and) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, |, or) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, ^, xor) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, -, sub) + +OPAL_ATOMIC_DEFINE_OP(size_t, size_t, +, add) +OPAL_ATOMIC_DEFINE_OP(size_t, size_t, -, sub) + +#define opal_atomic_add(ADDR, VALUE) \ + (void) __atomic_fetch_add(ADDR, VALUE, __ATOMIC_RELAXED) + +#include "opal/sys/atomic_impl_minmax_math.h" #if defined(__SUNPRO_C) || defined(__SUNPRO_CC) # pragma error_messages(default, E_ARG_INCOMPATIBLE_WITH_ARG_L) 
diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 56c8ce648f9..7bc9c4fdf73 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -30,26 +30,6 @@ * On powerpc ... */ -/********************************************************************** - * - * Define constants for PowerPC 64 - * - *********************************************************************/ - -#define OPAL_HAVE_ATOMIC_LLSC_32 1 - -#define OPAL_HAVE_ATOMIC_ADD_32 1 -#define OPAL_HAVE_ATOMIC_AND_32 1 -#define OPAL_HAVE_ATOMIC_OR_32 1 -#define OPAL_HAVE_ATOMIC_XOR_32 1 -#define OPAL_HAVE_ATOMIC_SUB_32 1 -#define OPAL_HAVE_ATOMIC_LLSC_64 1 -#define OPAL_HAVE_ATOMIC_ADD_64 1 -#define OPAL_HAVE_ATOMIC_AND_64 1 -#define OPAL_HAVE_ATOMIC_OR_64 1 -#define OPAL_HAVE_ATOMIC_XOR_64 1 -#define OPAL_HAVE_ATOMIC_SUB_64 1 - #if defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__) || defined(__ibmxl__) /* work-around bizzare xlc bug in which it sign-extends a pointer to a 32-bit signed integer */ @@ -236,6 +216,80 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new * *********************************************************************/ +#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ + static inline int32_t opal_atomic_fetch_##type##_32(opal_atomic_int32_t *v, int val) \ + { \ + int32_t newval, old; \ + \ + __asm__ __volatile__("1: lwarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stwcx. %0, 0, %4 \n\t" \ + " bne- 1b \n\t" \ + : "=&r"(newval), "=&r"(old), "=m"(*v) \ + : "r"(val), "r" OPAL_ASM_ADDR(v), "m"(*v) \ + : "cc"); \ + \ + return old; \ + } \ + static inline int32_t opal_atomic_##type##_fetch_32(opal_atomic_int32_t *v, int val) \ + { \ + int32_t newval, old; \ + \ + __asm__ __volatile__("1: lwarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stwcx. 
%0, 0, %4 \n\t" \ + " bne- 1b \n\t" \ + : "=&r"(newval), "=&r"(old), "=m"(*v) \ + : "r"(val), "r" OPAL_ASM_ADDR(v), "m"(*v) \ + : "cc"); \ + \ + return newval; \ + } + +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(add, add) +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(and, and) +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(or, or) +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(xor, xor) +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(sub, subf) + +#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ + static inline int64_t opal_atomic_fetch_##type##_64(opal_atomic_int64_t *v, int64_t val) \ + { \ + int64_t newval, old; \ + \ + __asm__ __volatile__("1: ldarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stdcx. %0, 0, %4 \n\t" \ + " bne- 1b \n\t" \ + : "=&r"(newval), "=&r"(old), "=m"(*v) \ + : "r"(OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m"(*v) \ + : "cc"); \ + \ + return old; \ + } \ + static inline int64_t opal_atomic_##type##_fetch_64(opal_atomic_int64_t *v, int64_t val) \ + { \ + int64_t newval, old; \ + \ + __asm__ __volatile__("1: ldarx %1, 0, %4 \n\t" \ + " " #instr " %0, %3, %1 \n\t" \ + " stdcx. %0, 0, %4 \n\t" \ + " bne- 1b \n\t" \ + : "=&r"(newval), "=&r"(old), "=m"(*v) \ + : "r"(OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m"(*v) \ + : "cc"); \ + \ + return newval; \ + } + +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add) +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(and, and) +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or) +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor) +OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) + +#include "opal/sys/atomic_impl_minmax_math.h" +#include "opal/sys/atomic_impl_size_t_math.h" /* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. 
The reason * is that even with an always_inline attribute the compiler may still emit instructions to store @@ -263,29 +317,6 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new ret = _ret; \ } while (0) - -#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(type, instr) \ - static inline int64_t opal_atomic_fetch_##type##_64(opal_atomic_int64_t *v, int64_t val) \ - { \ - int64_t t, old; \ - \ - __asm__ __volatile__("1: ldarx %1, 0, %4 \n\t" \ - " " #instr " %0, %3, %1 \n\t" \ - " stdcx. %0, 0, %4 \n\t" \ - " bne- 1b \n\t" \ - : "=&r"(t), "=&r"(old), "=m"(*v) \ - : "r"(OPAL_ASM_VALUE64(val)), "r" OPAL_ASM_ADDR(v), "m"(*v) \ - : "cc"); \ - \ - return old; \ - } - -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(add, add) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(and, and) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(or, or) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(xor, xor) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) - #define opal_atomic_ll_64(addr, ret) \ do { \ opal_atomic_int64_t *_addr = (addr); \ @@ -309,27 +340,4 @@ OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) ret = _ret; \ } while (0) - -#define OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(type, instr) \ - static inline int32_t opal_atomic_fetch_##type##_32(opal_atomic_int32_t *v, int val) \ - { \ - int32_t t, old; \ - \ - __asm__ __volatile__("1: lwarx %1, 0, %4 \n\t" \ - " " #instr " %0, %3, %1 \n\t" \ - " stwcx. %0, 0, %4 \n\t" \ - " bne- 1b \n\t" \ - : "=&r"(t), "=&r"(old), "=m"(*v) \ - : "r"(val), "r" OPAL_ASM_ADDR(v), "m"(*v) \ - : "cc"); \ - \ - return old; \ - } - -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(add, add) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(and, and) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(or, or) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(xor, xor) -OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_32(sub, subf) - #endif /* ! 
OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/x86_64/atomic.h b/opal/include/opal/sys/x86_64/atomic.h index 3a7d49acae4..ede9f721764 100644 --- a/opal/include/opal/sys/x86_64/atomic.h +++ b/opal/include/opal/sys/x86_64/atomic.h @@ -165,13 +165,6 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new * *********************************************************************/ -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type int - * - * Atomically adds @i to @v. - */ static inline int32_t opal_atomic_fetch_add_32(opal_atomic_int32_t *v, int i) { int ret = i; @@ -179,15 +172,11 @@ static inline int32_t opal_atomic_fetch_add_32(opal_atomic_int32_t *v, int i) return ret; } -# define OPAL_HAVE_ATOMIC_ADD_64 1 +static inline int32_t opal_atomic_add_fetch_32(opal_atomic_int32_t *v, int i) +{ + return opal_atomic_fetch_add_32(v, i) + i; +} -/** - * atomic_add - add integer to atomic variable - * @i: integer value to add - * @v: pointer of type int - * - * Atomically adds @i to @v. - */ static inline int64_t opal_atomic_fetch_add_64(opal_atomic_int64_t *v, int64_t i) { int64_t ret = i; @@ -195,15 +184,11 @@ static inline int64_t opal_atomic_fetch_add_64(opal_atomic_int64_t *v, int64_t i return ret; } -# define OPAL_HAVE_ATOMIC_SUB_32 1 +static inline int64_t opal_atomic_add_fetch_64(opal_atomic_int64_t *v, int64_t i) +{ + return opal_atomic_fetch_add_64(v, i) + i; +} -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type int - * - * Atomically subtracts @i from @v. 
- */ static inline int32_t opal_atomic_fetch_sub_32(opal_atomic_int32_t *v, int i) { int ret = -i; @@ -211,15 +196,11 @@ static inline int32_t opal_atomic_fetch_sub_32(opal_atomic_int32_t *v, int i) return ret; } -# define OPAL_HAVE_ATOMIC_SUB_64 1 +static inline int32_t opal_atomic_sub_fetch_32(opal_atomic_int32_t *v, int i) +{ + return opal_atomic_fetch_sub_32(v, i) - i; +} -/** - * atomic_sub - subtract the atomic variable - * @i: integer value to subtract - * @v: pointer of type int - * - * Atomically subtracts @i from @v. - */ static inline int64_t opal_atomic_fetch_sub_64(opal_atomic_int64_t *v, int64_t i) { int64_t ret = -i; @@ -227,4 +208,43 @@ static inline int64_t opal_atomic_fetch_sub_64(opal_atomic_int64_t *v, int64_t i return ret; } +static inline int64_t opal_atomic_sub_fetch_64(opal_atomic_int64_t *v, int64_t i) +{ + return opal_atomic_fetch_sub_64(v, i) - i; +} + +#define OPAL_ATOMIC_DEFINE_OP(type, bits, operation, name) \ + static inline type opal_atomic_fetch_##name##_##bits(opal_atomic_##type *addr, type value) \ + { \ + type oldval; \ + do { \ + oldval = *addr; \ + } while (!opal_atomic_compare_exchange_strong_##bits(addr, &oldval, \ + oldval operation value)); \ + \ + return oldval; \ + } \ + \ + static inline type opal_atomic_##name##_fetch_##bits(opal_atomic_##type *addr, type value) \ + { \ + type oldval, newval; \ + do { \ + oldval = *addr; \ + newval = oldval operation value; \ + } while (!opal_atomic_compare_exchange_strong_##bits(addr, &oldval, newval)); \ + \ + return newval; \ + } + +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, &, and) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, |, or) +OPAL_ATOMIC_DEFINE_OP(int32_t, 32, ^, xor) + +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, &, and) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, |, or) +OPAL_ATOMIC_DEFINE_OP(int64_t, 64, ^, xor) + +#include "opal/sys/atomic_impl_minmax_math.h" +#include "opal/sys/atomic_impl_size_t_math.h" + #endif /* ! 
OPAL_SYS_ARCH_ATOMIC_H */ From 14c5d15f0e052528b609ab60aa9dd492fcdf1a75 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Thu, 20 Jan 2022 18:57:37 +0000 Subject: [PATCH 08/11] Refactor LL/SC interface Unlike other interfaces, the LL/SC interface must be optional, as many platforms do not provide LL/SC support. Document the interface a bit better, and move the PowerPC LL/SC interface into its own header file so that it can be used with C11/GCC builtin atomics, similar to the ARM platform. Signed-off-by: Brian Barrett --- opal/include/opal/sys/Makefile.am | 1 + opal/include/opal/sys/arm64/atomic_llsc.h | 13 ++- opal/include/opal/sys/atomic.h | 63 +++++++------ opal/include/opal/sys/atomic_impl_ptr_llsc.h | 54 +++++++++++ opal/include/opal/sys/powerpc/Makefile.am | 2 + opal/include/opal/sys/powerpc/atomic.h | 67 +------------ opal/include/opal/sys/powerpc/atomic_helper.h | 44 +++++++++ opal/include/opal/sys/powerpc/atomic_llsc.h | 94 +++++++++++++++++++ 8 files changed, 237 insertions(+), 101 deletions(-) create mode 100644 opal/include/opal/sys/atomic_impl_ptr_llsc.h create mode 100644 opal/include/opal/sys/powerpc/atomic_helper.h create mode 100644 opal/include/opal/sys/powerpc/atomic_llsc.h diff --git a/opal/include/opal/sys/Makefile.am b/opal/include/opal/sys/Makefile.am index fd35f33fab9..9a5e80ea096 100644 --- a/opal/include/opal/sys/Makefile.am +++ b/opal/include/opal/sys/Makefile.am @@ -33,6 +33,7 @@ headers += \ opal/sys/atomic_stdc.h \ opal/sys/atomic_impl_minmax_math.h \ opal/sys/atomic_impl_ptr_cswap.h \ + opal/sys/atomic_impl_ptr_llsc.h \ opal/sys/atomic_impl_ptr_swap.h \ opal/sys/atomic_impl_size_t_math.h \ opal/sys/atomic_impl_spinlock.h \ diff --git a/opal/include/opal/sys/arm64/atomic_llsc.h b/opal/include/opal/sys/arm64/atomic_llsc.h index 57a4a31cc7a..f51ab4a3481 100644 --- a/opal/include/opal/sys/arm64/atomic_llsc.h +++ b/opal/include/opal/sys/arm64/atomic_llsc.h @@ -25,15 +25,16 @@ * $HEADER$ */ -#if !defined(OPAL_SYS_ARCH_ATOMIC_LLSC_H) 
+#ifndef OPAL_SYS_ARCH_ATOMIC_LLSC_H +#define OPAL_SYS_ARCH_ATOMIC_LLSC_H 1 -# define OPAL_SYS_ARCH_ATOMIC_LLSC_H +/* + * this file is included even when C11 or GCC built-in atomics are + * used, which is why we must check for gcc inline assembly support. + */ # if OPAL_C_GCC_INLINE_ASSEMBLY -# undef OPAL_HAVE_ATOMIC_LLSC_32 -# undef OPAL_HAVE_ATOMIC_LLSC_64 - # define OPAL_HAVE_ATOMIC_LLSC_32 1 # define OPAL_HAVE_ATOMIC_LLSC_64 1 @@ -79,6 +80,8 @@ ret = (_ret == 0); \ } while (0) +#include "opal/sys/atomic_impl_ptr_llsc.h" + # endif /* OPAL_C_GCC_INLINE_ASSEMBLY */ #endif /* ! OPAL_SYS_ARCH_ATOMIC_LLSC_H */ diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 279e5555ac3..902203d4886 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -412,49 +412,52 @@ static inline void opal_atomic_add(type *addr, type delta); #endif /* !OPAL_C_HAVE__ATOMIC */ -/****** load-linked, store-conditional atomic implementations ******/ -/* C11 atomics do not expose the low-level load-linked, store-conditional - * instructions. Open MPI can use these instructions to implement a more - * efficient version of the lock-free lifo and fifo. On Apple Silicon the - * LL/SC fifo and lifo are ~ 2-20x faster than the CAS128 implementation. */ -#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 -# include "opal/sys/arm64/atomic_llsc.h" -#endif +/********************************************************************** + * + * Load-linked, Store Conditional + * + * Optional. Check OPAL_HAVE_ATOMIC_LLSC_32, + * OPAL_HAVE_ATOMIC_LLSC_64, or OPAL_HAVE_ATOMIC_LLSC_PTR before + * using. Implemented as macros due to function call behaviors; + * prototyped here as C++-style functions for readability. + * + * C11 and GCC built-in atomics don't provide native LL/SC support, so + * if there is an architectural implementation, we use it even if + * we are using the C11 or GCC built-in atomics. 
+ * + *********************************************************************/ -#if !defined(OPAL_HAVE_ATOMIC_LLSC_32) -# define OPAL_HAVE_ATOMIC_LLSC_32 0 -#endif +#ifdef DOXYGEN -#if !defined(OPAL_HAVE_ATOMIC_LLSC_64) -# define OPAL_HAVE_ATOMIC_LLSC_64 0 -#endif - -#if (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64) +static inline void opal_atomic_ll_32(opal_atomic_int32_t *addr, int32_t &ret); -# if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32 +static inline void opal_atomic_sc_32(opal_atomic_int32_t *addr, int32_t newval, int &ret); -# define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((opal_atomic_int32_t *) (addr), ret) -# define opal_atomic_sc_ptr(addr, value, ret) \ - opal_atomic_sc_32((opal_atomic_int32_t *) (addr), (intptr_t)(value), ret) +static inline void opal_atomic_ll_64(opal_atomic_int64_t *addr, int64_t &ret); -# define OPAL_HAVE_ATOMIC_LLSC_PTR 1 +static inline void opal_atomic_sc_64(opal_atomic_int64_t *addr, int64_t newval, int &ret); -# elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64 +static inline void opal_atomic_ll_ptr(opal_atomic_intptr_t *addr, intptr_t &ret); -# define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((opal_atomic_int64_t *) (addr), ret) -# define opal_atomic_sc_ptr(addr, value, ret) \ - opal_atomic_sc_64((opal_atomic_int64_t *) (addr), (intptr_t)(value), ret) +static inline void opal_atomic_sc_ptr(opal_atomic_intptr_t *addr, intptr_t newval, int &ret); -# define OPAL_HAVE_ATOMIC_LLSC_PTR 1 +#endif -# endif +#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +# include "opal/sys/arm64/atomic_llsc.h" +#elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 +# include "opal/sys/powerpc/atomic_llsc.h" +#endif -#else +#if !defined(OPAL_HAVE_ATOMIC_LLSC_32) +# define OPAL_HAVE_ATOMIC_LLSC_32 0 +#endif -# define OPAL_HAVE_ATOMIC_LLSC_PTR 0 +#if !defined(OPAL_HAVE_ATOMIC_LLSC_64) +# define OPAL_HAVE_ATOMIC_LLSC_64 0 +#endif -#endif /* (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)*/ END_C_DECLS diff --git 
a/opal/include/opal/sys/atomic_impl_ptr_llsc.h b/opal/include/opal/sys/atomic_impl_ptr_llsc.h new file mode 100644 index 00000000000..076e768cba5 --- /dev/null +++ b/opal/include/opal/sys/atomic_impl_ptr_llsc.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2020-2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * Pointer-sized wrapper for LL/SC calls, wrappers around size-defined + * calls. Note that these must be macros, as LL/SC may not work + * across function calls. 
+ */ + + +#ifndef ATOMIC_IMPL_PTR_LLSC_H +#define ATOMIC_IMPL_PTR_LLSC_H 1 + +#if SIZEOF_VOID_P == 4 && defined(OPAL_HAVE_ATOMIC_LLSC_32) && OPAL_HAVE_ATOMIC_LLSC_32 + +# define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((opal_atomic_int32_t *) (addr), ret) +# define opal_atomic_sc_ptr(addr, value, ret) \ + opal_atomic_sc_32((opal_atomic_int32_t *) (addr), (intptr_t)(value), ret) + +# define OPAL_HAVE_ATOMIC_LLSC_PTR 1 + +#elif SIZEOF_VOID_P == 8 && defined(OPAL_HAVE_ATOMIC_LLSC_64) && OPAL_HAVE_ATOMIC_LLSC_64 + +# define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((opal_atomic_int64_t *) (addr), ret) +# define opal_atomic_sc_ptr(addr, value, ret) \ + opal_atomic_sc_64((opal_atomic_int64_t *) (addr), (intptr_t)(value), ret) + +# define OPAL_HAVE_ATOMIC_LLSC_PTR 1 + +#endif /* SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64 */ + +#endif /* ATOMIC_IMPL_PTR_LLSC_H */ diff --git a/opal/include/opal/sys/powerpc/Makefile.am b/opal/include/opal/sys/powerpc/Makefile.am index 612dd2e4d7f..d2dbeeba0a5 100644 --- a/opal/include/opal/sys/powerpc/Makefile.am +++ b/opal/include/opal/sys/powerpc/Makefile.am @@ -20,4 +20,6 @@ headers += \ opal/sys/powerpc/atomic.h \ + opal/sys/powerpc/atomic_helper.h \ + opal/sys/powerpc/atomic_llsc.h \ opal/sys/powerpc/timer.h diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 7bc9c4fdf73..a92457c6da1 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -30,23 +30,7 @@ * On powerpc ... */ -#if defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__) || defined(__ibmxl__) -/* work-around bizzare xlc bug in which it sign-extends - a pointer to a 32-bit signed integer */ -# define OPAL_ASM_ADDR(a) ((uintptr_t) a) -#else -# define OPAL_ASM_ADDR(a) (a) -#endif - -#if defined(__PGI) -/* work-around for bug in PGI 16.5-16.7 where the compiler fails to - * correctly emit load instructions for 64-bit operands. 
without this - * it will emit lwz instead of ld to load the 64-bit operand. */ -# define OPAL_ASM_VALUE64(x) (void *) (intptr_t)(x) -#else -# define OPAL_ASM_VALUE64(x) x -#endif - +#include "opal/sys/powerpc/atomic_helper.h" /********************************************************************** * @@ -291,53 +275,4 @@ OPAL_ATOMIC_POWERPC_DEFINE_ATOMIC_64(sub, subf) #include "opal/sys/atomic_impl_minmax_math.h" #include "opal/sys/atomic_impl_size_t_math.h" -/* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. The reason - * is that even with an always_inline attribute the compiler may still emit instructions to store - * then load the arguments to/from the stack. This sequence may cause the ll reservation to be - * cancelled. */ -#define opal_atomic_ll_32(addr, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - __asm__ __volatile__("lwarx %0, 0, %1 \n\t" : "=&r"(ret) : "r"(_addr)); \ - } while (0) - -#define opal_atomic_sc_32(addr, value, ret) \ - do { \ - opal_atomic_int32_t *_addr = (addr); \ - int32_t _ret, _foo, _newval = (int32_t) value; \ - \ - __asm__ __volatile__(" stwcx. %4, 0, %3 \n\t" \ - " li %0,0 \n\t" \ - " bne- 1f \n\t" \ - " ori %0,%0,1 \n\t" \ - "1:" \ - : "=r"(_ret), "=m"(*_addr), "=r"(_foo) \ - : "r"(_addr), "r"(_newval) \ - : "cc", "memory"); \ - ret = _ret; \ - } while (0) - -#define opal_atomic_ll_64(addr, ret) \ - do { \ - opal_atomic_int64_t *_addr = (addr); \ - __asm__ __volatile__("ldarx %0, 0, %1 \n\t" : "=&r"(ret) : "r"(_addr)); \ - } while (0) - -#define opal_atomic_sc_64(addr, value, ret) \ - do { \ - opal_atomic_int64_t *_addr = (addr); \ - int64_t _newval = (int64_t) value; \ - int32_t _ret; \ - \ - __asm__ __volatile__(" stdcx. %2, 0, %1 \n\t" \ - " li %0,0 \n\t" \ - " bne- 1f \n\t" \ - " ori %0,%0,1 \n\t" \ - "1:" \ - : "=r"(_ret) \ - : "r"(_addr), "r"(OPAL_ASM_VALUE64(_newval)) \ - : "cc", "memory"); \ - ret = _ret; \ - } while (0) - #endif /* ! 
OPAL_SYS_ARCH_ATOMIC_H */ diff --git a/opal/include/opal/sys/powerpc/atomic_helper.h b/opal/include/opal/sys/powerpc/atomic_helper.h new file mode 100644 index 00000000000..5cd43d20652 --- /dev/null +++ b/opal/include/opal/sys/powerpc/atomic_helper.h @@ -0,0 +1,44 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2021 IBM Corporation. All rights reserved. + * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2021 Google, LLC. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_SYS_ARCH_ATOMIC_HELPER_H +#define OPAL_SYS_ARCH_ATOMIC_HELPER_H 1 + +#if defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__) || defined(__ibmxl__) +/* work-around bizarre xlc bug in which it sign-extends + a pointer to a 32-bit signed integer */ +# define OPAL_ASM_ADDR(a) ((uintptr_t) a) +#else +# define OPAL_ASM_ADDR(a) (a) +#endif + +#if defined(__PGI) +/* work-around for bug in PGI 16.5-16.7 where the compiler fails to + * correctly emit load instructions for 64-bit operands. without this + * it will emit lwz instead of ld to load the 64-bit operand. 
*/ +# define OPAL_ASM_VALUE64(x) (void *) (intptr_t)(x) +#else +# define OPAL_ASM_VALUE64(x) x +#endif + +#endif /* OPAL_SYS_ARCH_ATOMIC_HELPER_H */ diff --git a/opal/include/opal/sys/powerpc/atomic_llsc.h b/opal/include/opal/sys/powerpc/atomic_llsc.h new file mode 100644 index 00000000000..9e6c7aa5863 --- /dev/null +++ b/opal/include/opal/sys/powerpc/atomic_llsc.h @@ -0,0 +1,94 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2010-2021 IBM Corporation. All rights reserved. + * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2021 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_SYS_ARCH_ATOMIC_LLSC_H +#define OPAL_SYS_ARCH_ATOMIC_LLSC_H 1 + +/* + * this file is included even when C11 or GCC built-in atomics are + * used, which is why we must check for gcc inline assembly support. + */ + +#if OPAL_C_GCC_INLINE_ASSEMBLY + +#include "opal/sys/powerpc/atomic_helper.h" + +#define OPAL_HAVE_ATOMIC_LLSC_32 1 +#define OPAL_HAVE_ATOMIC_LLSC_64 1 + +/* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. The reason + * is that even with an always_inline attribute the compiler may still emit instructions to store + * then load the arguments to/from the stack. 
This sequence may cause the ll reservation to be + * cancelled. */ +#define opal_atomic_ll_32(addr, ret) \ + do { \ + opal_atomic_int32_t *_addr = (addr); \ + __asm__ __volatile__("lwarx %0, 0, %1 \n\t" : "=&r"(ret) : "r"(_addr)); \ + } while (0) + +#define opal_atomic_sc_32(addr, value, ret) \ + do { \ + opal_atomic_int32_t *_addr = (addr); \ + int32_t _ret, _foo, _newval = (int32_t) value; \ + \ + __asm__ __volatile__(" stwcx. %4, 0, %3 \n\t" \ + " li %0,0 \n\t" \ + " bne- 1f \n\t" \ + " ori %0,%0,1 \n\t" \ + "1:" \ + : "=r"(_ret), "=m"(*_addr), "=r"(_foo) \ + : "r"(_addr), "r"(_newval) \ + : "cc", "memory"); \ + ret = _ret; \ + } while (0) + +#define opal_atomic_ll_64(addr, ret) \ + do { \ + opal_atomic_int64_t *_addr = (addr); \ + __asm__ __volatile__("ldarx %0, 0, %1 \n\t" : "=&r"(ret) : "r"(_addr)); \ + } while (0) + +#define opal_atomic_sc_64(addr, value, ret) \ + do { \ + opal_atomic_int64_t *_addr = (addr); \ + int64_t _newval = (int64_t) value; \ + int32_t _ret; \ + \ + __asm__ __volatile__(" stdcx. %2, 0, %1 \n\t" \ + " li %0,0 \n\t" \ + " bne- 1f \n\t" \ + " ori %0,%0,1 \n\t" \ + "1:" \ + : "=r"(_ret) \ + : "r"(_addr), "r"(OPAL_ASM_VALUE64(_newval)) \ + : "cc", "memory"); \ + ret = _ret; \ + } while (0) + +#include "opal/sys/atomic_impl_ptr_llsc.h" + +#endif /* OPAL_C_GCC_INLINE_ASSEMBLY */ + +#endif /* OPAL_SYS_ARCH_ATOMIC_LLSC_H */ From 057a6b5af61e38990a9a0fb34583b347b72fd8b1 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 14 Jan 2022 21:23:51 +0000 Subject: [PATCH 09/11] Change atomic inclusion to make C11 less unique With the previous refactoring and breaking out the implementation code into separate headers, it is no longer required that the atomic_stdc.h implementation be included "special" in the atomics header. Clean up the ordering of includes so that every implementation follows the same include ordering. 
The stdc implementations of the atomics is type independent and we don't enforce inline wrappers because that results in a massive number of warnings from opal_lifo due to its interesting use of volatile. This patch takes the do-no-harm approach, but we need to clean up opal_lifo in a future patch. Signed-off-by: Brian Barrett --- opal/include/opal/sys/atomic.h | 104 ++++++++++++++-------------- opal/include/opal/sys/atomic_stdc.h | 2 +- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 902203d4886..26c56b06ed3 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -56,55 +56,8 @@ #include "opal/sys/architecture.h" #include "opal_stdatomic.h" -#if OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER) - -# include "atomic_stdc.h" - -#else /* !OPAL_C_HAVE__ATOMIC */ - - BEGIN_C_DECLS -/********************************************************************** - * - * Load the appropriate architecture files and set some reasonable - * default values for our support - * - *********************************************************************/ -# if defined(DOXYGEN) -/* don't include system-level gorp when generating doxygen files */ -# elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_GCC -# include "opal/sys/gcc_builtin/atomic.h" -# elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 -# include "opal/sys/x86_64/atomic.h" -# elif OPAL_ASSEMBLY_ARCH == OPAL_ARM -# include "opal/sys/arm/atomic.h" -# elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 -# include "opal/sys/arm64/atomic.h" -# elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 -# include "opal/sys/ia32/atomic.h" -# elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32 -# include "opal/sys/powerpc/atomic.h" -# elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 -# include "opal/sys/powerpc/atomic.h" -# endif - -# ifndef DOXYGEN -/* compare and set operations can't really be emulated from software, - so if these defines aren't already set, they should be 
set to 0 - now */ -# ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 -# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 -# endif -# ifndef OPAL_HAVE_ATOMIC_LLSC_32 -# define OPAL_HAVE_ATOMIC_LLSC_32 0 -# endif -# ifndef OPAL_HAVE_ATOMIC_LLSC_64 -# define OPAL_HAVE_ATOMIC_LLSC_64 0 -# endif -# endif /* DOXYGEN */ - - /********************************************************************** * * Memory Barriers @@ -158,6 +111,18 @@ static inline void opal_atomic_wmb(void); * over the 32 and 64 bit implementations). * *********************************************************************/ + +/* + * The stdc implementation is implemented as macros around the C11 + * atomic interface (which is a type-independent interface). While it + * would be better to have type checking so developers using the C11 + * interface didn't accidentally munge something that broke on other + * implementations, there are a ton of warnings due to volatile casting + * in the opal_lifo code. Don't enforce the types of the function + * calls on C11 until we can sort that out. + */ +#if !(OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER)) + /** * Atomic compare and set of 32 bit intergers with acquire and release semantics. 
* @@ -284,7 +249,6 @@ static inline bool opal_atomic_compare_exchange_strong_acq_ptr(opal_atomic_intpt static inline bool opal_atomic_compare_exchange_strong_rel_ptr(opal_atomic_intptr_t *addr, intptr_t *oldval, intptr_t newval); - /********************************************************************** * * Swap @@ -321,6 +285,8 @@ static inline int64_t opal_atomic_swap_64(opal_atomic_int64_t *addr, int64_t new */ static inline intptr_t opal_atomic_swap_ptr(opal_atomic_intptr_t *addr, intptr_t newval); +#endif /* #if !(OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER)) */ + /********************************************************************** * @@ -410,8 +376,6 @@ static inline size_t opal_atomic_fetch_add_size_t(opal_atomic_size_t *addr, size static inline void opal_atomic_add(type *addr, type delta); #endif -#endif /* !OPAL_C_HAVE__ATOMIC */ - /********************************************************************** * @@ -444,20 +408,56 @@ static inline void opal_atomic_sc_ptr(opal_atomic_intptr_t *addr, intptr_t newva #endif + +/********************************************************************** + * + * Load the appropriate architecture files and set some reasonable + * default values for our support + * + *********************************************************************/ + +#if defined(DOXYGEN) +/* don't include system-level gorp when generating doxygen files */ +#elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER) +# include "opal/sys/atomic_stdc.h" +#elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_GCC +# include "opal/sys/gcc_builtin/atomic.h" +#elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 +# include "opal/sys/x86_64/atomic.h" +#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +# include "opal/sys/arm64/atomic.h" +#elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 +# include "opal/sys/powerpc/atomic.h" +#endif + #if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 # include "opal/sys/arm64/atomic_llsc.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 # 
include "opal/sys/powerpc/atomic_llsc.h" #endif -#if !defined(OPAL_HAVE_ATOMIC_LLSC_32) + +/********************************************************************** + * + * Ensure defines for the few optional features are always defined + * + *********************************************************************/ + +#ifndef OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 +# define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_128 0 +#endif + +#ifndef OPAL_HAVE_ATOMIC_LLSC_32 # define OPAL_HAVE_ATOMIC_LLSC_32 0 #endif -#if !defined(OPAL_HAVE_ATOMIC_LLSC_64) +#ifndef OPAL_HAVE_ATOMIC_LLSC_64 # define OPAL_HAVE_ATOMIC_LLSC_64 0 #endif +#ifndef OPAL_HAVE_ATOMIC_LLSC_PTR +# define OPAL_HAVE_ATOMIC_LLSC_PTR 0 +#endif END_C_DECLS diff --git a/opal/include/opal/sys/atomic_stdc.h b/opal/include/opal/sys/atomic_stdc.h index 622efae5a0e..62e27705cc4 100644 --- a/opal/include/opal/sys/atomic_stdc.h +++ b/opal/include/opal/sys/atomic_stdc.h @@ -149,7 +149,7 @@ opal_atomic_compare_exchange_strong_128(opal_atomic_int128_t *addr, opal_int128_ /* * Lock initialization function. It set the lock to UNLOCKED. */ -static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, bool value) +static inline void opal_atomic_lock_init(opal_atomic_lock_t *lock, int32_t value) { atomic_flag_clear_explicit(lock, memory_order_relaxed); } From 10b9725fef1df7d813d6d547af2dc8507c070930 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Tue, 18 Jan 2022 17:35:53 +0000 Subject: [PATCH 10/11] Clean up atomics configure logic Finish removal of the old assembly logic that supported non-inline atomics as well as inline styles other than gcc. Rework the primary configure macro to make the flow more obvious (particularly around which atomics style should be used), and push all the decisions about using C11 atomics with certain compilers into the configure logic. 
Signed-off-by: Brian Barrett --- config/opal_config_asm.m4 | 295 ++++++++++++++++--------------- config/opal_configure_options.m4 | 7 - opal/include/opal/sys/atomic.h | 24 ++- opal/include/opal_stdatomic.h | 17 +- 4 files changed, 172 insertions(+), 171 deletions(-) diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index 6c3d3624192..6bedcc08613 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -380,6 +380,7 @@ __atomic_add_fetch(&tmp64, 1, __ATOMIC_RELAXED);])], fi ]) + AC_DEFUN([OPAL_CHECK_C11_CSWAP_INT128], [ OPAL_VAR_SCOPE_PUSH([atomic_compare_exchange_result atomic_compare_exchange_CFLAGS_save atomic_compare_exchange_LIBS_save]) @@ -500,20 +501,16 @@ AC_DEFUN([OPAL_CHECK_CMPXCHG16B],[ OPAL_VAR_SCOPE_POP ])dnl + dnl ################################################################# dnl -dnl OPAL_CHECK_INLINE_GCC +dnl OPAL_CHECK_INLINE_GCC([action-if-found], [action-if-not-found]) dnl dnl Check if the compiler is capable of doing GCC-style inline dnl assembly. Some compilers emit a warning and ignore the inline dnl assembly (xlc on OS X) and compile without error. Therefore, dnl the test attempts to run the emitted code to check that the -dnl assembly is actually run. To run this test, one argument to -dnl the macro must be an assembly instruction in gcc format to move -dnl the value 0 into the register containing the variable ret. -dnl For PowerPC, this would be: -dnl -dnl "li %0,0" : "=&r"(ret) +dnl assembly is actually run. dnl dnl For testing ia32 assembly, the assembly instruction xaddl is dnl tested. The xaddl instruction is used by some of the atomic @@ -527,181 +524,193 @@ dnl support dnl dnl ################################################################# AC_DEFUN([OPAL_CHECK_INLINE_C_GCC],[ - assembly="$1" - asm_result="unknown" - - AC_MSG_CHECKING([if $CC supports GCC inline assembly]) - - if test ! 
"$assembly" = "" ; then - AC_RUN_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[ + AC_CACHE_CHECK([if $CC supports GCC inline assembly], + [opal_cv_asm_gcc_inline_assembly], + [OPAL_VAR_SCOPE_PUSH([asm_result opal_gcc_inline_assign OPAL_C_GCC_INLINE_ASSEMBLY]) + + asm_result="unknown" + + opal_gcc_inline_assign="" + case "${host}" in + x86_64-*x32|i?86-*|x86_64*|amd64*) + opal_gcc_inline_assign='"xaddl %1,%0" : "=m"(ret), "+r"(negone) : "m"(ret)' + ;; + aarch64*) + opal_gcc_inline_assign='"mov %0, #0" : "=&r"(ret)' + ;; + powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*) + opal_gcc_inline_assign='"1: li %0,0" : "=&r"(ret)' + ;; + esac + + AS_IF([test "$opal_gcc_inline_assign" != ""], + [AC_RUN_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[ int ret = 1; int negone = -1; -__asm__ __volatile__ ($assembly); +__asm__ __volatile__ ($opal_gcc_inline_assign); return ret; - ]])], - [asm_result="yes"], [asm_result="no"], - [asm_result="unknown"]) - else - assembly="test skipped - assuming no" - fi - - # if we're cross compiling, just try to compile and figure good enough - if test "$asm_result" = "unknown" ; then - AC_LINK_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[ + ]])], + [asm_result="yes"], [asm_result="no"], + [asm_result="unknown"])], + [asm_result="no - architecture not supported"]) + + # if we're cross compiling, just try to compile and figure good enough + AS_IF([test "$asm_result" = "unknown"], + [AC_LINK_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[ int ret = 1; int negone = -1; -__asm__ __volatile__ ($assembly); +__asm__ __volatile__ ($opal_gcc_inline_assign); return ret; - ]])], - [asm_result="yes"], [asm_result="no"]) - fi + ]])], + [asm_result="yes"], [asm_result="no"])]) + opal_cv_asm_gcc_inline_assembly="$asm_result" + OPAL_VAR_SCOPE_POP]) - AC_MSG_RESULT([$asm_result]) - - if test "$asm_result" = "yes" ; then - OPAL_C_GCC_INLINE_ASSEMBLY=1 - opal_cv_asm_inline_supported="yes" - else - OPAL_C_GCC_INLINE_ASSEMBLY=0 - fi + AS_IF([test 
"$opal_cv_asm_gcc_inline_assembly" = "yes"], + [OPAL_C_GCC_INLINE_ASSEMBLY=1 + $1], + [OPAL_C_GCC_INLINE_ASSEMBLY=0 + $2]) AC_DEFINE_UNQUOTED([OPAL_C_GCC_INLINE_ASSEMBLY], [$OPAL_C_GCC_INLINE_ASSEMBLY], [Whether C compiler supports GCC style inline assembly]) - - unset OPAL_C_GCC_INLINE_ASSEMBLY assembly asm_result ])dnl + dnl ################################################################# dnl dnl OPAL_CONFIG_ASM dnl -dnl DEFINE OPAL_ASSEMBLY_ARCH to something in sys/architecture.h -dnl DEFINE OPAL_ASSEMBLY_FORMAT to string containing correct -dnl format for assembly (not user friendly) -dnl SUBST OPAL_ASSEMBLY_FORMAT to string containing correct -dnl format for assembly (not user friendly) +dnl Configure assembly support. AC_DEFINES the following: +dnl - OPAL_C_GCC_INLINE_ASSEMBLY - 1 if C compiler supports +dnl GCC-style inline assembly +dnl - OPAL_USE_C11_ATOMICS - 1 if atomics implementation should +dnl use C11-style atomics +dnl - OPAL_USE_GCC_BUILTIN_ATOMICS - 1 if atomics implementation +dnl should use GCC built-in style atomics +dnl - OPAL_USE_ASM_ATOMICS - 1 if atomics implementation should +dnl use inline assembly (using GCC-style inline assembly) +dnl for atomics implementaiton dnl dnl ################################################################# AC_DEFUN([OPAL_CONFIG_ASM],[ AC_REQUIRE([OPAL_SETUP_CC]) - AC_ARG_ENABLE([c11-atomics],[AS_HELP_STRING([--enable-c11-atomics], - [Enable use of C11 atomics if available (default: enabled)])]) + OPAL_VAR_SCOPE_PUSH([atomics_found want_c11_atomics want_gcc_builtin_atomics want_asm_atomics opal_cv_asm_arch result]) + + # only assembly style we support today is gcc-style inline + # assembly, find out if it works. We need this even for C11/GCC + # builtin atomics cases, because we use inline assembly for + # timers, LLSC, and 16 byte compare and swap routines. 
+ OPAL_CHECK_INLINE_C_GCC([gcc_inline=1], [gcc_inline=0]) + + atomics_found=no + want_c11_atomics=0 + want_gcc_builtin_atomics=0 + want_asm_atomics=0 + + AC_ARG_ENABLE([c11-atomics], + [AS_HELP_STRING([--enable-c11-atomics], + [Enable use of C11 atomics if available (default: use if available, disabled by default on 64-bit PowerPC)])]) AC_ARG_ENABLE([builtin-atomics], - [AS_HELP_STRING([--enable-builtin-atomics], - [Enable use of GCC built-in atomics (default: autodetect)])]) - - OPAL_CHECK_C11_CSWAP_INT128 - opal_cv_asm_builtin="BUILTIN_NO" - OPAL_CHECK_GCC_ATOMIC_BUILTINS - - if test "x$enable_c11_atomics" != "xno" && test "$opal_cv_c11_supported" = "yes" ; then - opal_cv_asm_builtin="BUILTIN_C11" - OPAL_CHECK_C11_CSWAP_INT128 - elif test "x$enable_c11_atomics" = "xyes"; then - AC_MSG_WARN([C11 atomics were requested but are not supported]) - AC_MSG_ERROR([Cannot continue]) - elif test "$enable_builtin_atomics" = "yes" ; then - if test $opal_cv_have___atomic = "yes" ; then - opal_cv_asm_builtin="BUILTIN_GCC" - else - AC_MSG_WARN([GCC built-in atomics requested but not found.]) - AC_MSG_ERROR([Cannot continue]) - fi - fi + [AS_HELP_STRING([--enable-builtin-atomics], + [Enable use of GCC built-in atomics. Note that C11 atomics are preferred over built-in atomics. (default: use if available, disabled by default on 64-bit PowerPC)])]) - # find our architecture for purposes of assembly stuff - opal_cv_asm_arch="UNSUPPORTED" - OPAL_GCC_INLINE_ASSIGN="" + AC_ARG_ENABLE([builtin-atomics-for-ppc], + [AS_HELP_STRING([--enable-builtin-atomics-for-ppc], + [For performance reasons, 64-bit POWER architectures will not use C11 or GCC built-in atomics, even if --enable-c11-atomics is passed to configure. 
Enabling this option will re-enable support for both C11 and GCC built-in atomics.])]) + # See the following github PR and some performance numbers/discussion: + # https://github.com/open-mpi/ompi/pull/8649 + # + # This logic is a bit convoluted, but matches existing logic in v4.x. case "${host}" in - x86_64-*x32|i?86-*|x86_64*|amd64*) - if test "$ac_cv_sizeof_long" = "4" ; then - if test $opal_cv_asm_builtin = BUILTIN_NO ; then - AC_MSG_ERROR([IA32 atomics are no longer supported. Use a C11 compiler]) - fi - opal_cv_asm_arch="IA32" - else - opal_cv_asm_arch="X86_64" - OPAL_CHECK_CMPXCHG16B - fi - OPAL_GCC_INLINE_ASSIGN='"xaddl %1,%0" : "=m"(ret), "+r"(negone) : "m"(ret)' - ;; + powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*) + AS_IF([test "$ac_cv_sizeof_long" = "8" -a "$enable_builtin_atomics_for_ppc" != "yes"], + [AS_IF([test "$enable_c11_atomics" != "no" -a "$enable_builtin_atomics" != "no"], + [AC_MSG_NOTICE([Disabling built-in and C11 atomics due to known performance issues on Powerpc])]) + AS_IF([test "$enable_c11_atomics" = "yes" -o "$enable_builtin_atomics" = "yes"], + [AC_MSG_WARN([Ignoring --enable-c11-atomics and --enable-builtin-atomics options on POWER. Set +--enable-builtin-atomics-for-ppc to re-enable.])]) + enable_c11_atomics="no" + enable_builtin_atomics="no"]) + ;; + esac - aarch64*) - opal_cv_asm_arch="ARM64" - OPAL_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)' + # Option 1 for atomics: C11 + # + # We currently always disable C11 atomics with the Intel compilers. + # We know builds older than 20200310 are broken with respect to + # C11 atomics, but have not apparently found a build we are happy + # with. In the future, this should be changed to a check for a + # particular Intel version. 
+ AS_IF([test "$enable_c11_atomics" != "no" -a "$opal_cv_c11_supported" = "yes" -a "$opal_cv_c_compiler_vendor" != "intel"], + [AC_MSG_NOTICE([Using C11 atomics]) + OPAL_CHECK_C11_CSWAP_INT128 + want_c11_atomics=1 + atomics_found="C11 atomics"], + [test "$enable_c11_atomics" = "yes"], + [AC_MSG_WARN([C11 atomics were requested but are not supported]) + AC_MSG_ERROR([Cannot continue])]) + + # Option 2 for atomics: GCC-style Builtin + AS_IF([test "$atomics_found" = "no" -a "$enable_builtin_atomics" != "no"], + [OPAL_CHECK_GCC_ATOMIC_BUILTINS + AS_IF([test $opal_cv_have___atomic = "yes"], + [AC_MSG_NOTICE([Using GCC built-in style atomics]) + atomics_found="GCC built-in style atomics" + want_gcc_builtin_atomics=1], + [test "$enable_builtin_atomics" = "yes"], + [AC_MSG_WARN([GCC built-in atomics requested but not found.]) + AC_MSG_ERROR([Cannot continue])])]) + + # Option 3 for atomics: inline assembly + AS_IF([test "$atomics_found" = "no" -a "$gcc_inline" = "1"], + [case "${host}" in + x86_64-*x32|i?86-*|x86_64*|amd64*) + AS_IF([test "$ac_cv_sizeof_long" = "8"], + [OPAL_CHECK_CMPXCHG16B + opal_cv_asm_arch="X86_64" + atomics_found="x86_64 assembly"]) ;; - armv7*|arm-*-linux-gnueabihf|armv6*) - if test $opal_cv_asm_builtin = BUILTIN_NO ; then - AC_MSG_ERROR([32-bit ARM atomics are no longer supported. Use a C11 compiler]) - fi - - opal_cv_asm_arch="ARM" - OPAL_GCC_INLINE_ASSIGN='"mov %0, #0" : "=&r"(ret)' + aarch64*) + opal_cv_asm_arch="ARM64" + atomics_found="aarch64 assembly" ;; - powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*) - if test "$ac_cv_sizeof_long" = "4" ; then - if test $opal_cv_asm_builtin = BUILTIN_NO ; then - AC_MSG_ERROR([PowerPC 32-bit atomics are no longer supported. 
Use a C11 compiler]) - fi - opal_cv_asm_arch="POWERPC32" - elif test "$ac_cv_sizeof_long" = "8" ; then - opal_cv_asm_arch="POWERPC64" - else - AC_MSG_ERROR([Could not determine PowerPC word size: $ac_cv_sizeof_long]) - fi - OPAL_GCC_INLINE_ASSIGN='"1: li %0,0" : "=&r"(ret)' - - # See the following github PR and some performance numbers/discussion: - # https://github.com/open-mpi/ompi/pull/8649 - AC_MSG_CHECKING([$opal_cv_asm_arch: Checking if force gcc atomics requested]) - if test $force_gcc_atomics_ppc = 0 ; then - AC_MSG_RESULT([no]) - opal_cv_asm_builtin="BUILTIN_NO" - else - AC_MSG_RESULT([Yes]) - AC_MSG_WARN([$opal_cv_asm_arch: gcc atomics have been known to perform poorly on powerpc.]) - fi - + powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*) + AS_IF([test "$ac_cv_sizeof_long" = "8"], + [opal_cv_asm_arch="POWERPC64" + atomics_found="PowerPC asssembly"]) ;; - *) - if test $opal_cv_have___atomic = "yes" ; then - opal_cv_asm_builtin="BUILTIN_GCC" - else - AC_MSG_ERROR([No atomic primitives available for $host]) - fi - ;; - esac + esac - if test "$opal_cv_asm_builtin" = "BUILTIN_GCC" ; then - AC_DEFINE([OPAL_C_GCC_INLINE_ASSEMBLY], [1], - [Whether C compiler supports GCC style inline assembly]) - else - opal_cv_asm_inline_supported="no" - # now that we know our architecture, try to inline assemble - OPAL_CHECK_INLINE_C_GCC([$OPAL_GCC_INLINE_ASSIGN]) - fi # if opal_cv_asm_builtin = BUILTIN_GCC + AS_IF([test "$atomics_found" != "no"], + [want_asm_atomics=1]) + AC_MSG_CHECKING([for inline assembly atomics]) + AC_MSG_RESULT([$atomics_found])]) + + AS_IF([test "$atomics_found" = "no"], + [AC_MSG_ERROR([No usable atomics implementation found. 
Cannot continue.])]) result="OPAL_$opal_cv_asm_arch" - AC_MSG_CHECKING([for assembly architecture]) - AC_MSG_RESULT([$opal_cv_asm_arch]) AC_DEFINE_UNQUOTED([OPAL_ASSEMBLY_ARCH], [$result], [Architecture type of assembly to use for atomic operations and CMA]) - result="OPAL_$opal_cv_asm_builtin" - OPAL_ASSEMBLY_BUILTIN="$opal_cv_asm_builtin" - AC_MSG_CHECKING([for builtin atomics]) - AC_MSG_RESULT([$opal_cv_asm_builtin]) - AC_DEFINE_UNQUOTED([OPAL_ASSEMBLY_BUILTIN], [$result], - [Whether to use builtin atomics]) - AC_SUBST([OPAL_ASSEMBLY_BUILTIN]) + AC_DEFINE_UNQUOTED([OPAL_USE_C11_ATOMICS], + [$want_c11_atomics], + [Whether to use C11 atomics for atomics implementation]) + AC_DEFINE_UNQUOTED([OPAL_USE_GCC_BUILTIN_ATOMICS], + [$want_gcc_builtin_atomics], + [Whether to use GCC-style built-in atomics for atomics implementation]) + AC_DEFINE_UNQUOTED([OPAL_USE_ASM_ATOMICS], + [$want_asm_atomics], + [Whether to use assembly-coded atomics for atomics implementation]) - OPAL_SUMMARY_ADD([[Miscellaneous]],[[Atomics]],[],[$opal_cv_asm_builtin]) + OPAL_SUMMARY_ADD([[Miscellaneous]],[[Atomics]],[],[$atomics_found]) - unset result + OPAL_VAR_SCOPE_POP ])dnl diff --git a/config/opal_configure_options.m4 b/config/opal_configure_options.m4 index 6b349a72591..22394afb45f 100644 --- a/config/opal_configure_options.m4 +++ b/config/opal_configure_options.m4 @@ -86,13 +86,6 @@ else WANT_BRANCH_PROBABILITIES=0 fi -AC_ARG_ENABLE([builtin-atomics-for-ppc],[AS_HELP_STRING([--enable-builtin-atomics-for-ppc], - [POWER architectures only: Force use of builtin atomics if available. This could either be gcc builtins or C11 atomics, depending on what is available on your system. Enabling this is known to cause poor performance in atomic operations on Power machines. 
(default: disabled)])]) -if test "x$enable_builtin_atomics_for_ppc" = "xyes" ; then -force_gcc_atomics_ppc=1 -else -force_gcc_atomics_ppc=0 -fi # # Memory debugging diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 26c56b06ed3..ade552e09fe 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -121,7 +121,7 @@ static inline void opal_atomic_wmb(void); * in the opal_lifo code. Don't enforce the types of the function * calls on C11 until we can sort that out. */ -#if !(OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER)) +#if OPAL_USE_C11_ATOMICS == 0 /** * Atomic compare and set of 32 bit intergers with acquire and release semantics. @@ -418,16 +418,22 @@ static inline void opal_atomic_sc_ptr(opal_atomic_intptr_t *addr, intptr_t newva #if defined(DOXYGEN) /* don't include system-level gorp when generating doxygen files */ -#elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_C11 && !defined(__INTEL_COMPILER) +#elif OPAL_USE_C11_ATOMICS == 1 # include "opal/sys/atomic_stdc.h" -#elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_GCC +#elif OPAL_USE_GCC_BUILTIN_ATOMICS == 1 # include "opal/sys/gcc_builtin/atomic.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 -# include "opal/sys/x86_64/atomic.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 -# include "opal/sys/arm64/atomic.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 -# include "opal/sys/powerpc/atomic.h" +#elif OPAL_USE_ASM_ATOMICS == 1 +# if defined(PLATFORM_ARCH_X86_64) +# include "opal/sys/x86_64/atomic.h" +# elif defined(PLATFORM_ARCH_AARCH64) +# include "opal/sys/arm64/atomic.h" +# elif defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_64) +# include "opal/sys/powerpc/atomic.h" +# else +# error "No asm support found." +# endif +#else +#error "No atomics support found." 
#endif #if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 diff --git a/opal/include/opal_stdatomic.h b/opal/include/opal_stdatomic.h index f32b2de9a7d..35437551208 100644 --- a/opal/include/opal_stdatomic.h +++ b/opal/include/opal_stdatomic.h @@ -16,7 +16,7 @@ # include "opal_stdint.h" -# if (OPAL_ASSEMBLY_BUILTIN != OPAL_BUILTIN_C11) || defined(__INTEL_COMPILER) +#if OPAL_USE_C11_ATOMICS == 0 typedef volatile int opal_atomic_int_t; typedef volatile long opal_atomic_long_t; @@ -38,15 +38,9 @@ enum { OPAL_ATOMIC_LOCK_UNLOCKED = 0, # define OPAL_ATOMIC_LOCK_INIT OPAL_ATOMIC_LOCK_UNLOCKED -# else /* OPAL_HAVE_C__ATOMIC */ +#else /* OPAL_USE_C11_ATOMICS == 0 */ -# include - -# ifdef __INTEL_COMPILER -# if __INTEL_COMPILER_BUILD_DATE <= 20200310 -#warning C11 _Atomic type not fully supported. The C11 atomic support should have been disabled. -# endif -# endif +# include typedef atomic_int opal_atomic_int_t; typedef atomic_long opal_atomic_long_t; @@ -68,12 +62,11 @@ typedef atomic_flag opal_atomic_lock_t; # define OPAL_ATOMIC_LOCK_INIT ATOMIC_FLAG_INIT -# endif /* OPAL_HAVE_C__ATOMIC */ +# endif /* OPAL_USE_C11_ATOMICS == 0 */ # if HAVE_OPAL_INT128_T -/* do not use C11 atomics for __int128 if they are not lock free */ -# if OPAL_HAVE_C11_CSWAP_INT128 && !defined(__INTEL_COMPILER) +# if OPAL_USE_C11_ATOMICS && OPAL_HAVE_C11_CSWAP_INT128 typedef _Atomic opal_int128_t opal_atomic_int128_t; From d674d8bd297f89190a5dd781a37f23bd55959523 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 14 Jan 2022 22:52:05 +0000 Subject: [PATCH 11/11] Replace OPAL_ASSEMBLY_ARCH with portable platform With the introduction of the portable platform header, all the OPAL_ASSEMBLY_ARCH code was redundant and kind of a pain to deal with. Remove all uses of OPAL_ASSEMBLY_ARCH and replace with the equivalent portable platform conditional. 
Signed-off-by: Brian Barrett --- config/opal_config_asm.m4 | 11 +---- .../mca/osc/portals4/osc_portals4_component.c | 5 +- opal/include/opal/sys/Makefile.am | 1 - opal/include/opal/sys/architecture.h | 47 ------------------- opal/include/opal/sys/arm64/timer.h | 6 +-- opal/include/opal/sys/atomic.h | 8 ++-- opal/include/opal/sys/atomic_stdc.h | 2 +- opal/include/opal/sys/cma.h | 23 ++++----- opal/include/opal/sys/gcc_builtin/atomic.h | 3 +- opal/include/opal/sys/timer.h | 10 ++-- opal/include/opal/sys/x86_64/timer.h | 2 +- opal/mca/patcher/base/base.h | 5 +- opal/mca/patcher/base/patcher_base_patch.c | 19 ++++---- .../overwrite/patcher_overwrite_module.c | 27 ++++++----- opal/mca/patcher/patcher.h | 5 +- opal/mca/timer/linux/timer_linux_component.c | 4 +- oshmem/shmem/c/shmem_clear_cache_inv.c | 6 ++- oshmem/shmem/c/shmem_clear_cache_line_inv.c | 6 ++- oshmem/shmem/c/shmem_set_cache_inv.c | 6 ++- oshmem/shmem/c/shmem_set_cache_line_inv.c | 6 ++- oshmem/shmem/c/shmem_udcflush.c | 6 ++- oshmem/shmem/c/shmem_udcflush_line.c | 6 ++- 22 files changed, 90 insertions(+), 124 deletions(-) delete mode 100644 opal/include/opal/sys/architecture.h diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index 6bedcc08613..b176c8c6ca2 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -597,7 +597,7 @@ dnl ################################################################# AC_DEFUN([OPAL_CONFIG_ASM],[ AC_REQUIRE([OPAL_SETUP_CC]) - OPAL_VAR_SCOPE_PUSH([atomics_found want_c11_atomics want_gcc_builtin_atomics want_asm_atomics opal_cv_asm_arch result]) + OPAL_VAR_SCOPE_PUSH([atomics_found want_c11_atomics want_gcc_builtin_atomics want_asm_atomics]) # only assembly style we support today is gcc-style inline # assembly, find out if it works. 
We need this even for C11/GCC @@ -672,19 +672,16 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ x86_64-*x32|i?86-*|x86_64*|amd64*) AS_IF([test "$ac_cv_sizeof_long" = "8"], [OPAL_CHECK_CMPXCHG16B - opal_cv_asm_arch="X86_64" atomics_found="x86_64 assembly"]) ;; aarch64*) - opal_cv_asm_arch="ARM64" atomics_found="aarch64 assembly" ;; powerpc-*|powerpc64-*|powerpcle-*|powerpc64le-*|rs6000-*|ppc-*) AS_IF([test "$ac_cv_sizeof_long" = "8"], - [opal_cv_asm_arch="POWERPC64" - atomics_found="PowerPC asssembly"]) + [atomics_found="PowerPC assembly"]) ;; esac @@ -696,10 +693,6 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ AS_IF([test "$aomics_found" = "no"], [AC_MSG_ERROR([No usable atomics implementation found. Cannot continue.])]) - result="OPAL_$opal_cv_asm_arch" - AC_DEFINE_UNQUOTED([OPAL_ASSEMBLY_ARCH], [$result], - [Architecture type of assembly to use for atomic operations and CMA]) - AC_DEFINE_UNQUOTED([OPAL_USE_C11_ATOMICS], [$want_c11_atomics], [Whether to use C11 atomics for atomics implementation]) diff --git a/ompi/mca/osc/portals4/osc_portals4_component.c b/ompi/mca/osc/portals4/osc_portals4_component.c index e033b25623b..6a89c47f6a6 100644 --- a/ompi/mca/osc/portals4/osc_portals4_component.c +++ b/ompi/mca/osc/portals4/osc_portals4_component.c @@ -9,7 +9,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved. - * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018-2022 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2020 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved.
* $COPYRIGHT$ @@ -24,6 +24,7 @@ #include "opal/util/printf.h" #include "opal/include/opal/align.h" #include "opal/mca/mpool/base/base.h" +#include "opal/opal_portable_platform.h" #include "ompi/mca/osc/base/base.h" #include "ompi/mca/osc/base/osc_base_obj_convert.h" @@ -589,7 +590,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit module->passive_target_access_epoch = false; -#if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 || OPAL_ASSEMBLY_ARCH == OPAL_IA32 +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) *model = MPI_WIN_UNIFIED; #else *model = MPI_WIN_SEPARATE; diff --git a/opal/include/opal/sys/Makefile.am b/opal/include/opal/sys/Makefile.am index 9a5e80ea096..b56e909b376 100644 --- a/opal/include/opal/sys/Makefile.am +++ b/opal/include/opal/sys/Makefile.am @@ -28,7 +28,6 @@ # This makefile.am does not stand on its own - it is included from opal/Makefile.am headers += \ - opal/sys/architecture.h \ opal/sys/atomic.h \ opal/sys/atomic_stdc.h \ opal/sys/atomic_impl_minmax_math.h \ diff --git a/opal/include/opal/sys/architecture.h b/opal/include/opal/sys/architecture.h deleted file mode 100644 index 8f53bda5f20..00000000000 --- a/opal/include/opal/sys/architecture.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights - * reserved. 
- * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2020 Google, LLC. All rights reserved. - * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. - * All Rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* - * List of supported architectures - */ - -#ifndef OPAL_SYS_ARCHITECTURE_H -#define OPAL_SYS_ARCHITECTURE_H - -/* Architectures */ -#define OPAL_UNSUPPORTED 0000 -#define OPAL_IA32 0010 -#define OPAL_X86_64 0030 -#define OPAL_POWERPC32 0050 -#define OPAL_POWERPC64 0051 -#define OPAL_ARM 0100 -#define OPAL_ARM64 0101 -#define OPAL_BUILTIN_GCC 0202 -#define OPAL_BUILTIN_NO 0203 -#define OPAL_BUILTIN_C11 0204 - -#endif /* #ifndef OPAL_SYS_ARCHITECTURE_H */ diff --git a/opal/include/opal/sys/arm64/timer.h b/opal/include/opal/sys/arm64/timer.h index d6237e9dec0..0f237e81506 100644 --- a/opal/include/opal/sys/arm64/timer.h +++ b/opal/include/opal/sys/arm64/timer.h @@ -19,7 +19,7 @@ #ifndef OPAL_SYS_ARCH_TIMER_H #define OPAL_SYS_ARCH_TIMER_H 1 -#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +#if defined(PLATFORM_ARCH_AARCH64) typedef uint64_t opal_timer_t; #else typedef uint32_t opal_timer_t; @@ -32,7 +32,7 @@ static inline opal_timer_t opal_sys_timer_get_cycles(void) opal_timer_t ret; __asm__ __volatile__("isb" ::: "memory"); -#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +#if defined(PLATFORM_ARCH_AARCH64) __asm__ __volatile__("mrs %0, CNTVCT_EL0" : "=r"(ret)); #else __asm__ __volatile__("mrs %0, CNTVCT" : "=r"(ret)); @@ -44,7 +44,7 @@ static inline opal_timer_t opal_sys_timer_get_cycles(void) static inline opal_timer_t opal_sys_timer_get_freq(void) { opal_timer_t freq; -#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +#if defined(PLATFORM_ARCH_AARCH64) __asm__ __volatile__("mrs %0, CNTFRQ_EL0" : "=r"(freq)); #else __asm__ __volatile__("mrs %0, CNTFRQ" : "=r"(freq)); diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 
ade552e09fe..be647260b73 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -49,11 +49,9 @@ #ifndef OPAL_SYS_ATOMIC_H #define OPAL_SYS_ATOMIC_H 1 -#include "opal_config.h" - #include -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #include "opal_stdatomic.h" BEGIN_C_DECLS @@ -436,9 +434,9 @@ static inline void opal_atomic_sc_ptr(opal_atomic_intptr_t *addr, intptr_t newva #error "No atomics support found." #endif -#if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +#if defined(PLATFORM_ARCH_AARCH64) # include "opal/sys/arm64/atomic_llsc.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 +#elif defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_64) # include "opal/sys/powerpc/atomic_llsc.h" #endif diff --git a/opal/include/opal/sys/atomic_stdc.h b/opal/include/opal/sys/atomic_stdc.h index 62e27705cc4..4f5bceaa922 100644 --- a/opal/include/opal/sys/atomic_stdc.h +++ b/opal/include/opal/sys/atomic_stdc.h @@ -50,7 +50,7 @@ static inline void opal_atomic_wmb(void) static inline void opal_atomic_rmb(void) { -# if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 +# if defined(PLATFORM_ARCH_X86_64) /* work around a bug in older gcc versions (observed in gcc 6.x) * where acquire seems to get treated as a no-op instead of being * equivalent to __asm__ __volatile__("": : :"memory") on x86_64 */ diff --git a/opal/include/opal/sys/cma.h b/opal/include/opal/sys/cma.h index e5b4961613e..b1db5f7700c 100644 --- a/opal/include/opal/sys/cma.h +++ b/opal/include/opal/sys/cma.h @@ -5,6 +5,8 @@ * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Google, LLC. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ */ @@ -19,12 +21,7 @@ #ifndef OPAL_SYS_CMA_H #define OPAL_SYS_CMA_H 1 -#if !defined(OPAL_ASSEMBLY_ARCH) -/* need opal_config.h for the assembly architecture */ -# include "opal_config.h" -#endif - -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #ifdef HAVE_SYS_TYPES_H # include @@ -34,28 +31,28 @@ # include #endif -#ifdef __linux__ +#ifdef PLATFORM_OS_LINUX /* Cross Memory Attach is so far only supported under linux */ -# if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 +# if defined(PLATFORM_ARCH_X86_64) # define __NR_process_vm_readv 310 # define __NR_process_vm_writev 311 -# elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 +# elif defined(PLATFORM_ARCH_X86) # define __NR_process_vm_readv 347 # define __NR_process_vm_writev 348 -# elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32 +# elif defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_32) # define __NR_process_vm_readv 351 # define __NR_process_vm_writev 352 -# elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 +# elif defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_64) # define __NR_process_vm_readv 351 # define __NR_process_vm_writev 352 -# elif OPAL_ASSEMBLY_ARCH == OPAL_ARM +# elif defined(PLATFORM_ARCH_ARM) # define __NR_process_vm_readv 376 # define __NR_process_vm_writev 377 -# elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +# elif defined(PLATFORM_ARCH_AARCH64) /* ARM64 uses the asm-generic syscall numbers */ diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index e4b9cc7a0e1..4615f9fb4f3 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -35,7 +35,8 @@ * Memory Barriers * *********************************************************************/ -#if (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) && defined (__GNUC__) && !defined(__llvm) && (__GNUC__ < 6) + +#if defined(PLATFORM_ARCH_X86_64) && defined (__GNUC__) && !defined(__llvm) && (__GNUC__ < 6) /* work around a bug in older gcc versions where
ACQUIRE seems to get * treated as a no-op instead */ #define OPAL_BUSTED_ATOMIC_MB 1 diff --git a/opal/include/opal/sys/timer.h b/opal/include/opal/sys/timer.h index fd04b296f76..3f63839a48c 100644 --- a/opal/include/opal/sys/timer.h +++ b/opal/include/opal/sys/timer.h @@ -33,9 +33,7 @@ #ifndef OPAL_SYS_TIMER_H #define OPAL_SYS_TIMER_H 1 -#include "opal_config.h" - -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #ifdef HAVE_SYS_TYPES_H # include @@ -59,11 +57,11 @@ BEGIN_C_DECLS #if defined(DOXYGEN) /* don't include system-level gorp when generating doxygen files */ -#elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 || OPAL_ASSEMBLY_ARCH == OPAL_IA32 +#elif defined(PLATFORM_ARCH_X86_64) || defined(PLATFORM_ARCH_X86) # include "opal/sys/x86_64/timer.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 || OPAL_ASSEMBLY_ARCH == OPAL_ARM +#elif defined(PLATFORM_ARCH_ARM) || defined(PLATFORM_ARCH_AARCH64) # include "opal/sys/arm64/timer.h" -#elif OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64 || OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32 +#elif defined(PLATFORM_ARCH_POWERPC) # include "opal/sys/powerpc/timer.h" #endif diff --git a/opal/include/opal/sys/x86_64/timer.h b/opal/include/opal/sys/x86_64/timer.h index 03bb02387d0..cbef5e82e95 100644 --- a/opal/include/opal/sys/x86_64/timer.h +++ b/opal/include/opal/sys/x86_64/timer.h @@ -32,7 +32,7 @@ typedef uint64_t opal_timer_t; #if OPAL_C_GCC_INLINE_ASSEMBLY -# if OPAL_ASSEMBLY_ARCH == OPAL_X86_64 +# if defined(PLATFORM_ARCH_X86_64) /* TODO: add AMD mfence version and dispatch at init */ static inline opal_timer_t opal_sys_timer_get_cycles(void) diff --git a/opal/mca/patcher/base/base.h b/opal/mca/patcher/base/base.h index 4487ca9a9b5..66f4ce8516a 100644 --- a/opal/mca/patcher/base/base.h +++ b/opal/mca/patcher/base/base.h @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -24,6 +26,7 @@ #define OPAL_PATCHER_BASE_H #include "opal_config.h" +#include "opal/opal_portable_platform.h" #include "opal/mca/base/mca_base_framework.h" #include "opal/mca/patcher/patcher.h" @@ -69,7 +72,7 @@ OPAL_DECLSPEC void mca_base_patcher_patch_apply_binary(mca_patcher_base_patch_t static inline uintptr_t mca_patcher_base_addr_text(uintptr_t addr) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) && (!defined(_CALL_ELF) || (_CALL_ELF != 2)) +#if defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_64) && (!defined(_CALL_ELF) || (_CALL_ELF != 2)) struct odp_t { uintptr_t text; uintptr_t toc; diff --git a/opal/mca/patcher/base/patcher_base_patch.c b/opal/mca/patcher/base/patcher_base_patch.c index 8e84847733e..57ee1d291a2 100644 --- a/opal/mca/patcher/base/patcher_base_patch.c +++ b/opal/mca/patcher/base/patcher_base_patch.c @@ -4,6 +4,8 @@ * reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -15,6 +17,7 @@ #include "opal/mca/patcher/base/base.h" #include "opal/mca/patcher/patcher.h" +#include "opal/opal_portable_platform.h" #include "opal/prefetch.h" #include "opal/util/sys_limits.h" #include @@ -33,7 +36,7 @@ static void mca_patcher_base_patch_destruct(mca_patcher_base_patch_t *patch) OBJ_CLASS_INSTANCE(mca_patcher_base_patch_t, opal_list_item_t, mca_patcher_base_patch_construct, mca_patcher_base_patch_destruct); -#if defined(__PPC__) +#if defined(PLATFORM_ARCH_POWERPC) // PowerPC instructions used in patching // Reference: "PowerPC User Instruction Set Architecture" @@ -65,7 +68,7 @@ static unsigned int rldicr(unsigned int RT, unsigned int RS, unsigned int SH, un static int PatchLoadImm(uintptr_t addr, unsigned int reg, size_t value) { -# if defined(__PPC64__) +# if defined(PLATFORM_ARCH_64) *(unsigned int *) (addr + 0) = addis(reg, 0, (value >> 48)); *(unsigned int *) (addr + 4) = ori(reg, reg, (value >> 32)); *(unsigned int *) (addr + 8) = rldicr(reg, reg, 32, 31); @@ -84,7 +87,7 @@ static int PatchLoadImm(uintptr_t addr, unsigned int reg, size_t value) #if !HAVE___CLEAR_CACHE static void flush_and_invalidate_cache(unsigned long a) { -# if OPAL_ASSEMBLY_ARCH == OPAL_IA32 +# if defined(PLATFORM_ARCH_X86) static int have_clflush = -1; if (OPAL_UNLIKELY(-1 == have_clflush)) { @@ -107,9 +110,9 @@ static void flush_and_invalidate_cache(unsigned long a) /* does not work with AMD processors */ __asm__ volatile("mfence;clflush %0;mfence" : : "m"(*(char *) a)); } -# elif OPAL_ASSEMBLY_ARCH == OPAL_X86_64 +# elif defined(PLATFORM_ARCH_X86_64) __asm__ volatile("mfence;clflush %0;mfence" : : "m"(*(char *) a)); -# elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +# elif defined(PLATFORM_ARCH_AARCH64) __asm__ volatile("dc cvau, %0\n\t" "dsb ish\n\t" "ic ivau, %0\n\t" @@ -128,7 +131,7 @@ static void ModifyMemoryProtection(uintptr_t addr, size_t length, int prot) length = bound - base; -#if defined(__PPC__) +#if 
defined(PLATFORM_ARCH_POWERPC) /* NTH: is a loop necessary here? */ do { if (mprotect((void *) base, page_size, prot)) @@ -154,7 +157,7 @@ static inline void apply_patch(unsigned char *patch_data, uintptr_t address, siz #else size_t offset_jump = 16; -# if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 +# if defined(PLATFORM_ARCH_AARCH64) offset_jump = 32; # endif @@ -184,7 +187,7 @@ void mca_base_patcher_patch_apply_binary(mca_patcher_base_patch_t *patch) int mca_patcher_base_patch_hook(mca_patcher_base_module_t *module, uintptr_t hook_addr) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) +#if defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_64) mca_patcher_base_patch_t *hook_patch; const unsigned int nop = 0x60000000; diff --git a/opal/mca/patcher/overwrite/patcher_overwrite_module.c b/opal/mca/patcher/overwrite/patcher_overwrite_module.c index e2d44724326..0b305d04181 100644 --- a/opal/mca/patcher/overwrite/patcher_overwrite_module.c +++ b/opal/mca/patcher/overwrite/patcher_overwrite_module.c @@ -5,6 +5,8 @@ * Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -20,6 +22,7 @@ #include "opal/prefetch.h" #include "opal/util/output.h" #include "opal/util/sys_limits.h" +#include "opal/opal_portable_platform.h" #include #include @@ -30,19 +33,19 @@ #include #include -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) static int mca_patcher_overwrite_apply_patch(mca_patcher_base_patch_t *patch) { uintptr_t func_new_addr = patch->patch_value; { -# if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) +# if defined(PLATFORM_ARCH_32) patch->patch_data_size = 5; *(unsigned char *) (patch->patch_data + 0) = 0xe9; *(unsigned int *) (patch->patch_data + 1) = (unsigned int) (func_new_addr - patch->patch_orig - 5); -# elif (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +# elif defined(PLATFORM_ARCH_64) patch->patch_data_size = 13; *(unsigned short *) (patch->patch_data + 0) = 0xbb49; *(unsigned long *) (patch->patch_data + 2) = (unsigned long) func_new_addr; @@ -59,7 +62,7 @@ static int mca_patcher_overwrite_apply_patch(mca_patcher_base_patch_t *patch) /* end of #if defined(__i386__) || defined(__x86_64__) */ // ------------------------------------------------- PPC equivalent: -#elif (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32) || (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) +#elif defined(PLATFORM_ARCH_POWERPC) // PowerPC instructions used in patching // Reference: "PowerPC User Instruction Set Architecture" @@ -91,7 +94,7 @@ static unsigned int rldicr(unsigned int RT, unsigned int RS, unsigned int SH, un static int PatchLoadImm(uintptr_t addr, unsigned int reg, size_t value) { -# if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) +# if defined(PLATFORM_ARCH_64) *(unsigned int *) (addr + 0) = addis(reg, 0, (value >> 48)); *(unsigned int *) (addr + 4) = ori(reg, reg, (value >> 32)); *(unsigned int *) (addr + 8) = rldicr(reg, reg, 32, 31); @@ -115,7 +118,7 @@ static int mca_patcher_overwrite_apply_patch(mca_patcher_base_patch_t *patch) 
hook_addr = mca_patcher_base_addr_text(patch->patch_value); // Patch for hook function: -# if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) +# if defined(PLATFORM_ARCH_64) rc = mca_patcher_base_patch_hook(&mca_patcher_overwrite_module, hook_addr); if (OPAL_SUCCESS != rc) { return rc; @@ -142,7 +145,7 @@ static int mca_patcher_overwrite_apply_patch(mca_patcher_base_patch_t *patch) return OPAL_SUCCESS; } -#elif defined(__aarch64__) +#elif defined(PLATFORM_ARCH_AARCH64) /** * @brief Generate a mov immediate instruction @@ -228,16 +231,16 @@ static int mca_patcher_overwrite_apply_patch(mca_patcher_base_patch_t *patch) static bool mca_patcher_is_function_patched(unsigned char *target) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) +#if defined(PLATFORM_ARCH_X86) return (*(unsigned char *)target == 0xe9); -#elif (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#elif defined(PLATFORM_ARCH_X86_64) return ( (*(unsigned short*)(target + 0) == 0xbb49) && (*(unsigned char* )(target +10) == 0x41 ) && (*(unsigned char* )(target +11) == 0xff ) && (*(unsigned char* )(target +12) == 0xe3 ) ); -#elif (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32 ) || (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) +#elif defined(PLATFORM_ARCH_POWERPC) const unsigned int gr_max = 0xF; //11 is used in our patching code, but is the max 4 or 5 bits? 
const unsigned int addr_max = 0xFFFF; unsigned int addis_base = addis( 0, 0, 0); @@ -248,7 +251,7 @@ static bool mca_patcher_is_function_patched(unsigned char *target) unsigned int mtspr_mask = ~(mtspr_base ^ mtspr( 9, gr_max)); unsigned int bcctr_base = bcctr(20, 0, 0); // 20 = always unsigned int bcctr_mask = ~(bcctr_base ^ bcctr(20, 0, 0)); -#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32) +#if defined(PLATFORM_ARCH_32) return ( ((*(unsigned int *) (target + 0 )) & addis_mask) == addis_base && @@ -272,7 +275,7 @@ static bool mca_patcher_is_function_patched(unsigned char *target) ((*(unsigned int *) (target + 24)) & bcctr_mask) == bcctr_base ); #endif -#elif defined(__aarch64__) +#elif defined(PLATFORM_ARCH_AARCH64) uint32_t mov_mask=~((0xFFFF << 5) | 0x1F); uint32_t br_mask=~(0x1F << 5); diff --git a/opal/mca/patcher/patcher.h b/opal/mca/patcher/patcher.h index 2ebf752e6d1..8779a583f77 100644 --- a/opal/mca/patcher/patcher.h +++ b/opal/mca/patcher/patcher.h @@ -2,6 +2,8 @@ /* * Copyright (c) 2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -17,11 +19,12 @@ #include "opal/class/opal_list.h" #include "opal/mca/base/base.h" #include "opal/mca/mca.h" +#include "opal/opal_portable_platform.h" /* Any function being patched in as a hook must use SYMBOLPATCH_BEGIN at the top, * and SYMBOLPATCH_END before it returns (this is just for PPC). 
*/ -#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) +#if defined(PLATFORM_ARCH_POWERPC) && defined(PLATFORM_ARCH_64) /* special processing for ppc64 to save and restore TOC (r2) * Reference: "64-bit PowerPC ELF Application Binary Interface Supplement 1.9" */ diff --git a/opal/mca/timer/linux/timer_linux_component.c b/opal/mca/timer/linux/timer_linux_component.c index b710e54f0e8..fabb3f4ea09 100644 --- a/opal/mca/timer/linux/timer_linux_component.c +++ b/opal/mca/timer/linux/timer_linux_component.c @@ -16,6 +16,8 @@ * reserved. * Copyright (c) 2015-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2016 Broadcom Limited. All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -132,7 +134,7 @@ static int opal_timer_linux_find_freq(void) } } -#if ((OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64)) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) if (0 == opal_timer_linux_freq && opal_sys_timer_is_monotonic()) { /* tsc is exposed through bogomips ~> loops_per_jiffy ~> tsc_khz */ loc = find_info(fp, "bogomips", buf, 1024); diff --git a/oshmem/shmem/c/shmem_clear_cache_inv.c b/oshmem/shmem/c/shmem_clear_cache_inv.c index f1d5ec26dfe..c80cf7e6feb 100644 --- a/oshmem/shmem/c/shmem_clear_cache_inv.c +++ b/oshmem/shmem/c/shmem_clear_cache_inv.c @@ -4,6 +4,8 @@ * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -18,7 +20,7 @@ #include "oshmem/mca/spml/spml.h" #include "oshmem/shmem/shmem_api_logger.h" -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" @@ -28,7 +30,7 @@ void shmem_clear_cache_inv(void) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) do {SHMEM_API_VERBOSE(10,"shmem_clear_cache_inv is not supported by the current CPU architecture");}while (0); #else /* another implementation */ diff --git a/oshmem/shmem/c/shmem_clear_cache_line_inv.c b/oshmem/shmem/c/shmem_clear_cache_line_inv.c index bb9bea03506..720b2d6ed20 100644 --- a/oshmem/shmem/c/shmem_clear_cache_line_inv.c +++ b/oshmem/shmem/c/shmem_clear_cache_line_inv.c @@ -4,6 +4,8 @@ * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -18,7 +20,7 @@ #include "oshmem/mca/spml/spml.h" #include "oshmem/shmem/shmem_api_logger.h" -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" @@ -28,7 +30,7 @@ void shmem_clear_cache_line_inv(void *target) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) do {SHMEM_API_VERBOSE(10,"shmem_clear_cache_line_inv is not supported by the current CPU architecture");}while (0); #else /* another implementation */ diff --git a/oshmem/shmem/c/shmem_set_cache_inv.c b/oshmem/shmem/c/shmem_set_cache_inv.c index 4b8701046ba..8c3e00f13fb 100644 --- a/oshmem/shmem/c/shmem_set_cache_inv.c +++ b/oshmem/shmem/c/shmem_set_cache_inv.c @@ -4,6 +4,8 @@ * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -18,7 +20,7 @@ #include "oshmem/mca/spml/spml.h" #include "oshmem/shmem/shmem_api_logger.h" -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" @@ -28,7 +30,7 @@ void shmem_set_cache_inv(void) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) do {SHMEM_API_VERBOSE(10,"shmem_set_cache_inv is not supported by the current CPU architecture");}while (0); #else /* another implementation */ diff --git a/oshmem/shmem/c/shmem_set_cache_line_inv.c b/oshmem/shmem/c/shmem_set_cache_line_inv.c index f94793aa854..74c612ac6e9 100644 --- a/oshmem/shmem/c/shmem_set_cache_line_inv.c +++ b/oshmem/shmem/c/shmem_set_cache_line_inv.c @@ -4,6 +4,8 @@ * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -18,7 +20,7 @@ #include "oshmem/mca/spml/spml.h" #include "oshmem/shmem/shmem_api_logger.h" -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" @@ -28,7 +30,7 @@ void shmem_set_cache_line_inv(void *target) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) do {SHMEM_API_VERBOSE(10,"shmem_set_cache_line_inv is not supported by the current CPU architecture");}while (0); #else /* another implementation */ diff --git a/oshmem/shmem/c/shmem_udcflush.c b/oshmem/shmem/c/shmem_udcflush.c index 376bee0b856..a60a7f54411 100644 --- a/oshmem/shmem/c/shmem_udcflush.c +++ b/oshmem/shmem/c/shmem_udcflush.c @@ -4,6 +4,8 @@ * Copyright (c) 2014 Intel, Inc. 
All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -17,7 +19,7 @@ #include "oshmem/mca/spml/spml.h" #include "oshmem/shmem/shmem_api_logger.h" -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" @@ -27,7 +29,7 @@ void shmem_udcflush(void) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) do {SHMEM_API_VERBOSE(10,"shmem_udcflush is not supported by the current CPU architecture");}while (0); #else /* another implementation */ diff --git a/oshmem/shmem/c/shmem_udcflush_line.c b/oshmem/shmem/c/shmem_udcflush_line.c index 2d5166ca334..7603a93bd77 100644 --- a/oshmem/shmem/c/shmem_udcflush_line.c +++ b/oshmem/shmem/c/shmem_udcflush_line.c @@ -4,6 +4,8 @@ * Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2022 Amazon.com, Inc. or its affiliates. + * All Rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -16,7 +18,7 @@ #include "oshmem/include/shmem.h" #include "oshmem/shmem/shmem_api_logger.h" -#include "opal/sys/architecture.h" +#include "opal/opal_portable_platform.h" #if OSHMEM_PROFILING #include "oshmem/include/pshmem.h" @@ -26,7 +28,7 @@ void shmem_udcflush_line(void *target) { -#if (OPAL_ASSEMBLY_ARCH == OPAL_IA32) || (OPAL_ASSEMBLY_ARCH == OPAL_X86_64) +#if defined(PLATFORM_ARCH_X86) || defined(PLATFORM_ARCH_X86_64) do {SHMEM_API_VERBOSE(10,"shmem_udcflush_line is not supported by the current CPU architecture");}while (0); #else /* another implementation */