10
10
* Copyright (c) 2019-2021 Google, LLC. All rights reserved.
11
11
* Copyright (c) 2021 IBM Corporation. All rights reserved.
12
12
* Copyright (c) 2022 Cisco Systems, Inc. All rights reserved
13
+ * Copyright (c) 2022 Amazon.com, Inc. or its affiliates.
14
+ * All Rights reserved.
13
15
* $COPYRIGHT$
14
16
*
15
17
* Additional copyrights may follow
16
18
*
17
19
* $HEADER$
18
20
*/
19
21
22
+ #include "ompi_config.h"
23
+
20
24
#include "osc_rdma_accumulate.h"
21
25
#include "osc_rdma_request.h"
22
26
#include "osc_rdma_comm.h"
27
+ #include "osc_rdma_lock.h"
28
+ #include "osc_rdma_btl_comm.h"
23
29
30
+ #include "opal/util/minmax.h"
24
31
#include "ompi/mca/osc/base/base.h"
25
32
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
26
33
@@ -157,13 +164,11 @@ static int ompi_osc_rdma_fetch_and_op_atomic (ompi_osc_rdma_sync_t *sync, const
157
164
mca_btl_base_registration_handle_t * target_handle , ompi_op_t * op , ompi_osc_rdma_request_t * req )
158
165
{
159
166
ompi_osc_rdma_module_t * module = sync -> module ;
160
- mca_btl_base_module_t * selected_btl = ompi_osc_rdma_selected_btl (module , peer -> data_btl_index );
161
- int32_t atomic_flags = selected_btl -> btl_atomic_flags ;
162
167
int btl_op , flags ;
163
168
int64_t origin ;
164
169
165
- if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags ) && 4 == extent )) ||
166
- (!(OMPI_DATATYPE_FLAG_DATA_INT & dt -> super .flags ) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags )) ||
170
+ if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & module -> atomic_flags ) && 4 == extent )) ||
171
+ (!(OMPI_DATATYPE_FLAG_DATA_INT & dt -> super .flags ) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & module -> atomic_flags )) ||
167
172
!ompi_op_is_intrinsic (op ) || (0 == ompi_osc_rdma_op_mapping [op -> op_type ])) {
168
173
return OMPI_ERR_NOT_SUPPORTED ;
169
174
}
@@ -235,19 +240,11 @@ static int ompi_osc_rdma_acc_single_atomic (ompi_osc_rdma_sync_t *sync, const vo
235
240
ompi_op_t * op , ompi_osc_rdma_request_t * req )
236
241
{
237
242
ompi_osc_rdma_module_t * module = sync -> module ;
238
- mca_btl_base_module_t * selected_btl = ompi_osc_rdma_selected_btl (module , peer -> data_btl_index );
239
- int32_t atomic_flags = selected_btl -> btl_atomic_flags ;
240
243
int btl_op , flags ;
241
244
int64_t origin ;
242
245
243
- if (!(selected_btl -> btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS )) {
244
- /* btl put atomics not supported or disabled. fall back on fetch-and-op */
245
- return ompi_osc_rdma_fetch_and_op_atomic (sync , origin_addr , NULL , dt , extent , peer , target_address , target_handle ,
246
- op , req );
247
- }
248
-
249
- if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags ) && 4 == extent )) ||
250
- (!(OMPI_DATATYPE_FLAG_DATA_INT & dt -> super .flags ) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags )) ||
246
+ if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & module -> atomic_flags ) && 4 == extent )) ||
247
+ (!(OMPI_DATATYPE_FLAG_DATA_INT & dt -> super .flags ) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & module -> atomic_flags )) ||
251
248
!ompi_op_is_intrinsic (op ) || (0 == ompi_osc_rdma_op_mapping [op -> op_type ])) {
252
249
return OMPI_ERR_NOT_SUPPORTED ;
253
250
}
@@ -585,9 +582,9 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v
585
582
586
583
/* determine how much to put in this operation */
587
584
if (source_count ) {
588
- acc_len = min ( min (target_iovec [target_iov_index ].iov_len , source_iovec [source_iov_index ].iov_len ), acc_limit );
585
+ acc_len = opal_min ( opal_min (target_iovec [target_iov_index ].iov_len , source_iovec [source_iov_index ].iov_len ), acc_limit );
589
586
} else {
590
- acc_len = min (target_iovec [target_iov_index ].iov_len , acc_limit );
587
+ acc_len = opal_min (target_iovec [target_iov_index ].iov_len , acc_limit );
591
588
}
592
589
593
590
if (0 != acc_len ) {
@@ -662,13 +659,11 @@ static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const vo
662
659
bool lock_acquired )
663
660
{
664
661
ompi_osc_rdma_module_t * module = sync -> module ;
665
- mca_btl_base_module_t * btl = ompi_osc_rdma_selected_btl (module , peer -> data_btl_index );
666
- int32_t atomic_flags = btl -> btl_atomic_flags ;
667
662
const size_t size = datatype -> super .size ;
668
663
int64_t compare , source ;
669
664
int flags , ret ;
670
665
671
- if (8 != size && !(4 == size && (MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags ))) {
666
+ if (8 != size && !(4 == size && (MCA_BTL_ATOMIC_SUPPORTS_32BIT & module -> atomic_flags ))) {
672
667
return OMPI_ERR_NOT_SUPPORTED ;
673
668
}
674
669
@@ -716,7 +711,6 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr,
716
711
mca_btl_base_registration_handle_t * target_handle , bool lock_acquired )
717
712
{
718
713
ompi_osc_rdma_module_t * module = sync -> module ;
719
- mca_btl_base_module_t * btl = ompi_osc_rdma_selected_btl (module , peer -> data_btl_index );
720
714
unsigned long len = datatype -> super .size ;
721
715
mca_btl_base_registration_handle_t * local_handle = NULL ;
722
716
ompi_osc_rdma_frag_t * frag = NULL ;
@@ -741,26 +735,30 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr,
741
735
return OMPI_SUCCESS ;
742
736
}
743
737
744
- if (btl -> btl_register_mem && len > btl -> btl_put_local_registration_threshold ) {
745
- do {
746
- ret = ompi_osc_rdma_frag_alloc (module , len , & frag , & ptr );
747
- if (OPAL_UNLIKELY (OMPI_SUCCESS == ret )) {
748
- break ;
749
- }
738
+ if (module -> use_memory_registration ) {
739
+ mca_btl_base_module_t * btl = ompi_osc_rdma_selected_btl (module , peer -> data_btl_index );
740
+ if (len > btl -> btl_put_local_registration_threshold ) {
741
+ do {
742
+ ret = ompi_osc_rdma_frag_alloc (module , len , & frag , & ptr );
743
+ if (OPAL_UNLIKELY (OMPI_SUCCESS == ret )) {
744
+ break ;
745
+ }
750
746
751
- ompi_osc_rdma_progress (module );
752
- } while (1 );
747
+ ompi_osc_rdma_progress (module );
748
+ } while (1 );
753
749
754
- memcpy (ptr , source_addr , len );
755
- local_handle = frag -> handle ;
750
+ memcpy (ptr , source_addr , len );
751
+ local_handle = frag -> handle ;
752
+ }
756
753
}
757
754
758
755
OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_TRACE , "RDMA compare-and-swap initiating blocking btl put..." );
759
756
760
757
do {
761
- ret = btl -> btl_put (btl , peer -> data_endpoint , ptr , target_address ,
762
- local_handle , target_handle , len , 0 , MCA_BTL_NO_ORDER ,
763
- ompi_osc_rdma_cas_put_complete , (void * ) & complete , NULL );
758
+ ret = ompi_osc_rdma_btl_put (module , peer -> data_btl_index , peer -> data_endpoint ,
759
+ ptr , target_address , local_handle , target_handle ,
760
+ len , 0 , MCA_BTL_NO_ORDER ,
761
+ ompi_osc_rdma_cas_put_complete , (void * ) & complete , NULL );
764
762
if (OPAL_SUCCESS == ret || (OPAL_ERR_OUT_OF_RESOURCE != ret && OPAL_ERR_TEMP_OUT_OF_RESOURCE != ret )) {
765
763
break ;
766
764
}
0 commit comments