1
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2
2
/*
3
- * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
3
+ * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
4
4
* reserved.
5
5
* Copyright (c) 2016-2017 Research Organization for Information Science
6
6
* and Technology (RIST). All rights reserved.
18
18
19
19
#include "ompi/mca/osc/base/osc_base_obj_convert.h"
20
20
21
/**
 * Kinds of operation that can be deferred through the osc/rdma event queue.
 * Only a put is currently defined.
 */
typedef enum ompi_osc_rdma_event_type_t {
    /** retry a BTL put */
    OMPI_OSC_RDMA_EVENT_TYPE_PUT = 0,
} ompi_osc_rdma_event_type_t;
26
+
27
+ struct ompi_osc_rdma_event_t {
28
+ opal_event_t super ;
29
+ ompi_osc_rdma_module_t * module ;
30
+ struct mca_btl_base_endpoint_t * endpoint ;
31
+ void * local_address ;
32
+ mca_btl_base_registration_handle_t * local_handle ;
33
+ uint64_t remote_address ;
34
+ mca_btl_base_registration_handle_t * remote_handle ;
35
+ uint64_t length ;
36
+ mca_btl_base_rdma_completion_fn_t cbfunc ;
37
+ void * cbcontext ;
38
+ void * cbdata ;
39
+ };
40
+
41
+ typedef struct ompi_osc_rdma_event_t ompi_osc_rdma_event_t ;
42
+
43
+ static void * ompi_osc_rdma_event_put (int fd , int flags , void * context )
44
+ {
45
+ ompi_osc_rdma_event_t * event = (ompi_osc_rdma_event_t * ) context ;
46
+ int ret ;
47
+
48
+ ret = event -> module -> selected_btl -> btl_put (event -> module -> selected_btl , event -> endpoint , event -> local_address ,
49
+ event -> remote_address , event -> local_handle , event -> remote_handle ,
50
+ event -> length , 0 , MCA_BTL_NO_ORDER , event -> cbfunc , event -> cbcontext ,
51
+ event -> cbdata );
52
+ if (OPAL_LIKELY (OPAL_SUCCESS == ret )) {
53
+ /* done with this event */
54
+ opal_event_del (& event -> super );
55
+ free (event );
56
+ } else {
57
+ /* re-activate the event */
58
+ opal_event_active (& event -> super , OPAL_EV_READ , 1 );
59
+ }
60
+
61
+ return NULL ;
62
+ }
63
+
64
+ static int ompi_osc_rdma_event_queue (ompi_osc_rdma_module_t * module , struct mca_btl_base_endpoint_t * endpoint ,
65
+ ompi_osc_rdma_event_type_t event_type , void * local_address , mca_btl_base_registration_handle_t * local_handle ,
66
+ uint64_t remote_address , mca_btl_base_registration_handle_t * remote_handle ,
67
+ uint64_t length , mca_btl_base_rdma_completion_fn_t cbfunc , void * cbcontext ,
68
+ void * cbdata )
69
+ {
70
+ ompi_osc_rdma_event_t * event = malloc (sizeof (* event ));
71
+ void * (* event_func ) (int , int , void * );
72
+
73
+ if (OPAL_UNLIKELY (NULL == event )) {
74
+ return OMPI_ERR_OUT_OF_RESOURCE ;
75
+ }
76
+
77
+ event -> module = module ;
78
+ event -> endpoint = endpoint ;
79
+ event -> local_address = local_address ;
80
+ event -> local_handle = local_handle ;
81
+ event -> remote_address = remote_address ;
82
+ event -> remote_handle = remote_handle ;
83
+ event -> length = length ;
84
+ event -> cbfunc = cbfunc ;
85
+ event -> cbcontext = cbcontext ;
86
+ event -> cbdata = cbdata ;
87
+
88
+ switch (event_type ) {
89
+ case OMPI_OSC_RDMA_EVENT_TYPE_PUT :
90
+ event_func = ompi_osc_rdma_event_put ;
91
+ break ;
92
+ default :
93
+ opal_output (0 , "osc/rdma: cannot queue unknown event type %d" , event_type );
94
+ abort ();
95
+ }
96
+
97
+ opal_event_set (opal_sync_event_base , & event -> super , -1 , OPAL_EV_READ ,
98
+ event_func , event );
99
+ opal_event_active (& event -> super , OPAL_EV_READ , 1 );
100
+
101
+ return OMPI_SUCCESS ;
102
+ }
103
+
104
+
21
105
static int ompi_osc_rdma_gacc_local (const void * source_buffer , int source_count , ompi_datatype_t * source_datatype ,
22
106
void * result_buffer , int result_count , ompi_datatype_t * result_datatype ,
23
107
ompi_osc_rdma_peer_t * peer , uint64_t target_address ,
@@ -113,7 +197,7 @@ static void ompi_osc_rdma_acc_put_complete (struct mca_btl_base_module_t *btl, s
113
197
}
114
198
115
199
ompi_osc_rdma_sync_rdma_dec (sync );
116
- peer -> flags &= ~ OMPI_OSC_RDMA_PEER_ACCUMULATING ;
200
+ ompi_osc_rdma_peer_clear_flag ( peer , OMPI_OSC_RDMA_PEER_ACCUMULATING ) ;
117
201
}
118
202
119
203
/* completion of an accumulate get operation */
@@ -171,7 +255,12 @@ static void ompi_osc_rdma_acc_get_complete (struct mca_btl_base_module_t *btl, s
171
255
(mca_btl_base_registration_handle_t * ) request -> ctx ,
172
256
request -> len , 0 , MCA_BTL_NO_ORDER , ompi_osc_rdma_acc_put_complete ,
173
257
request , NULL );
174
- /* TODO -- we can do better. probably should queue up the next step and handle it in progress */
258
+ if (OPAL_SUCCESS != status ) {
259
+ status = ompi_osc_rdma_event_queue (module , endpoint , OMPI_OSC_RDMA_EVENT_TYPE_PUT , (void * ) source , local_handle ,
260
+ request -> target_address , (mca_btl_base_registration_handle_t * ) request -> ctx ,
261
+ request -> len , ompi_osc_rdma_acc_put_complete , request , NULL );
262
+ }
263
+
175
264
assert (OPAL_SUCCESS == status );
176
265
}
177
266
@@ -203,13 +292,12 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v
203
292
204
293
OPAL_THREAD_LOCK (& module -> lock );
205
294
/* to ensure order wait until the previous accumulate completes */
206
- while (ompi_osc_rdma_peer_is_accumulating (peer )) {
295
+ while (! ompi_osc_rdma_peer_test_set_flag (peer , OMPI_OSC_RDMA_PEER_ACCUMULATING )) {
207
296
OPAL_THREAD_UNLOCK (& module -> lock );
208
297
ompi_osc_rdma_progress (module );
209
298
OPAL_THREAD_LOCK (& module -> lock );
210
299
}
211
300
212
- peer -> flags |= OMPI_OSC_RDMA_PEER_ACCUMULATING ;
213
301
OPAL_THREAD_UNLOCK (& module -> lock );
214
302
215
303
if (!ompi_osc_rdma_peer_is_exclusive (peer )) {
@@ -847,11 +935,12 @@ static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, s
847
935
ompi_osc_rdma_acc_put_complete , request , NULL );
848
936
if (OPAL_UNLIKELY (OPAL_SUCCESS != ret )) {
849
937
OSC_RDMA_VERBOSE (MCA_BASE_VERBOSE_ERROR , "could not start put to complete accumulate operation. opal return code "
850
- "%d" , ret );
851
- }
938
+ "%d. queuing operation..." , ret );
852
939
853
- /* TODO -- we can do better. probably should queue up the next step and handle it in progress */
854
- assert (OPAL_SUCCESS == ret );
940
+ ret = ompi_osc_rdma_event_queue (module , peer -> data_endpoint , OMPI_OSC_RDMA_EVENT_TYPE_PUT , local_address , local_handle ,
941
+ request -> target_address , (mca_btl_base_registration_handle_t * ) request -> ctx , request -> len ,
942
+ ompi_osc_rdma_acc_put_complete , request , NULL );
943
+ }
855
944
856
945
return ;
857
946
}
@@ -868,7 +957,7 @@ static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, s
868
957
ompi_osc_rdma_request_complete (request , status );
869
958
870
959
ompi_osc_rdma_sync_rdma_dec (sync );
871
- peer -> flags &= ~ OMPI_OSC_RDMA_PEER_ACCUMULATING ;
960
+ ompi_osc_rdma_peer_clear_flag ( peer , OMPI_OSC_RDMA_PEER_ACCUMULATING ) ;
872
961
}
873
962
874
963
static inline int cas_rdma (ompi_osc_rdma_sync_t * sync , const void * source_addr , const void * compare_addr , void * result_addr ,
@@ -894,12 +983,11 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr,
894
983
895
984
OPAL_THREAD_LOCK (& module -> lock );
896
985
/* to ensure order wait until the previous accumulate completes */
897
- while (ompi_osc_rdma_peer_is_accumulating (peer )) {
986
+ while (! ompi_osc_rdma_peer_test_set_flag (peer , OMPI_OSC_RDMA_PEER_ACCUMULATING )) {
898
987
OPAL_THREAD_UNLOCK (& module -> lock );
899
988
ompi_osc_rdma_progress (module );
900
989
OPAL_THREAD_LOCK (& module -> lock );
901
990
}
902
- peer -> flags |= OMPI_OSC_RDMA_PEER_ACCUMULATING ;
903
991
OPAL_THREAD_UNLOCK (& module -> lock );
904
992
905
993
offset = target_address & btl_alignment_mask ;;
0 commit comments