You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Teach the dynamic rules file reader to look for the
alltoall_algorithm_max_requests tuning parameter. To keep the dynamic rules
file format backward compatible, the alltoall_algorithm_max_requests parameter is
optional. When it is not present in the rule definition, the value of the
corresponding MCA variable is used instead.
Resolves #12589
Signed-off-by: Burlen Loring <[email protected]>
longNCOL=0, /* number of collectives for which rules are provided */
72
+
COLID=0, /* identifies the collective type to associate the rules with */
73
+
NCOMSIZES=0, /* number of sets of message size rules. the key is communicator size */
74
+
COMSIZE=0, /* communicator size, the key identifying a specific set of message size rules. */
75
+
NMSGSIZES=0, /* number of message size rules in the set. */
76
+
MSGSIZE=0, /* message size, the key identifying a specific rule in the set. */
77
+
ALG=0, /* the collective specific algorithm to use */
78
+
FANINOUT=0, /* algorithm specific tuning parameter */
79
+
SEGSIZE=0, /* algorithm specific tuning parameter */
80
+
MAXREQ=0; /* algorithm specific tuning parameter */
61
81
FILE*fptr= (FILE*) NULL;
62
82
intx, ncs, nms;
63
83
@@ -103,106 +123,126 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t**
103
123
goto on_file_error;
104
124
}
105
125
106
-
if( (getnext(fptr, &X) <0) || (X<0) ) {
126
+
/* get the number of collectives for which rules are provided in the file */
127
+
if( (getnext(fptr, &NCOL) <0) || (NCOL<0) ) {
107
128
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline));
108
129
goto on_file_error;
109
130
}
110
-
if (X>n_collectives) {
111
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %ld is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline));
131
+
if (NCOL>n_collectives) {
132
+
OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %ld is greater than number of MPI collectives possible %d ??? error around line %d\n", NCOL, n_collectives, fileline));
112
133
goto on_file_error;
113
134
}
114
135
115
-
for (x=0;x<X;x++) { /* for each collective */
136
+
for (x=0;x<NCOL;x++) { /* for each collective */
116
137
117
-
if( (getnext(fptr, &CI) <0) || (CI<0) ) {
138
+
/* get the collective for which rules are being provided */
139
+
if( (getnext(fptr, &COLID) <0) || (COLID<0) ) {
118
140
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read next Collective id in configuration file around line %d\n", fileline));
119
141
goto on_file_error;
120
142
}
121
-
if (CI>=n_collectives) {
122
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %ld is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline));
143
+
if (COLID>=n_collectives) {
144
+
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %ld is greater than MPI collectives possible %d. Error around line %d\n", COLID, n_collectives, fileline));
123
145
goto on_file_error;
124
146
}
125
147
126
-
if (alg_rules[CI].alg_rule_id!=CI) {
127
-
OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %ld\n", CI));
148
+
if (alg_rules[COLID].alg_rule_id!=COLID) {
149
+
OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %ld\n", COLID));
128
150
goto on_file_error;
129
151
}
130
-
OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %ld\n", CI));
131
-
alg_p=&alg_rules[CI];
152
+
OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %ld\n", COLID));
153
+
alg_p=&alg_rules[COLID];
132
154
133
-
alg_p->alg_rule_id=CI;
155
+
alg_p->alg_rule_id=COLID;
134
156
alg_p->n_com_sizes=0;
135
157
alg_p->com_rules= (ompi_coll_com_rule_t*) NULL;
136
158
137
-
if( (getnext (fptr, &NCS) <0) || (NCS<0) ) {
138
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %ld at around line %d\n", CI, fileline));
159
+
/* get the number of communicator sizes for which a set of rules are to be provided */
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %ld com rule %d at around line %d\n", COLID, ncs, fileline));
155
179
goto on_file_error;
156
180
}
157
181
158
-
com_p->mpi_comsize=CS;
182
+
com_p->mpi_comsize=COMSIZE;
159
183
160
-
if( (getnext (fptr, &NMS) <0) || (NMS<0) ) {
161
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %ld com rule %d at around line %d\n", CI, ncs, fileline));
184
+
/* get the number of message sizes to specify rules for. inner set size */
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %ld com rule %d at around line %d\n", COLID, ncs, fileline));
162
187
goto on_file_error;
163
188
}
164
189
OPAL_OUTPUT((ompi_coll_tuned_stream, "Read message count %ld for dynamic rule for collective ID %ld and comm size %ld\n",
OPAL_OUTPUT((ompi_coll_tuned_stream,"Cannot allocate msg rules for file [%s]\n", fname));
170
195
goto on_file_error;
171
196
}
172
197
173
198
msg_p=com_p->msg_rules;
174
199
175
-
for (nms=0;nms<NMS;nms++) { /* for each msg size */
200
+
for (nms=0;nms<NMSGSIZES;nms++) { /* for each msg size */
176
201
177
202
msg_p=&(com_p->msg_rules[nms]);
178
203
179
-
if( (getnext (fptr, &MS) <0) || (MS<0) ) {
180
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
204
+
/* read the message size to associate the rule with */
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
181
207
goto on_file_error;
182
208
}
183
-
msg_p->msg_size= (size_t)MS;
209
+
msg_p->msg_size= (size_t)MSGSIZE;
184
210
211
+
/* read the collective specific algorithm identifier */
185
212
if( (getnext (fptr, &ALG) <0) || (ALG<0) ) {
186
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
213
+
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
220
+
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
193
221
goto on_file_error;
194
222
}
195
223
msg_p->result_topo_faninout=FANINOUT;
196
224
197
-
if( (getnext (fptr, &SS) <0) || (SS<0) ) {
198
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
199
228
goto on_file_error;
200
229
}
201
-
msg_p->result_segsize=SS;
230
+
msg_p->result_segsize=SEGSIZE;
231
+
232
+
/* read the max requests tuning parameter. optional */
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read max requests for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
237
+
goto on_file_error;
238
+
}
239
+
msg_p->result_max_requests=MAXREQ;
240
+
}
202
241
203
-
if (!nms&&MS) {
242
+
/* check the first rule is for 0 size. look-up depends on this */
243
+
if (!nms&&MSGSIZE) {
204
244
OPAL_OUTPUT((ompi_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n"));
205
-
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %ld com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline));
245
+
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %ld com rule %d msg rule %d at around line %d\n", MSGSIZE, COLID, ncs, nms, fileline));
206
246
goto on_file_error;
207
247
}
208
248
@@ -219,7 +259,7 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t**
219
259
} /* comm size */
220
260
221
261
total_alg_count++;
222
-
OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %ld\n", CI));
262
+
OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %ld\n", COLID));
0 commit comments