Skip to content

coll tuned dynamic rules file alltoall_algorithm_max_requests #12827

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions ompi/mca/coll/base/coll_base_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
* Copyright (c) 2014-2020 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved.
*
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -482,6 +485,26 @@ int ompi_coll_base_file_peek_next_char_is(FILE *fptr, int *fileline, int expecte
} while (1);
}

/**
 * Peek at the next non-blank character of the stream.
 *
 * Spaces and tabs are consumed and discarded. The first character that is
 * neither (this includes a newline, so the scan does not move past the end of
 * the current line) is pushed back onto the stream so the next read sees it
 * again.
 *
 * @param fptr  stream to read from
 *
 * @return the result of isdigit() on the character that stopped the scan:
 *         non-zero when it is a decimal digit, 0 otherwise (including when
 *         fgetc() reported EOF).
 */
int ompi_coll_base_file_peek_next_char_isdigit(FILE *fptr)
{
    int c;

    /* swallow leading blanks; anything else terminates the scan */
    for (c = fgetc(fptr); (' ' == c) || ('\t' == c); c = fgetc(fptr)) {
        ;
    }

    /* hand the terminating character back to the stream untouched */
    ungetc(c, fptr);

    return isdigit(c);
}

/**
* There are certainly simpler implementations for this function when performance
* is not a critical point. But, as this function is used during the collective
Expand Down
2 changes: 2 additions & 0 deletions ompi/mca/coll/base/coll_base_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2014-2020 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -195,6 +196,7 @@ int ompi_coll_base_file_getnext_string(FILE *fptr, int *fileline, char** val);
* eat the value, otherwise put it back into the file.
*/
int ompi_coll_base_file_peek_next_char_is(FILE *fptr, int *fileline, int expected);
int ompi_coll_base_file_peek_next_char_isdigit(FILE *fptr);

/* Miscellaneous function */
const char* mca_coll_base_colltype_to_str(int collid);
Expand Down
131 changes: 89 additions & 42 deletions ompi/mca/coll/tuned/coll_tuned_dynamic_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved.
* Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -42,13 +43,24 @@
static int fileline=0; /* used for verbose error messages */

#define getnext(fptr, pval) ompi_coll_base_file_getnext_long(fptr, &fileline, pval)
#define isnext_digit(fptr) ompi_coll_base_file_peek_next_char_isdigit(fptr)

/*
* Reads a rule file called fname
* Builds the algorithm rule table for a max of n_collectives
* The rule file defines a set of sets of rules. The outer set is keyed on
* communicator size while the inner set is keyed on message size. When a
* communicator is constructed its size is used to look up the nested set of
* message size keyed rules. When a collective is called the message size
* determined from its call arguments is used to look up a specific rule in the
* inner set.
*
* Rules for communicator and message sizes 0 and N (where N is larger than the
* largest key you provide) can be specified to fall back to the fixed decision
* framework above and below the communicator and message size ranges of
* interest.
*
* If an error occurs it removes the rule table and then exits with a very verbose
* error message (this stops the user using a half baked rule table
* error message. this stops the user using a half baked rule table.
*
* Returns the number of actual collectives that a rule exists for
* (note 0 is NOT an error)
Expand All @@ -57,9 +69,18 @@ static int fileline=0; /* used for verbose error messages */

int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules, int n_collectives)
{
long CI, NCS, CS, ALG, NMS, FANINOUT, X, MS, SS;
long NCOL = 0, /* number of collectives for which rules are provided */
COLID = 0, /* identifies the collective type to associate the rules with */
NCOMSIZES = 0, /* number of sets of message size rules. the key is communicator size */
COMSIZE = 0, /* communicator size, the key identifying a specific set of message size rules. */
NMSGSIZES = 0, /* number of message size rules in the set. */
MSGSIZE = 0, /* message size, the key identifying a specific rule in the set. */
ALG = 0, /* the collective specific algorithm to use */
FANINOUT = 0, /* algorithm specific tuning parameter */
SEGSIZE = 0, /* algorithm specific tuning parameter */
MAXREQ = 0; /* algorithm specific tuning parameter */
FILE *fptr = (FILE*) NULL;
int x, ncs, nms;
int x, ncs, nms, version;

ompi_coll_alg_rule_t *alg_rules = (ompi_coll_alg_rule_t*) NULL; /* complete table of rules */

Expand Down Expand Up @@ -103,106 +124,131 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t**
goto on_file_error;
}

if( (getnext(fptr, &X) < 0) || (X < 0) ) {
/* consume the optional version identifier */
if (0 == fscanf(fptr, "rule-file-version-%u", &version)) {
version = 1;
}

/* get the number of collectives for which rules are provided in the file */
if( (getnext(fptr, &NCOL) < 0) || (NCOL < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline));
goto on_file_error;
}
if (X>n_collectives) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %ld is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline));
if (NCOL>n_collectives) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %ld is greater than number of MPI collectives possible %d ??? error around line %d\n", NCOL, n_collectives, fileline));
goto on_file_error;
}

for (x=0;x<X;x++) { /* for each collective */
for (x=0;x<NCOL;x++) { /* for each collective */

if( (getnext(fptr, &CI) < 0) || (CI < 0) ) {
/* get the collective for which rules are being provided */
if( (getnext(fptr, &COLID) < 0) || (COLID < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read next Collective id in configuration file around line %d\n", fileline));
goto on_file_error;
}
if (CI>=n_collectives) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %ld is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline));
if (COLID>=n_collectives) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %ld is greater than MPI collectives possible %d. Error around line %d\n", COLID, n_collectives, fileline));
goto on_file_error;
}

if (alg_rules[CI].alg_rule_id != CI) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %ld\n", CI));
if (alg_rules[COLID].alg_rule_id != COLID) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %ld\n", COLID));
goto on_file_error;
}
OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %ld\n", CI));
alg_p = &alg_rules[CI];
OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %ld\n", COLID));
alg_p = &alg_rules[COLID];

alg_p->alg_rule_id = CI;
alg_p->alg_rule_id = COLID;
alg_p->n_com_sizes = 0;
alg_p->com_rules = (ompi_coll_com_rule_t *) NULL;

if( (getnext (fptr, &NCS) < 0) || (NCS < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %ld at around line %d\n", CI, fileline));
/* get the number of communicator sizes for which a set of rules are to be provided */
if( (getnext (fptr, &NCOMSIZES) < 0) || (NCOMSIZES < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %ld at around line %d\n", COLID, fileline));
goto on_file_error;
}
OPAL_OUTPUT((ompi_coll_tuned_stream, "Read communicator count %ld for dynamic rule for collective ID %ld\n", NCS, CI));
alg_p->n_com_sizes = NCS;
alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCS, CI);
OPAL_OUTPUT((ompi_coll_tuned_stream, "Read communicator count %ld for dynamic rule for collective ID %ld\n", NCOMSIZES, COLID));
alg_p->n_com_sizes = NCOMSIZES;
alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCOMSIZES, COLID);
if (NULL == alg_p->com_rules) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Cannot allocate com rules for file [%s]\n", fname));
goto on_file_error;
}

for (ncs=0;ncs<NCS;ncs++) { /* for each comm size */
for (ncs=0;ncs<NCOMSIZES;ncs++) { /* for each comm size */

com_p = &(alg_p->com_rules[ncs]);

if( (getnext (fptr, &CS) < 0) || (CS < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %ld com rule %d at around line %d\n", CI, ncs, fileline));
/* get the communicator size to associate the set of rules with */
if( (getnext (fptr, &COMSIZE) < 0) || (COMSIZE < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %ld com rule %d at around line %d\n", COLID, ncs, fileline));
goto on_file_error;
}

com_p->mpi_comsize = CS;
com_p->mpi_comsize = COMSIZE;

if( (getnext (fptr, &NMS) < 0) || (NMS < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %ld com rule %d at around line %d\n", CI, ncs, fileline));
/* get the number of message sizes to specify rules for. inner set size */
if( (getnext (fptr, &NMSGSIZES) < 0) || (NMSGSIZES < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %ld com rule %d at around line %d\n", COLID, ncs, fileline));
goto on_file_error;
}
OPAL_OUTPUT((ompi_coll_tuned_stream, "Read message count %ld for dynamic rule for collective ID %ld and comm size %ld\n",
NMS, CI, CS));
com_p->n_msg_sizes = NMS;
com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMS, CI, ncs, CS);
NMSGSIZES, COLID, COMSIZE));
com_p->n_msg_sizes = NMSGSIZES;
com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMSGSIZES, COLID, ncs, COMSIZE);
if (NULL == com_p->msg_rules) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Cannot allocate msg rules for file [%s]\n", fname));
goto on_file_error;
}

msg_p = com_p->msg_rules;

for (nms=0;nms<NMS;nms++) { /* for each msg size */
for (nms=0;nms<NMSGSIZES;nms++) { /* for each msg size */

msg_p = &(com_p->msg_rules[nms]);

if( (getnext (fptr, &MS) < 0) || (MS < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
/* read the message size to associate the rule with */
if( (getnext (fptr, &MSGSIZE) < 0) || (MSGSIZE < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
goto on_file_error;
}
msg_p->msg_size = (size_t)MS;
msg_p->msg_size = (size_t)MSGSIZE;

/* read the collective specific algorithm identifier */
if( (getnext (fptr, &ALG) < 0) || (ALG < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_alg = ALG;

/* read faninout tuning parameter. required */
if( (getnext (fptr, &FANINOUT) < 0) || (FANINOUT < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_topo_faninout = FANINOUT;

if( (getnext (fptr, &SS) < 0) || (SS < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
/* read segsize tuning parameter. required */
if( (getnext (fptr, &SEGSIZE) < 0) || (SEGSIZE < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_segsize = SS;
msg_p->result_segsize = SEGSIZE;

/* read the max requests tuning parameter. optional */
msg_p->result_max_requests = ompi_coll_tuned_alltoall_max_requests;
if( (version > 1) && isnext_digit(fptr) ) {
if( (getnext (fptr, &MAXREQ) < 0) || (MAXREQ < 0) ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read max requests for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_max_requests = MAXREQ;
}

if (!nms && MS) {
/* check the first rule is for 0 size. look-up depends on this */
if (!nms && MSGSIZE) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n"));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %ld com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %ld com rule %d msg rule %d at around line %d\n", MSGSIZE, COLID, ncs, nms, fileline));
goto on_file_error;
}

Expand All @@ -219,13 +265,14 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t**
} /* comm size */

total_alg_count++;
OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %ld\n", CI));
OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %ld\n", COLID));

} /* per collective */

fclose (fptr);

OPAL_OUTPUT((ompi_coll_tuned_stream,"\nConfigure file Stats\n"));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Version\t\t\t\t\t: %5u\n", version));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collectives with rules\t\t\t: %5d\n", total_alg_count));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Communicator sizes with rules\t\t: %5d\n", total_com_count));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message sizes with rules\t\t: %5d\n", total_msg_count));
Expand Down
Loading