Skip to content

Commit a760be1

Browse files
markalle authored and jjhursey committed
hook/prot: adding shortened output
Normally we print a -prot table up to 16 hosts that looks like this, where 16 can be changed via MPI_PROT_MAX:

```
 host | 0    1    2    3    4    5    6    7    8
======|==============================================
    0 : shm  ib   ib   ib   ib   ib   ib   ib   ib
    1 : ib   shm  ib   ib   ib   ib   ib   ib   ib
    2 : ib   ib   self ib   ib   ib   ib   ib   ib
    3 : ib   ib   ib   self ib   ib   ib   ib   ib
    4 : ib   ib   ib   ib   self ib   ib   ib   ib
    5 : ib   ib   ib   ib   ib   self ib   ib   ib
    6 : ib   ib   ib   ib   ib   ib   self ib   ib
    7 : ib   ib   ib   ib   ib   ib   ib   self ib
    8 : ib   ib   ib   ib   ib   ib   ib   ib   self
```

This checkin reduces MPI_PROT_MAX to 12 but adds a shorter table output that looks like this:

```
 host | 0 1 2 3 4       8
======|====================
    0 : A C C C C C C C C
    1 : C A C C C C C C C
    2 : C C B C C C C C C
    3 : C C C B C C C C C
    4 : C C C C B C C C C
    5 : C C C C C B C C C
    6 : C C C C C C B C C
    7 : C C C C C C C B C
    8 : C C C C C C C C B
key: A == shm
key: B == self
key: C == ib
```

That is used from 13 up to 36 ranks (or 3*MPI_PROT_MAX).

Signed-off-by: Joshua Hursey <[email protected]>
1 parent fa24868 commit a760be1

File tree

1 file changed

+94
-2
lines changed

1 file changed

+94
-2
lines changed

ompi/mca/hook/prot/hook_prot_fns.c

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,8 @@ ompi_report_prots(int mode) // 1 = from init, 2 = from finalize
268268
{
269269
int numhosts, i, j, k;
270270
char *p;
271-
int max2Dprottable = 16;
271+
int max2Dprottable = 12;
272+
int max2D1Cprottable = 36;
272273
int hostidprotbrief = 0;
273274
char * max2Dprotptr = NULL;
274275
char * hostidprotptr = NULL;
@@ -290,7 +291,10 @@ ompi_report_prots(int mode) // 1 = from init, 2 = from finalize
290291

291292
if (hpmp_myrank == 0) {
292293
max2Dprotptr = getenv("MPI_PROT_MAX");
293-
if (max2Dprotptr) { max2Dprottable = atoi(max2Dprotptr); }
294+
if (max2Dprotptr) {
295+
max2Dprottable = atoi(max2Dprotptr);
296+
max2D1Cprottable = 3 * max2Dprottable;
297+
}
294298

295299
hostidprotptr = getenv("MPI_PROT_BRIEF");
296300
if (hostidprotptr) { hostidprotbrief = atoi(hostidprotptr); }
@@ -552,6 +556,94 @@ ompi_report_prots(int mode) // 1 = from init, 2 = from finalize
552556
printf("\n");
553557
free(str);
554558
}
559+
else if (nleaderranks <= max2D1Cprottable) {
560+
char *str, *p;
561+
int tmp, per, done;
562+
char char_code[COMM_METHOD_MAX + 1], next_char;
563+
int method_count[COMM_METHOD_MAX + 1];
564+
565+
// characters for the number column in the 2d table,
566+
// must be large enough for the digits needed for host numbers
567+
per = 2;
568+
tmp = nleaderranks;
569+
while (tmp >= 10) { ++per; tmp /= 10; }
570+
571+
// pick a character code for each comm method based on
572+
// how many times it's in the table, use 'A' for the least common
573+
for (i=0; i<=COMM_METHOD_MAX; ++i) {
574+
char_code[i] = 0;
575+
method_count[i] = 0;
576+
}
577+
for (i=0; i<nleaderranks; ++i) {
578+
for (k=0; k<nleaderranks; ++k) {
579+
tmp = method[i * nleaderranks + k];
580+
++method_count[tmp];
581+
}
582+
}
583+
next_char = 'A';
584+
done = 0;
585+
while (!done) {
586+
int next_idx = -1;
587+
done = 1;
588+
for (i=0; i<=COMM_METHOD_MAX; ++i) {
589+
if (!char_code[i] && method_count[i]) {
590+
done = 0;
591+
if ( (next_idx == -1) ||
592+
(method_count[i] < method_count[next_idx]))
593+
{
594+
next_idx = i;
595+
}
596+
}
597+
}
598+
if (next_idx != -1) {
599+
char_code[next_idx] = next_char;
600+
++next_char;
601+
}
602+
}
603+
604+
str = malloc(per + 32 + nleaderranks * 2 + 1);
605+
p = str;
606+
sprintf(p, "0 1 2 3 ", i);
607+
p += 8;
608+
for (i=4; i<nleaderranks; i+=4) {
609+
sprintf(p, "%d", i);
610+
for (j=(int)strlen(p); j<8; ++j) {
611+
p[j] = ' ';
612+
}
613+
p[j] = 0;
614+
p += j;
615+
}
616+
--p;
617+
while (p>=str && ((*p)==' ')) { *(p--)=0; }
618+
tmp = (int)strlen(str) + 2;
619+
printf(" host | %s\n", str);
620+
memset(str, (int)'=', tmp);
621+
printf("======|=%s\n", str);
622+
623+
for (i=0; i<nleaderranks; ++i) {
624+
str[0] = 0;
625+
p = str;
626+
for (k=0; k<nleaderranks; ++k) {
627+
p[0] = char_code[method[i * nleaderranks + k]];
628+
p[1] = ' ';
629+
p[2] = 0;
630+
p += 2;
631+
}
632+
--p;
633+
while (p>str && *p==' ') { *(p--)=0; }
634+
printf("%5d : %s\n", i, str);
635+
}
636+
free(str);
637+
for (i=0; i<=COMM_METHOD_MAX; ++i) {
638+
for (k=0; k<=COMM_METHOD_MAX; ++k) {
639+
if (char_code[k] == 'A' + i) {
640+
printf("key: %c == %s\n", char_code[k],
641+
comm_method_string(k));
642+
}
643+
}
644+
}
645+
printf("\n");
646+
}
555647
// 3: abbreviated summary of interconnect and outliers
556648
// - check diagonal for uniformity + self, save majority method
557649
// - check non-diagonal for uniformity, save majority method

0 commit comments

Comments
 (0)