Skip to content

Commit 804a517

Browse files
authored
Merge pull request #6146 from bosilca/topic/treematch_update
Update to the latest TreeMatch (v1.3).
2 parents 1cd1f4a + 74f2365 commit 804a517

16 files changed

+877
-315
lines changed

ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c

Lines changed: 75 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
* Copyright (c) 2011-2017 The University of Tennessee and The University
44
* of Tennessee Research Foundation. All rights
55
* reserved.
6-
* Copyright (c) 2011-2016 INRIA. All rights reserved.
7-
* Copyright (c) 2012-2017 Bordeaux Polytechnic Institute
6+
* Copyright (c) 2011-2018 Inria. All rights reserved.
7+
* Copyright (c) 2011-2018 Bordeaux Polytechnic Institute
88
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
99
* Copyright (c) 2015-2017 Research Organization for Information Science
1010
* and Technology (RIST). All rights reserved.
@@ -36,7 +36,7 @@
3636

3737
#include "opal/mca/pmix/pmix.h"
3838

39-
/* #define __DEBUG__ 1 */
39+
/* #define __DEBUG__ 1 */
4040

4141
/**
4242
* This function performs an allreduce between all processes to detect oversubscription.
@@ -320,7 +320,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
320320
}
321321

322322
reqs = (MPI_Request *)calloc(num_procs_in_node-1, sizeof(MPI_Request));
323-
if( rank == lindex_to_grank[0] ) { /* local leader clean the hierarchy */
323+
if( rank == lindex_to_grank[0] ) { /* local leader cleans the hierarchy */
324324
int array_size = effective_depth + 1;
325325
int *myhierarchy = (int *)calloc(array_size, sizeof(int));
326326

@@ -449,7 +449,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
449449

450450
for(i = 0; i < num_nodes; i++)
451451
num_objs_total += objs_per_node[i];
452-
obj_mapping = (int *)calloc(num_objs_total,sizeof(int));
452+
obj_mapping = (int *)malloc(num_objs_total*sizeof(int));
453+
for(i = 0; i < num_objs_total; i++)
454+
obj_mapping[i] = -1;
453455

454456
memcpy(obj_mapping, obj_to_rank_in_comm, objs_per_node[0]*sizeof(int));
455457
displ = objs_per_node[0];
@@ -508,8 +510,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
508510

509511
for(i = 0 ; i < hierarchies[0]; i++)
510512
hierarchies[i+1] = tracker[i]->arity;
511-
for(; i < (TM_MAX_LEVELS+1); i++) /* fill up everything else with -1 */
512-
hierarchies[i] = -1;
513+
for(; i < (TM_MAX_LEVELS+1); i++) /* fill up everything else with 0 */
514+
hierarchies[i] = 0;
513515

514516
/* gather hierarchies iff more than 1 node! */
515517
if ( num_nodes > 1 ) {
@@ -592,32 +594,24 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
592594
for(i = 1 ; i < tm_topology->nb_levels; i++)
593595
tm_topology->nb_nodes[i] = tm_topology->nb_nodes[i-1] * tm_topology->arity[i-1];
594596

597+
#ifdef __DEBUG__
598+
assert(num_objs_total == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
599+
#endif
595600
/* Build process id tab */
596-
tm_topology->node_id = (int **)calloc(tm_topology->nb_levels, sizeof(int*));
597-
tm_topology->node_rank = (int **)malloc(sizeof(int *) * tm_topology->nb_levels);
598-
for(i = 0; i < tm_topology->nb_levels; i++) {
599-
tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i], sizeof(int));
600-
tm_topology->node_rank[i] = (int * )calloc(tm_topology->nb_nodes[i], sizeof(int));
601-
/*note : we make the hypothesis that logical indexes in hwloc range from
602-
0 to N, are contiguous and crescent. */
603-
604-
for( j = 0 ; j < (int)tm_topology->nb_nodes[i] ; j++ ) {
605-
tm_topology->node_id[i][j] = j;
606-
tm_topology->node_rank[i][j] = j;
607-
608-
/* Should use object->logical_index */
609-
/* obj = hwloc_get_obj_by_depth(topo,i,j%num_objs_in_node);
610-
id = obj->logical_index + (num_objs_in_node)*(j/num_obj_in_node)*/
611-
/*
612-
int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
613-
topology->node_id[i][j] = id;
614-
topology->node_rank[i][id] = j;
615-
*/
616-
}
601+
tm_topology->node_id = (int *)malloc(num_objs_total*sizeof(int));
602+
tm_topology->node_rank = (int *)malloc(num_objs_total*sizeof(int));
603+
for( i = 0 ; i < num_objs_total ; i++ )
604+
tm_topology->node_id[i] = tm_topology->node_rank[i] = -1;
605+
/*note : we make the hypothesis that logical indexes in hwloc range from
606+
0 to N, are contiguous and increasing. */
607+
for( i = 0 ; i < num_objs_total ; i++ ) {
608+
tm_topology->node_id[i] = obj_mapping[i]; /* use process ranks instead of core numbers */
609+
if (obj_mapping[i] != -1) /* so that k[i] is the new rank of process i */
610+
tm_topology->node_rank[obj_mapping[i]] = i; /* after computation by TreeMatch */
617611
}
612+
618613
/* unused for now*/
619614
tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double));
620-
621615
tm_topology->nb_proc_units = num_objs_total;
622616

623617
tm_topology->nb_constraints = 0;
@@ -627,22 +621,23 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
627621
tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int));
628622
for(idx = 0, i = 0; i < tm_topology->nb_proc_units ; i++)
629623
if (obj_mapping[i] != -1)
630-
tm_topology->constraints[idx++] = obj_mapping[i];
631-
624+
tm_topology->constraints[idx++] = obj_mapping[i]; /* use process ranks instead of core numbers */
625+
#ifdef __DEBUG__
626+
assert(idx == tm_topology->nb_constraints);
627+
#endif
632628
tm_topology->oversub_fact = 1;
633629

634630
#ifdef __DEBUG__
635-
assert(num_objs_total == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
636-
631+
/*
637632
for(i = 0; i < tm_topology->nb_levels ; i++) {
638633
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
639634
"tm topo node_id for level [%i] : ",i);
640635
dump_int_array(10, ompi_topo_base_framework.framework_output,
641636
"", "", obj_mapping, tm_topology->nb_nodes[i]);
642637
}
638+
*/
643639
tm_display_topology(tm_topology);
644640
#endif
645-
646641
comm_pattern = (double **)malloc(size*sizeof(double *));
647642
for(i = 0 ; i < size ; i++)
648643
comm_pattern[i] = local_pattern + i * size;
@@ -660,15 +655,14 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
660655
"", "", comm_pattern[i], size);
661656
}
662657
#endif
663-
tm_optimize_topology(&tm_topology);
658+
//tm_optimize_topology(&tm_topology);
664659
aff_mat = tm_build_affinity_mat(comm_pattern,size);
665660
comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
666661
sol = tm_compute_mapping(tm_topology, comm_tree);
667662

668663
k = (int *)calloc(sol->k_length, sizeof(int));
669664
for(idx = 0 ; idx < (int)sol->k_length ; idx++)
670665
k[idx] = sol->k[idx][0];
671-
672666
#ifdef __DEBUG__
673667
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
674668
"====> nb levels : %i\n",tm_topology->nb_levels);
@@ -690,6 +684,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
690684

691685
/* Todo : Bcast + group creation */
692686
/* scatter the ranks */
687+
/* don't need to convert k from local rank to global rank */
693688
if (OMPI_SUCCESS != (err = comm_old->c_coll->coll_scatter(k, 1, MPI_INT,
694689
&newrank, 1, MPI_INT,
695690
0, comm_old,
@@ -770,6 +765,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
770765
tm_solution_t *sol = NULL;
771766
tm_affinity_mat_t *aff_mat = NULL;
772767
double **comm_pattern = NULL;
768+
int *obj_to_rank_in_lcomm = NULL;
773769

774770
comm_pattern = (double **)malloc(num_procs_in_node*sizeof(double *));
775771
for( i = 0; i < num_procs_in_node; i++ ) {
@@ -800,35 +796,57 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
800796
tm_topology->nb_levels = numlevels;
801797
tm_topology->arity = (int *)calloc(tm_topology->nb_levels, sizeof(int));
802798
tm_topology->nb_nodes = (size_t *)calloc(tm_topology->nb_levels, sizeof(size_t));
803-
tm_topology->node_id = (int **)malloc(tm_topology->nb_levels*sizeof(int *));
804-
tm_topology->node_rank = (int **)malloc(tm_topology->nb_levels*sizeof(int *));
805-
799+
806800
for(i = 0 ; i < tm_topology->nb_levels ; i++){
807801
int nb_objs = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, tracker[i]->depth);
808802
tm_topology->nb_nodes[i] = nb_objs;
809803
tm_topology->arity[i] = tracker[i]->arity;
810-
tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i], sizeof(int));
811-
tm_topology->node_rank[i] = (int * )calloc(tm_topology->nb_nodes[i], sizeof(int));
812-
for(j = 0; j < (int)tm_topology->nb_nodes[i] ; j++){
813-
tm_topology->node_id[i][j] = j;
814-
tm_topology->node_rank[i][j] = j;
815-
}
816804
}
817805

806+
807+
#ifdef __DEBUG__
808+
assert(num_objs_in_node == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
809+
#endif
810+
/* create a table that derives the rank in local (node) comm from the object number */
811+
obj_to_rank_in_lcomm = (int *)malloc(num_objs_in_node*sizeof(int));
812+
for(i = 0 ; i < num_objs_in_node ; i++) {
813+
obj_to_rank_in_lcomm[i] = -1;
814+
object = hwloc_get_obj_by_depth(opal_hwloc_topology, effective_depth, i);
815+
for( j = 0; j < num_procs_in_node ; j++ )
816+
if(localrank_to_objnum[j] == (int)(object->logical_index)) {
817+
obj_to_rank_in_lcomm[i] = j;
818+
break;
819+
}
820+
}
821+
822+
/* Build process id tab */
823+
tm_topology->node_id = (int *)malloc(num_objs_in_node*sizeof(int));
824+
tm_topology->node_rank = (int *)malloc(num_objs_in_node*sizeof(int));
825+
for(i = 1 ; i < num_objs_in_node; i++)
826+
tm_topology->node_id[i] = tm_topology->node_rank[i] = -1;
827+
828+
for( i = 0 ; i < num_objs_in_node ; i++ ) {
829+
/*note : we make the hypothesis that logical indexes in hwloc range from
830+
0 to N, are contiguous and increasing. */
831+
tm_topology->node_id[i] = obj_to_rank_in_lcomm[i];
832+
if( obj_to_rank_in_lcomm[i] != -1)
833+
tm_topology->node_rank[obj_to_rank_in_lcomm[i]] = i;
834+
}
835+
818836
/* unused for now*/
819837
tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double));
820838

821839
tm_topology->nb_proc_units = num_objs_in_node;
822-
//tm_topology->nb_proc_units = num_procs_in_node;
823840
tm_topology->nb_constraints = 0;
824-
for(i = 0; i < num_procs_in_node ; i++)
825-
if (localrank_to_objnum[i] != -1)
841+
842+
for(i = 0; i < num_objs_in_node ; i++)
843+
if (obj_to_rank_in_lcomm[i] != -1)
826844
tm_topology->nb_constraints++;
827-
845+
828846
tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int));
829-
for(idx = 0,i = 0; i < num_procs_in_node ; i++)
830-
if (localrank_to_objnum[i] != -1)
831-
tm_topology->constraints[idx++] = localrank_to_objnum[i];
847+
for(idx = 0,i = 0; i < num_objs_in_node ; i++)
848+
if (obj_to_rank_in_lcomm[i] != -1)
849+
tm_topology->constraints[idx++] = obj_to_rank_in_lcomm[i];
832850

833851
tm_topology->oversub_fact = 1;
834852

@@ -841,12 +859,12 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
841859
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
842860
"Nb objs for level %i : %lu | arity %i\n ",
843861
i, tm_topology->nb_nodes[i],tm_topology->arity[i]));
844-
dump_int_array(10, ompi_topo_base_framework.framework_output,
845-
"", "Obj id ", tm_topology->node_id[i], tm_topology->nb_nodes[i]);
846862
}
863+
dump_int_array(10, ompi_topo_base_framework.framework_output,
864+
"", "Obj id ", tm_topology->node_id, tm_topology->nb_nodes[tm_topology->nb_levels-1]);
847865
tm_display_topology(tm_topology);
848866
#endif
849-
tm_optimize_topology(&tm_topology);
867+
//tm_optimize_topology(&tm_topology);
850868
aff_mat = tm_build_affinity_mat(comm_pattern,num_procs_in_node);
851869
comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
852870
sol = tm_compute_mapping(tm_topology, comm_tree);
@@ -866,15 +884,15 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
866884
dump_int_array(10, ompi_topo_base_framework.framework_output,
867885
"Matching : ", "", sol->sigma, sol->sigma_length);
868886
#endif
869-
887+
free(obj_to_rank_in_lcomm);
870888
free(aff_mat->sum_row);
871889
free(aff_mat);
872890
free(comm_pattern);
873891
tm_free_solution(sol);
874892
tm_free_tree(comm_tree);
875893
tm_free_topology(tm_topology);
876894
}
877-
895+
878896
/* Todo : Bcast + group creation */
879897
/* scatter the ranks */
880898
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_scatter(k, 1, MPI_INT,

ompi/mca/topo/treematch/treematch/PriorityQueue.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
/*
55
This comparison function is used to sort elements in key descending order.
66
*/
7+
int compfunc(const FiboNode * const, const FiboNode * const);
8+
9+
10+
711
static int compFunc(const FiboNode * const node1, const FiboNode * const node2)
812
{
913
return

ompi/mca/topo/treematch/treematch/tm_bucket.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ static int ilog2(int val)
3131

3232
static int verbose_level = ERROR;
3333

34-
static bucket_list_t global_bl;
34+
bucket_list_t global_bl;
3535

3636
int tab_cmp(const void*,const void*);
3737
int old_bucket_id(int,int,bucket_list_t);
@@ -199,7 +199,7 @@ void add_to_bucket(int id,int i,int j,bucket_list_t bucket_list)
199199
/* display_bucket(bucket);*/
200200
if(verbose_level >= DEBUG){
201201
printf("Extending bucket %d (%p) from size %d to size %d!\n",
202-
id, (void*)bucket->bucket, bucket->nb_elem, bucket->nb_elem+size);
202+
id,(void*)bucket->bucket, bucket->nb_elem, bucket->nb_elem+size);
203203
}
204204

205205
bucket->bucket = (coord*)REALLOC(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len));
@@ -648,6 +648,7 @@ double bucket_grouping(tm_affinity_mat_t *aff_mat,tm_tree_t *tab_node, tm_tree_t
648648
wait_work_completion(works[id]);
649649
val+=tab_val[id];
650650
FREE(works[id]->args);
651+
destroy_work(works[id]);
651652
}
652653

653654

0 commit comments

Comments
 (0)