3
3
* Copyright (c) 2011-2017 The University of Tennessee and The University
4
4
* of Tennessee Research Foundation. All rights
5
5
* reserved.
6
- * Copyright (c) 2011-2016 INRIA . All rights reserved.
7
- * Copyright (c) 2012-2017 Bordeaux Polytechnic Institute
6
+ * Copyright (c) 2011-2018 Inria . All rights reserved.
7
+ * Copyright (c) 2011-2018 Bordeaux Polytechnic Institute
8
8
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
9
9
* Copyright (c) 2015-2017 Research Organization for Information Science
10
10
* and Technology (RIST). All rights reserved.
36
36
37
37
#include "opal/mca/pmix/pmix.h"
38
38
39
- /* #define __DEBUG__ 1 */
39
+ /* #define __DEBUG__ 1 */
40
40
41
41
/**
42
42
* This function performs an allreduce between all processes to detect oversubscription.
@@ -320,7 +320,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
320
320
}
321
321
322
322
reqs = (MPI_Request * )calloc (num_procs_in_node - 1 , sizeof (MPI_Request ));
323
- if ( rank == lindex_to_grank [0 ] ) { /* local leader clean the hierarchy */
323
+ if ( rank == lindex_to_grank [0 ] ) { /* local leader cleans the hierarchy */
324
324
int array_size = effective_depth + 1 ;
325
325
int * myhierarchy = (int * )calloc (array_size , sizeof (int ));
326
326
@@ -449,7 +449,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
449
449
450
450
for (i = 0 ; i < num_nodes ; i ++ )
451
451
num_objs_total += objs_per_node [i ];
452
- obj_mapping = (int * )calloc (num_objs_total ,sizeof (int ));
452
+ obj_mapping = (int * )malloc (num_objs_total * sizeof (int ));
453
+ for (i = 0 ; i < num_objs_total ; i ++ )
454
+ obj_mapping [i ] = -1 ;
453
455
454
456
memcpy (obj_mapping , obj_to_rank_in_comm , objs_per_node [0 ]* sizeof (int ));
455
457
displ = objs_per_node [0 ];
@@ -508,8 +510,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
508
510
509
511
for (i = 0 ; i < hierarchies [0 ]; i ++ )
510
512
hierarchies [i + 1 ] = tracker [i ]-> arity ;
511
- for (; i < (TM_MAX_LEVELS + 1 ); i ++ ) /* fill up everything else with -1 */
512
- hierarchies [i ] = -1 ;
513
+ for (; i < (TM_MAX_LEVELS + 1 ); i ++ ) /* fill up everything else with 0 */
514
+ hierarchies [i ] = 0 ;
513
515
514
516
/* gather hierarchies iff more than 1 node! */
515
517
if ( num_nodes > 1 ) {
@@ -592,32 +594,24 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
592
594
for (i = 1 ; i < tm_topology -> nb_levels ; i ++ )
593
595
tm_topology -> nb_nodes [i ] = tm_topology -> nb_nodes [i - 1 ] * tm_topology -> arity [i - 1 ];
594
596
597
+ #ifdef __DEBUG__
598
+ assert (num_objs_total == (int )tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
599
+ #endif
595
600
/* Build process id tab */
596
- tm_topology -> node_id = (int * * )calloc (tm_topology -> nb_levels , sizeof (int * ));
597
- tm_topology -> node_rank = (int * * )malloc (sizeof (int * ) * tm_topology -> nb_levels );
598
- for (i = 0 ; i < tm_topology -> nb_levels ; i ++ ) {
599
- tm_topology -> node_id [i ] = (int * )calloc (tm_topology -> nb_nodes [i ], sizeof (int ));
600
- tm_topology -> node_rank [i ] = (int * )calloc (tm_topology -> nb_nodes [i ], sizeof (int ));
601
- /*note : we make the hypothesis that logical indexes in hwloc range from
602
- 0 to N, are contiguous and crescent. */
603
-
604
- for ( j = 0 ; j < (int )tm_topology -> nb_nodes [i ] ; j ++ ) {
605
- tm_topology -> node_id [i ][j ] = j ;
606
- tm_topology -> node_rank [i ][j ] = j ;
607
-
608
- /* Should use object->logical_index */
609
- /* obj = hwloc_get_obj_by_depth(topo,i,j%num_objs_in_node);
610
- id = obj->logical_index + (num_objs_in_node)*(j/num_obj_in_node)*/
611
- /*
612
- int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
613
- topology->node_id[i][j] = id;
614
- topology->node_rank[i][id] = j;
615
- */
616
- }
601
+ tm_topology -> node_id = (int * )malloc (num_objs_total * sizeof (int ));
602
+ tm_topology -> node_rank = (int * )malloc (num_objs_total * sizeof (int ));
603
+ for ( i = 0 ; i < num_objs_total ; i ++ )
604
+ tm_topology -> node_id [i ] = tm_topology -> node_rank [i ] = -1 ;
605
+ /*note : we make the hypothesis that logical indexes in hwloc range from
606
+ 0 to N, are contiguous and crescent. */
607
+ for ( i = 0 ; i < num_objs_total ; i ++ ) {
608
+ tm_topology -> node_id [i ] = obj_mapping [i ]; /* use process ranks instead of core numbers */
609
+ if (obj_mapping [i ] != -1 ) /* so that k[i] is the new rank of process i */
610
+ tm_topology -> node_rank [obj_mapping [i ]] = i ; /* after computation by TreeMatch */
617
611
}
612
+
618
613
/* unused for now*/
619
614
tm_topology -> cost = (double * )calloc (tm_topology -> nb_levels ,sizeof (double ));
620
-
621
615
tm_topology -> nb_proc_units = num_objs_total ;
622
616
623
617
tm_topology -> nb_constraints = 0 ;
@@ -627,22 +621,23 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
627
621
tm_topology -> constraints = (int * )calloc (tm_topology -> nb_constraints ,sizeof (int ));
628
622
for (idx = 0 , i = 0 ; i < tm_topology -> nb_proc_units ; i ++ )
629
623
if (obj_mapping [i ] != -1 )
630
- tm_topology -> constraints [idx ++ ] = obj_mapping [i ];
631
-
624
+ tm_topology -> constraints [idx ++ ] = obj_mapping [i ]; /* use process ranks instead of core numbers */
625
+ #ifdef __DEBUG__
626
+ assert (idx == tm_topology -> nb_constraints );
627
+ #endif
632
628
tm_topology -> oversub_fact = 1 ;
633
629
634
630
#ifdef __DEBUG__
635
- assert (num_objs_total == (int )tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
636
-
631
+ /*
637
632
for(i = 0; i < tm_topology->nb_levels ; i++) {
638
633
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
639
634
"tm topo node_id for level [%i] : ",i);
640
635
dump_int_array(10, ompi_topo_base_framework.framework_output,
641
636
"", "", obj_mapping, tm_topology->nb_nodes[i]);
642
637
}
638
+ */
643
639
tm_display_topology (tm_topology );
644
640
#endif
645
-
646
641
comm_pattern = (double * * )malloc (size * sizeof (double * ));
647
642
for (i = 0 ; i < size ; i ++ )
648
643
comm_pattern [i ] = local_pattern + i * size ;
@@ -660,15 +655,14 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
660
655
"" , "" , comm_pattern [i ], size );
661
656
}
662
657
#endif
663
- tm_optimize_topology (& tm_topology );
658
+ // tm_optimize_topology(&tm_topology);
664
659
aff_mat = tm_build_affinity_mat (comm_pattern ,size );
665
660
comm_tree = tm_build_tree_from_topology (tm_topology ,aff_mat , NULL , NULL );
666
661
sol = tm_compute_mapping (tm_topology , comm_tree );
667
662
668
663
k = (int * )calloc (sol -> k_length , sizeof (int ));
669
664
for (idx = 0 ; idx < (int )sol -> k_length ; idx ++ )
670
665
k [idx ] = sol -> k [idx ][0 ];
671
-
672
666
#ifdef __DEBUG__
673
667
opal_output_verbose (10 , ompi_topo_base_framework .framework_output ,
674
668
"====> nb levels : %i\n" ,tm_topology -> nb_levels );
@@ -690,6 +684,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
690
684
691
685
/* Todo : Bcast + group creation */
692
686
/* scatter the ranks */
687
+ /* don't need to convert k from local rank to global rank */
693
688
if (OMPI_SUCCESS != (err = comm_old -> c_coll -> coll_scatter (k , 1 , MPI_INT ,
694
689
& newrank , 1 , MPI_INT ,
695
690
0 , comm_old ,
@@ -770,6 +765,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
770
765
tm_solution_t * sol = NULL ;
771
766
tm_affinity_mat_t * aff_mat = NULL ;
772
767
double * * comm_pattern = NULL ;
768
+ int * obj_to_rank_in_lcomm = NULL ;
773
769
774
770
comm_pattern = (double * * )malloc (num_procs_in_node * sizeof (double * ));
775
771
for ( i = 0 ; i < num_procs_in_node ; i ++ ) {
@@ -800,35 +796,57 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
800
796
tm_topology -> nb_levels = numlevels ;
801
797
tm_topology -> arity = (int * )calloc (tm_topology -> nb_levels , sizeof (int ));
802
798
tm_topology -> nb_nodes = (size_t * )calloc (tm_topology -> nb_levels , sizeof (size_t ));
803
- tm_topology -> node_id = (int * * )malloc (tm_topology -> nb_levels * sizeof (int * ));
804
- tm_topology -> node_rank = (int * * )malloc (tm_topology -> nb_levels * sizeof (int * ));
805
-
799
+
806
800
for (i = 0 ; i < tm_topology -> nb_levels ; i ++ ){
807
801
int nb_objs = hwloc_get_nbobjs_by_depth (opal_hwloc_topology , tracker [i ]-> depth );
808
802
tm_topology -> nb_nodes [i ] = nb_objs ;
809
803
tm_topology -> arity [i ] = tracker [i ]-> arity ;
810
- tm_topology -> node_id [i ] = (int * )calloc (tm_topology -> nb_nodes [i ], sizeof (int ));
811
- tm_topology -> node_rank [i ] = (int * )calloc (tm_topology -> nb_nodes [i ], sizeof (int ));
812
- for (j = 0 ; j < (int )tm_topology -> nb_nodes [i ] ; j ++ ){
813
- tm_topology -> node_id [i ][j ] = j ;
814
- tm_topology -> node_rank [i ][j ] = j ;
815
- }
816
804
}
817
805
806
+
807
+ #ifdef __DEBUG__
808
+ assert (num_objs_in_node == (int )tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
809
+ #endif
810
+ /* create a table that derives the rank in local (node) comm from the object number */
811
+ obj_to_rank_in_lcomm = (int * )malloc (num_objs_in_node * sizeof (int ));
812
+ for (i = 0 ; i < num_objs_in_node ; i ++ ) {
813
+ obj_to_rank_in_lcomm [i ] = -1 ;
814
+ object = hwloc_get_obj_by_depth (opal_hwloc_topology , effective_depth , i );
815
+ for ( j = 0 ; j < num_procs_in_node ; j ++ )
816
+ if (localrank_to_objnum [j ] == (int )(object -> logical_index )) {
817
+ obj_to_rank_in_lcomm [i ] = j ;
818
+ break ;
819
+ }
820
+ }
821
+
822
+ /* Build process id tab */
823
+ tm_topology -> node_id = (int * )malloc (num_objs_in_node * sizeof (int ));
824
+ tm_topology -> node_rank = (int * )malloc (num_objs_in_node * sizeof (int ));
825
+ for (i = 1 ; i < num_objs_in_node ; i ++ )
826
+ tm_topology -> node_id [i ] = tm_topology -> node_rank [i ] = -1 ;
827
+
828
+ for ( i = 0 ; i < num_objs_in_node ; i ++ ) {
829
+ /*note : we make the hypothesis that logical indexes in hwloc range from
830
+ 0 to N, are contiguous and crescent. */
831
+ tm_topology -> node_id [i ] = obj_to_rank_in_lcomm [i ];
832
+ if ( obj_to_rank_in_lcomm [i ] != -1 )
833
+ tm_topology -> node_rank [obj_to_rank_in_lcomm [i ]] = i ;
834
+ }
835
+
818
836
/* unused for now*/
819
837
tm_topology -> cost = (double * )calloc (tm_topology -> nb_levels ,sizeof (double ));
820
838
821
839
tm_topology -> nb_proc_units = num_objs_in_node ;
822
- //tm_topology->nb_proc_units = num_procs_in_node;
823
840
tm_topology -> nb_constraints = 0 ;
824
- for (i = 0 ; i < num_procs_in_node ; i ++ )
825
- if (localrank_to_objnum [i ] != -1 )
841
+
842
+ for (i = 0 ; i < num_objs_in_node ; i ++ )
843
+ if (obj_to_rank_in_lcomm [i ] != -1 )
826
844
tm_topology -> nb_constraints ++ ;
827
-
845
+
828
846
tm_topology -> constraints = (int * )calloc (tm_topology -> nb_constraints ,sizeof (int ));
829
- for (idx = 0 ,i = 0 ; i < num_procs_in_node ; i ++ )
830
- if (localrank_to_objnum [i ] != -1 )
831
- tm_topology -> constraints [idx ++ ] = localrank_to_objnum [i ];
847
+ for (idx = 0 ,i = 0 ; i < num_objs_in_node ; i ++ )
848
+ if (obj_to_rank_in_lcomm [i ] != -1 )
849
+ tm_topology -> constraints [idx ++ ] = obj_to_rank_in_lcomm [i ];
832
850
833
851
tm_topology -> oversub_fact = 1 ;
834
852
@@ -841,12 +859,12 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
841
859
OPAL_OUTPUT_VERBOSE ((10 , ompi_topo_base_framework .framework_output ,
842
860
"Nb objs for level %i : %lu | arity %i\n " ,
843
861
i , tm_topology -> nb_nodes [i ],tm_topology -> arity [i ]));
844
- dump_int_array (10 , ompi_topo_base_framework .framework_output ,
845
- "" , "Obj id " , tm_topology -> node_id [i ], tm_topology -> nb_nodes [i ]);
846
862
}
863
+ dump_int_array (10 , ompi_topo_base_framework .framework_output ,
864
+ "" , "Obj id " , tm_topology -> node_id , tm_topology -> nb_nodes [tm_topology -> nb_levels - 1 ]);
847
865
tm_display_topology (tm_topology );
848
866
#endif
849
- tm_optimize_topology (& tm_topology );
867
+ // tm_optimize_topology(&tm_topology);
850
868
aff_mat = tm_build_affinity_mat (comm_pattern ,num_procs_in_node );
851
869
comm_tree = tm_build_tree_from_topology (tm_topology ,aff_mat , NULL , NULL );
852
870
sol = tm_compute_mapping (tm_topology , comm_tree );
@@ -866,15 +884,15 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
866
884
dump_int_array (10 , ompi_topo_base_framework .framework_output ,
867
885
"Matching : " , "" , sol -> sigma , sol -> sigma_length );
868
886
#endif
869
-
887
+ free ( obj_to_rank_in_lcomm );
870
888
free (aff_mat -> sum_row );
871
889
free (aff_mat );
872
890
free (comm_pattern );
873
891
tm_free_solution (sol );
874
892
tm_free_tree (comm_tree );
875
893
tm_free_topology (tm_topology );
876
894
}
877
-
895
+
878
896
/* Todo : Bcast + group creation */
879
897
/* scatter the ranks */
880
898
if (OMPI_SUCCESS != (err = localcomm -> c_coll -> coll_scatter (k , 1 , MPI_INT ,
0 commit comments