Skip to content

Commit a872e54

Browse files
Merge branch 'main' into topic/main/detect-host-numa-as-device-mem
2 parents b0bc58f + 7d20b86 commit a872e54

23 files changed

+1305
-135
lines changed

docs/installing-open-mpi/configure-cli-options/networking.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ can be used with ``configure``:
7373
only necessary if the PSM headers and libraries are not in default
7474
compiler/linker search paths.
7575

76-
PSM is the support library for QLogic InfiniPath and Intel TrueScale
76+
PSM is the support library for QLogic InfiniPath and Intel True Scale
7777
network adapters.
7878

7979
* ``--with-psm-libdir=DIR``:

docs/tuning-apps/networking/cuda.rst

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -160,21 +160,21 @@ PSM2 support for CUDA
160160
---------------------
161161

162162
CUDA-aware support is present in PSM2 MTL. When running CUDA-aware
163-
Open MPI on Intel Omni-path, the PSM2 MTL will automatically set
163+
Open MPI on Cornelis Networks Omni-Path, the PSM2 MTL will automatically set
164164
the ``PSM2_CUDA`` environment variable, which enables PSM2 to handle GPU
165165
buffers. If the user wants to use host buffers with a CUDA-aware Open
166166
MPI, it is recommended to set ``PSM2_CUDA`` to ``0`` in the execution
167167
environment. PSM2 also supports the NVIDIA GPUDirect
168168
feature. To enable this, users will need to set ``PSM2_GPUDIRECT``
169169
to ``1`` in the execution environment.
170170

171-
Note: The PSM2 library and ``hfi1`` driver with CUDA support are
172-
requirements to use GPUDirect support on Intel Omni-Path. The minimum
171+
Note: The PSM2 library and ``hfi1`` driver with CUDA support are requirements
172+
to use GPUDirect support on Cornelis Networks Omni-Path. The minimum
173173
PSM2 build version required is `PSM2 10.2.175
174174
<https://github.com/01org/opa-psm2/releases/tag/PSM2_10.2-175>`_.
175175

176-
For more information refer to the `Intel Omni-Path documentation
177-
<https://www.intel.com/content/www/us/en/support/articles/000016242/network-and-i-o/fabric-products.html>`_.
176+
For more information refer to the `Cornelis Networks Customer Center
177+
<https://customercenter.cornelisnetworks.com/>`_.
178178

179179
/////////////////////////////////////////////////////////////////////////
180180

@@ -629,10 +629,9 @@ limit is reached:
629629
What are some guidelines for using CUDA and Open MPI with Omni-Path?
630630
--------------------------------------------------------------------
631631

632-
When developing CUDA-aware Open MPI applications for OPA-based
633-
fabrics, the PSM2 transport is preferred and a CUDA-aware version of
634-
PSM2 is provided with all versions of the Intel Omni-Path IFS software
635-
suite.
632+
When developing CUDA-aware Open MPI applications for OPA-based fabrics, the
633+
PSM2 transport is preferred and a CUDA-aware version of PSM2 is provided with
634+
all versions of the Cornelis Networks Omni-Path OPXS software suite.
636635

637636
.. error:: TODO Are Intel/OPA references still correct?
638637

@@ -655,10 +654,10 @@ processes do not move between NUMA nodes. See the section on
655654
:ref:`NUMA Node Issues <faq-cuda-mpi-cuda-numa-issues-label>`, for
656655
more information.
657656

658-
For more information see the *Intel Performance Scaled Messaging 2
659-
(PSM2) Programmer's Guide* and the *Intel Omni-Path Performance Tuning
660-
Guide*, which can be found on the `Intel Omni-Path web site
661-
<https://www.intel.com/omnipath/FabricSoftwarePublications>`_.
657+
For more information see the *Cornelis Networks Performance Scaled Messaging 2
658+
(PSM2) Programmer's Guide* and the *Cornelis Networks Omni-Path Performance
659+
Tuning Guide*, which can be found in the `Cornelis Networks Customer Center
660+
<https://customercenter.cornelisnetworks.com/>`_.
662661

663662
.. error:: TODO Are Intel/OPA references still correct?
664663

docs/tuning-apps/networking/ofi.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,5 @@ Users can restrict access to a single HFI using the environment variable:
131131
More details can be found in the PSM2 Programmer's Guide and the Omni-Path
132132
Fabric Performance Tuning Guide.
133133

134-
Please also see `the full Omni-Path documentation
135-
<https://www.intel.com/content/www/us/en/support/articles/000016242/network-and-i-o/fabric-products.html>`_
134+
Please see the `Cornelis Networks Customer Center <https://customercenter.cornelisnetworks.com/>`_
136135
for more details.

ompi/mca/coll/han/Makefile.am

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ coll_han.h \
1616
coll_han_trigger.h \
1717
coll_han_algorithms.h \
1818
coll_han_alltoall.c \
19+
coll_han_alltoallv.c \
1920
coll_han_dynamic.h \
2021
coll_han_dynamic_file.h \
2122
coll_han_barrier.c \
@@ -34,8 +35,7 @@ coll_han_algorithms.c \
3435
coll_han_dynamic.c \
3536
coll_han_dynamic_file.c \
3637
coll_han_topo.c \
37-
coll_han_subcomms.c \
38-
coll_han_utils.c
38+
coll_han_subcomms.c
3939

4040
# Make the output library in this directory, and name it either
4141
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la

ompi/mca/coll/han/coll_han.h

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ typedef struct mca_coll_han_op_module_name_t {
199199
mca_coll_han_op_up_low_module_name_t scatter;
200200
mca_coll_han_op_up_low_module_name_t scatterv;
201201
mca_coll_han_op_up_low_module_name_t alltoall;
202+
mca_coll_han_op_up_low_module_name_t alltoallv;
202203
} mca_coll_han_op_module_name_t;
203204

204205
/**
@@ -260,6 +261,11 @@ typedef struct mca_coll_han_component_t {
260261
/* alltoall: parallel stages */
261262
int32_t han_alltoall_pstages;
262263

264+
/* low level module for alltoallv */
265+
uint32_t han_alltoallv_low_module;
266+
int64_t han_alltoallv_smsc_avg_send_limit;
267+
double han_alltoallv_smsc_noncontig_activation_limit;
268+
263269

264270
/* name of the modules */
265271
mca_coll_han_op_module_name_t han_op_module_name;
@@ -286,6 +292,8 @@ typedef struct mca_coll_han_component_t {
286292

287293
/* Define maximum dynamic errors printed by rank 0 with a 0 verbosity level */
288294
int max_dynamic_errors;
295+
296+
opal_free_list_t pack_buffers;
289297
} mca_coll_han_component_t;
290298

291299
/*
@@ -297,6 +305,7 @@ typedef struct mca_coll_han_single_collective_fallback_s
297305
union
298306
{
299307
mca_coll_base_module_alltoall_fn_t alltoall;
308+
mca_coll_base_module_alltoallv_fn_t alltoallv;
300309
mca_coll_base_module_allgather_fn_t allgather;
301310
mca_coll_base_module_allgatherv_fn_t allgatherv;
302311
mca_coll_base_module_allreduce_fn_t allreduce;
@@ -319,6 +328,7 @@ typedef struct mca_coll_han_single_collective_fallback_s
319328
typedef struct mca_coll_han_collectives_fallback_s
320329
{
321330
mca_coll_han_single_collective_fallback_t alltoall;
331+
mca_coll_han_single_collective_fallback_t alltoallv;
322332
mca_coll_han_single_collective_fallback_t allgather;
323333
mca_coll_han_single_collective_fallback_t allgatherv;
324334
mca_coll_han_single_collective_fallback_t allreduce;
@@ -384,6 +394,9 @@ OBJ_CLASS_DECLARATION(mca_coll_han_module_t);
384394
#define previous_alltoall fallback.alltoall.alltoall
385395
#define previous_alltoall_module fallback.alltoall.module
386396

397+
#define previous_alltoallv fallback.alltoallv.alltoallv
398+
#define previous_alltoallv_module fallback.alltoallv.module
399+
387400
#define previous_allgather fallback.allgather.allgather
388401
#define previous_allgather_module fallback.allgather.module
389402

@@ -440,6 +453,7 @@ OBJ_CLASS_DECLARATION(mca_coll_han_module_t);
440453
HAN_UNINSTALL_COLL_API(COMM, HANM, allgather); \
441454
HAN_UNINSTALL_COLL_API(COMM, HANM, allgatherv); \
442455
HAN_UNINSTALL_COLL_API(COMM, HANM, alltoall); \
456+
HAN_UNINSTALL_COLL_API(COMM, HANM, alltoallv); \
443457
han_module->enabled = false; /* entire module set to pass-through from now on */ \
444458
} while(0)
445459

@@ -503,6 +517,9 @@ int
503517
mca_coll_han_alltoall_intra_dynamic(ALLTOALL_BASE_ARGS,
504518
mca_coll_base_module_t *module);
505519
int
520+
mca_coll_han_alltoallv_intra_dynamic(ALLTOALLV_BASE_ARGS,
521+
mca_coll_base_module_t *module);
522+
int
506523
mca_coll_han_allgather_intra_dynamic(ALLGATHER_BASE_ARGS,
507524
mca_coll_base_module_t *module);
508525
int
@@ -544,13 +561,6 @@ ompi_coll_han_reorder_gather(const void *sbuf,
544561
struct ompi_communicator_t *comm,
545562
int * topo);
546563

547-
size_t
548-
coll_han_utils_gcd(const uint64_t *numerators, const size_t size);
549-
550-
int
551-
coll_han_utils_create_contiguous_datatype(size_t count, const ompi_datatype_t *oldType,
552-
ompi_datatype_t **newType);
553-
554564
static inline struct mca_smsc_endpoint_t *mca_coll_han_get_smsc_endpoint (struct ompi_proc_t *proc) {
555565
extern opal_mutex_t mca_coll_han_lock;
556566
if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_SMSC]) {
@@ -566,4 +576,7 @@ static inline struct mca_smsc_endpoint_t *mca_coll_han_get_smsc_endpoint (struct
566576
return (struct mca_smsc_endpoint_t *) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_SMSC];
567577
}
568578

579+
#define COLL_HAN_PACKBUF_PAYLOAD_BYTES (128*1024)
580+
581+
569582
#endif /* MCA_COLL_HAN_EXPORT_H */

ompi/mca/coll/han/coll_han_algorithms.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ mca_coll_han_algorithm_value_t* mca_coll_han_available_algorithms[COLLCOUNT] =
8282
{"smsc", (fnptr_t)&mca_coll_han_alltoall_using_smsc}, // 2-level
8383
{ 0 }
8484
},
85+
[ALLTOALLV] = (mca_coll_han_algorithm_value_t[]){
86+
{"smsc", (fnptr_t)&mca_coll_han_alltoallv_using_smsc}, // 2-level
87+
{ 0 }
88+
},
8589
};
8690

8791
int

ompi/mca/coll/han/coll_han_algorithms.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,5 +214,10 @@ int
214214
mca_coll_han_alltoall_using_smsc(ALLTOALL_BASE_ARGS,
215215
mca_coll_base_module_t *module);
216216

217+
/* Alltoallv */
218+
int
219+
mca_coll_han_alltoallv_using_smsc(ALLTOALLV_BASE_ARGS,
220+
mca_coll_base_module_t *module);
221+
217222

218223
#endif

0 commit comments

Comments
 (0)