@@ -135,47 +135,47 @@ OPAL_DECLSPEC int opal_common_ofi_providers_subset_of_list(struct fi_info *provi
135
135
/**
136
136
* Selects NIC (provider) based on hardware locality
137
137
*
138
- * In multi-nic situations, use hardware topology to pick the "best"
139
- * of the selected NICs.
140
- * There are 3 main cases that this covers :
141
- *
142
- * 1. If the first provider passed into this function is the only valid
143
- * provider, this provider is returned.
144
- *
145
- * 2. If there is more than 1 provider that matches the type of the first
146
- * provider in the list, and the BDF data
147
- * is available then a provider is selected based on locality of device
148
- * cpuset and process cpuset and tries to ensure that processes
149
- * are distributed evenly across NICs. This has two separate
150
- * cases:
151
- *
152
- * i. There is one or more provider local to the process:
153
- *
154
- * (local rank % number of providers of the same type
155
- * that share the process cpuset) is used to select one
156
- * of these providers .
157
- *
158
- * ii. There is no provider that is local to the process:
159
- *
160
- * (local rank % number of providers of the same type)
161
- * is used to select one of these providers
162
- *
163
- * 3. If there is more than 1 providers of the same type in the
164
- * list, and the BDF data is not available (the ofi version does
165
- * not support fi_info.nic or the provider does not support BDF)
166
- * then (local rank % number of providers of the same type) is
167
- * used to select one of these providers
168
- *
169
- * @param provider_list (IN) struct fi_info* An initially selected
170
- * provider NIC. The provider name and
171
- * attributes are used to restrict NIC
172
- * selection. This provider is returned if the
173
- * NIC selection fails.
174
- *
175
- * @param provider (OUT) struct fi_info* object with the selected
176
- * provider if the selection succeeds
177
- * if the selection fails, returns the fi_info
178
- * object that was initially provided.
138
+ * The selection is based on the following priority:
139
+ *
140
+ * Single-NIC:
141
+ *
142
+ * If only 1 provider is available, always return that provider.
143
+ *
144
+ * Multi-NIC:
145
+ *
146
+ * 1. If the process is NOT bound, pick a NIC using (local rank % number
147
+ * of providers of the same type). This gives a fair chance to each
148
+ * qualified NIC and balances overall utilization.
149
+ *
150
+ * 2. If the process is bound, we compare providers in the list that have
151
+ * the same type as the first provider, and find the provider with the
152
+ * shortest distance to the current process.
153
+ *
154
+ * i. If the provider has PCI BDF data, we attempt to compute the
155
+ * distance between the NIC and the current process cpuset. The NIC
156
+ * with the shortest distance is returned.
157
+ *
158
+ * * For equidistant NICs, we select a NIC in round-robin fashion
159
+ * using the package rank of the current process, i.e. (package
160
+ * rank % number of providers with the same distance).
161
+ *
162
+ * ii. If we cannot compute the distance between the NIC and the
163
+ * current process, e.g. PCI BDF data is not available, a NIC will be
164
+ * selected in a round-robin fashion using package rank, i.e. (package
165
+ * rank % number of providers of the same type).
166
+ *
167
+ * @param[in] provider_list struct fi_info* An initially selected
168
+ * provider NIC. The provider name and
169
+ * attributes are used to restrict NIC
170
+ * selection. This provider is returned if the
171
+ * NIC selection fails.
172
+ *
173
+ * @param[in] process_info opal_process_info_t* The current process info
174
+ *
175
+ * @return struct fi_info* The selected provider if the
176
+ * selection succeeds; if the selection fails,
177
+ * the fi_info object that was initially
178
+ * provided is returned.
179
179
*
180
180
* All errors should be recoverable and will return the initially provided
181
181
* provider. However, if an error occurs we can no longer guarantee
@@ -184,7 +184,7 @@ OPAL_DECLSPEC int opal_common_ofi_providers_subset_of_list(struct fi_info *provi
184
184
*
185
185
*/
186
186
OPAL_DECLSPEC struct fi_info * opal_common_ofi_select_provider (struct fi_info * provider_list ,
187
- opal_process_info_t * process_info );
187
+ opal_process_info_t * process_info );
188
188
189
189
/**
190
190
* Obtain EP endpoint name
0 commit comments