@@ -2306,7 +2306,7 @@ static inline int free_eager_rx_buff_req(nccl_net_ofi_rdma_req_t *req,
 	nccl_net_ofi_rdma_ep_t *ep = rx_buff_data->ep;
 	/* Free buffer */
 	if (rx_buff_data->rx_buff_fl_elem) {
-		nccl_ofi_freelist_entry_free(ep->rx_buff_fl, rx_buff_data->rx_buff_fl_elem);
+		nccl_ofi_freelist_entry_free(ep->eager_rx_buff_fl, rx_buff_data->rx_buff_fl_elem);
 	}
 	return free_base_req(NULL, ep->rx_buff_reqs_fl, req, false);
 }
@@ -2325,7 +2325,7 @@ static inline nccl_net_ofi_rdma_req_t *eager_rx_buff_req_alloc(nccl_net_ofi_rdma
 	rdma_req_rx_buff_data_t *rx_buff_data = get_rx_buff_data(req);

 	nccl_ofi_freelist_elem_t *rx_buff_fl_elem =
-		nccl_ofi_freelist_entry_alloc(ep->rx_buff_fl);
+		nccl_ofi_freelist_entry_alloc(ep->eager_rx_buff_fl);
 	if (!rx_buff_fl_elem) {
 		NCCL_OFI_WARN("Failed to allocate rx_buff_fl_elem");
 		req->free(req, false);
@@ -2334,7 +2334,7 @@ static inline nccl_net_ofi_rdma_req_t *eager_rx_buff_req_alloc(nccl_net_ofi_rdma
 	assert(NCCL_OFI_IS_PTR_ALIGNED(rx_buff_fl_elem->ptr, EAGER_RX_BUFFER_ALIGNMENT));

 	rx_buff_data->rx_buff_fl_elem = rx_buff_fl_elem;
-	rx_buff_data->buff_len = ep->rx_buff_size;
+	rx_buff_data->buff_len = ep->eager_rx_buff_size;
 	rx_buff_data->rail = rail;
 	rx_buff_data->ep = ep;
 	return req;
@@ -2348,7 +2348,7 @@ static inline int ctrl_rx_buff_req_free(nccl_net_ofi_rdma_req_t *req,
 	nccl_net_ofi_rdma_ep_t *ep = rx_buff_data->ep;
 	/* Free buffer */
 	if (rx_buff_data->rx_buff_fl_elem) {
-		nccl_ofi_freelist_entry_free(ep->rx_buff_fl, rx_buff_data->rx_buff_fl_elem);
+		nccl_ofi_freelist_entry_free(ep->ctrl_rx_buff_fl, rx_buff_data->rx_buff_fl_elem);
 	}
 	return free_base_req(NULL, ep->rx_buff_reqs_fl, req, false);
 }
@@ -2367,16 +2367,15 @@ static inline nccl_net_ofi_rdma_req_t *ctrl_rx_buff_req_alloc(nccl_net_ofi_rdma_
 	rdma_req_rx_buff_data_t *rx_buff_data = get_rx_buff_data(req);

 	nccl_ofi_freelist_elem_t *rx_buff_fl_elem =
-		nccl_ofi_freelist_entry_alloc(ep->rx_buff_fl);
+		nccl_ofi_freelist_entry_alloc(ep->ctrl_rx_buff_fl);
 	if (!rx_buff_fl_elem) {
 		NCCL_OFI_WARN("Failed to allocate rx_buff_fl_elem");
 		req->free(req, false);
 		return NULL;
 	}
-	assert(NCCL_OFI_IS_PTR_ALIGNED(rx_buff_fl_elem->ptr, EAGER_RX_BUFFER_ALIGNMENT));

 	rx_buff_data->rx_buff_fl_elem = rx_buff_fl_elem;
-	rx_buff_data->buff_len = ep->rx_buff_size;
+	rx_buff_data->buff_len = ep->ctrl_rx_buff_size;
 	rx_buff_data->rail = rail;
 	rx_buff_data->ep = ep;
 	return req;
@@ -5517,8 +5516,9 @@ static int post_rx_buffer(nccl_net_ofi_rdma_req_t *req,
 	 * accessible but undefined to cover cases where the buffer
 	 * gets re-posted */
 	nccl_net_ofi_rdma_ep_t *ep = rx_buff_data->ep;
-	nccl_ofi_freelist_entry_set_undefined(ep->rx_buff_fl,
-					      rx_buff_fl_elem->ptr);
+	nccl_ofi_freelist_t *fl = (req->type == NCCL_OFI_RDMA_EAGER_RX_BUFF ?
+				   ep->eager_rx_buff_fl : ep->ctrl_rx_buff_fl);
+	nccl_ofi_freelist_entry_set_undefined(fl, rx_buff_fl_elem->ptr);

 	iov.iov_base = rx_buff_fl_elem->ptr;
 	iov.iov_len = rx_buff_data->buff_len;
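
Note: the new selection above keys off the request type. The standalone sketch below mirrors that ternary so it can be sanity-checked in isolation; the `freelist_t`/`ep_t` types, the `pick_rx_buff_fl` helper, and the second enum value are illustrative stand-ins, not the plugin's real definitions.

```c
#include <assert.h>
#include <stdio.h>

/* Illustrative stand-ins only -- not the plugin's real definitions. */
typedef enum {
	NCCL_OFI_RDMA_EAGER_RX_BUFF,
	NCCL_OFI_RDMA_CTRL_RX_BUFF,
} req_type_t;

typedef struct { const char *name; } freelist_t;

typedef struct {
	freelist_t *eager_rx_buff_fl;
	freelist_t *ctrl_rx_buff_fl;
} ep_t;

/* Mirrors the ternary added to post_rx_buffer(): eager rx-buffer requests
 * use the eager freelist, all other rx-buffer requests use the ctrl one. */
static freelist_t *pick_rx_buff_fl(ep_t *ep, req_type_t type)
{
	return type == NCCL_OFI_RDMA_EAGER_RX_BUFF ?
	       ep->eager_rx_buff_fl : ep->ctrl_rx_buff_fl;
}

int main(void)
{
	freelist_t eager = { "eager_rx_buff_fl" };
	freelist_t ctrl  = { "ctrl_rx_buff_fl" };
	ep_t ep = { &eager, &ctrl };

	assert(pick_rx_buff_fl(&ep, NCCL_OFI_RDMA_EAGER_RX_BUFF) == &eager);
	assert(pick_rx_buff_fl(&ep, NCCL_OFI_RDMA_CTRL_RX_BUFF) == &ctrl);
	printf("rx buffer freelist selection OK\n");
	return 0;
}
```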
@@ -6195,17 +6195,28 @@ static inline int init_rx_buffers(nccl_net_ofi_rdma_ep_t *ep)
 		return ret;
 	}

-	ret = nccl_ofi_freelist_init_mr(ep->rx_buff_size,
+	ret = nccl_ofi_freelist_init_mr(ep->ctrl_rx_buff_size,
 					ofi_nccl_rdma_min_posted_bounce_buffers(), 16, 0,
 					freelist_regmr_host_fn, freelist_deregmr_host_fn,
-					ep, EAGER_RX_BUFFER_ALIGNMENT, &ep->rx_buff_fl);
+					ep, 1, &ep->ctrl_rx_buff_fl);
 	if (ret != 0) {
-		NCCL_OFI_WARN("Failed to init rx_buff_fl");
+		NCCL_OFI_WARN("Failed to init ctrl_rx_buff_fl");
 		if (nccl_ofi_freelist_fini(ep->rx_buff_reqs_fl))
 			NCCL_OFI_WARN("Also failed to freelist_fini rx_buff_reqs_fl");
 		return ret;
 	}

+	ret = nccl_ofi_freelist_init_mr(ep->eager_rx_buff_size,
+					ofi_nccl_rdma_min_posted_bounce_buffers(), 16, 0,
+					freelist_regmr_host_fn, freelist_deregmr_host_fn,
+					ep, EAGER_RX_BUFFER_ALIGNMENT, &ep->eager_rx_buff_fl);
+	if (ret != 0) {
+		NCCL_OFI_WARN("Failed to init eager_rx_buff_fl");
+		nccl_ofi_freelist_fini(ep->ctrl_rx_buff_fl);
+		nccl_ofi_freelist_fini(ep->rx_buff_reqs_fl);
+		return ret;
+	}
+
 	/*
 	 * The *_rx_buff_posted limits are used in the progress engine to
 	 * determine if the receive queue is hydrated with sufficient buffers.
@@ -6255,9 +6266,15 @@ static inline int fini_rx_buffers(nccl_net_ofi_rdma_ep_t *ep)
 	int ret = 0;
 	nccl_net_ofi_ep_rail_t *rail;

-	ret = nccl_ofi_freelist_fini(ep->rx_buff_fl);
+	ret = nccl_ofi_freelist_fini(ep->ctrl_rx_buff_fl);
+	if (ret != 0) {
+		NCCL_OFI_WARN("Failed to fini ctrl_rx_buff_fl");
+		return ret;
+	}
+
+	ret = nccl_ofi_freelist_fini(ep->eager_rx_buff_fl);
 	if (ret != 0) {
-		NCCL_OFI_WARN("Failed to fini rx_buff_fl");
+		NCCL_OFI_WARN("Failed to fini eager_rx_buff_fl");
 		return ret;
 	}
@@ -7219,8 +7236,11 @@ static int nccl_net_ofi_rdma_domain_create_endpoint(nccl_net_ofi_domain_t *base_
 		goto error;
 	}

-	ep->rx_buff_size = NCCL_OFI_MAX(NCCL_OFI_MAX(sizeof(nccl_net_ofi_rdma_ctrl_msg_t), eager_max_size),
-					sizeof(nccl_ofi_rdma_connection_info_t));
+	ep->ctrl_rx_buff_size =
+		NCCL_OFI_MAX(sizeof(nccl_net_ofi_rdma_ctrl_msg_t),
+			     NCCL_OFI_MAX(sizeof(nccl_ofi_rdma_connection_info_t),
+					  sizeof(nccl_net_ofi_rdma_close_msg_t)));
+	ep->eager_rx_buff_size = eager_max_size;

 	ep->is_endpoint_per_communicator_ep = false;
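
Note: `ctrl_rx_buff_size` is now the max of the three control-path message sizes, while eager buffers get exactly `eager_max_size`. Below is a small self-contained check of the nested-max shape of that expression; it assumes the conventional max-macro definition and uses made-up struct sizes, since the real `NCCL_OFI_MAX` and message layouts are defined elsewhere in the plugin.

```c
#include <stdio.h>

/* Assumes the conventional max macro; the plugin's actual NCCL_OFI_MAX
 * definition lives in its headers. */
#define NCCL_OFI_MAX(x, y) ((x) > (y) ? (x) : (y))

/* Placeholder message structs with made-up sizes; the real layouts of
 * nccl_net_ofi_rdma_ctrl_msg_t, nccl_ofi_rdma_connection_info_t, and
 * nccl_net_ofi_rdma_close_msg_t are defined elsewhere in the plugin. */
typedef struct { char pad[48]; }  ctrl_msg_t;
typedef struct { char pad[256]; } conn_info_t;
typedef struct { char pad[32]; }  close_msg_t;

int main(void)
{
	size_t eager_max_size = 8192; /* example value */

	/* Same nested-max shape as the new ctrl_rx_buff_size expression. */
	size_t ctrl_rx_buff_size =
		NCCL_OFI_MAX(sizeof(ctrl_msg_t),
			     NCCL_OFI_MAX(sizeof(conn_info_t),
					  sizeof(close_msg_t)));
	size_t eager_rx_buff_size = eager_max_size;

	printf("ctrl_rx_buff_size  = %zu\n", ctrl_rx_buff_size);  /* 256 with these stand-ins */
	printf("eager_rx_buff_size = %zu\n", eager_rx_buff_size);
	return 0;
}
```

Splitting the pools this way presumably lets the frequent, small control messages live in compact buffers with no special alignment (note the alignment argument of 1 passed for the ctrl pool in init_rx_buffers()), instead of each occupying an eager-sized, eager-aligned registered buffer.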