Skip to content

Commit 120bc74

Browse files
committed
API: added descs to checkRemoteMD
* As well as minor reordering of the API Signed-off-by: Moein Khazraee <moein@nvidia.com>
1 parent 53012ac commit 120bc74

File tree

6 files changed

+180
-162
lines changed

6 files changed

+180
-162
lines changed

examples/python/blocking_send_recv_example.py

-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import torch
2121

2222
from nixl._api import nixl_agent, nixl_agent_config
23-
from nixl._bindings import nixlNotFoundError
2423

2524

2625
def parse_args():

examples/python/partial_md_example.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
ready = False
8181

8282
while not ready:
83-
ready = init_agent.check_remote_metadata("target")
83+
ready = init_agent.check_remote_metadata("target", target_xfer_descs1)
8484

8585
xfer_handle_1 = init_agent.initialize_xfer(
8686
"READ", init_xfer_descs, target_xfer_descs1, "target", b"UUID1"

src/api/cpp/nixl.h

+58-54
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,59 @@ class nixlAgent {
316316
const nixl_blob_t &msg,
317317
const nixl_opt_args_t* extra_params = nullptr);
318318

319+
/*** Metadata handling through side channel ***/
320+
/**
321+
* @brief Get metadata blob for this agent, to be given to other agents.
322+
*
323+
* @param str [out] The serialized metadata blob
324+
* @return nixl_status_t Error code if call was not successful
325+
*/
326+
nixl_status_t
327+
getLocalMD (nixl_blob_t &str) const;
328+
329+
/**
330+
* @brief Get partial metadata blob for this agent, to be given to other agents.
331+
* If `descs` is empty, only backends' connection info is included in the metadata,
332+
* regardless of the value of `extra_params->includeConnInfo` and `descs` memory type.
333+
* If `descs` is non-empty, the metadata of the descriptors in the list are included,
334+
* and if `extra_params->includeConnInfo` is true, the connection info of the
335+
* backends supporting the memory type is also included.
336+
* If `extra_params->backends` is non-empty, only the descriptors supported by the
337+
* backends in the list and the backends' connection info are included in the metadata.
338+
*
339+
* @param descs [in] Descriptor list to include in the metadata
340+
* @param str [out] The serialized metadata blob
341+
* @param extra_params [in] Optional extra parameters used in getting partial metadata
342+
* @return nixl_status_t Error code if call was not successful
343+
*/
344+
nixl_status_t
345+
getLocalPartialMD(const nixl_reg_dlist_t &descs,
346+
nixl_blob_t &str,
347+
const nixl_opt_args_t* extra_params = nullptr) const;
348+
349+
/**
350+
* @brief Load other agent's metadata and unpack it internally. Now the local
351+
* agent can initiate transfers towards the remote agent.
352+
*
353+
* @param remote_metadata Serialized metadata blob to be loaded
354+
* @param agent_name [out] Agent name extracted from the loaded metadata blob
355+
* @return nixl_status_t Error code if call was not successful
356+
*/
357+
nixl_status_t
358+
loadRemoteMD (const nixl_blob_t &remote_metadata,
359+
std::string &agent_name);
360+
361+
/**
362+
* @brief Invalidate the remote agent metadata cached locally. This will
363+
* disconnect from that agent if already connected, and no more
364+
* transfers can be initiated towards that agent.
365+
*
366+
* @param remote_agent Remote agent name to invalidate its metadata blob
367+
* @return nixl_status_t Error code if call was not successful
368+
*/
369+
nixl_status_t
370+
invalidateRemoteMD (const std::string &remote_agent);
371+
319372
/*** Metadata handling through direct channels (p2p socket and ETCD) ***/
320373
/**
321374
* @brief Send your own agent metadata to a remote location.
@@ -348,7 +401,7 @@ class nixlAgent {
348401
* @return nixl_status_t Error code if call was not successful
349402
*/
350403
nixl_status_t
351-
sendLocalPartialMD(nixl_reg_dlist_t &descs,
404+
sendLocalPartialMD(const nixl_reg_dlist_t &descs,
352405
const nixl_opt_args_t* extra_params = nullptr) const;
353406

354407
/**
@@ -381,65 +434,16 @@ class nixlAgent {
381434

382435
/**
383436
* @brief Check if metadata is available for a remote agent.
437+
* When partial metadata methods are used, the descriptor list in question
438+
* can be specified; otherwise, empty `descs` can be passed.
384439
*
385440
* @param remote_name Remote agent to check for
386441
* @return nixl_status_t Error code, NOT_FOUND if metadata not found
387442
*/
388443
nixl_status_t
389-
checkRemoteMD (const std::string remote_name) const;
444+
checkRemoteMD (const std::string remote_name,
445+
const nixl_xfer_dlist_t &descs) const;
390446

391-
/*** Metadata handling through side channel ***/
392-
/**
393-
* @brief Get metadata blob for this agent, to be given to other agents.
394-
*
395-
* @param str [out] The serialized metadata blob
396-
* @return nixl_status_t Error code if call was not successful
397-
*/
398-
nixl_status_t
399-
getLocalMD (nixl_blob_t &str) const;
400-
401-
/**
402-
* @brief Get partial metadata blob for this agent, to be given to other agents.
403-
* If `descs` is empty, only backends' connection info is included in the metadata,
404-
* regardless of the value of `extra_params->includeConnInfo` and `descs` memory type.
405-
* If `descs` is non-empty, the metadata of the descriptors in the list are included,
406-
* and if `extra_params->includeConnInfo` is true, the connection info of the
407-
* backends supporting the memory type is also included.
408-
* If `extra_params->backends` is non-empty, only the descriptors supported by the
409-
* backends in the list and the backends' connection info are included in the metadata.
410-
*
411-
* @param descs [in] Descriptor list to include in the metadata
412-
* @param str [out] The serialized metadata blob
413-
* @param extra_params [in] Optional extra parameters used in getting partial metadata
414-
* @return nixl_status_t Error code if call was not successful
415-
*/
416-
nixl_status_t
417-
getLocalPartialMD(nixl_reg_dlist_t &descs,
418-
nixl_blob_t &str,
419-
const nixl_opt_args_t* extra_params = nullptr) const;
420-
421-
/**
422-
* @brief Load other agent's metadata and unpack it internally. Now the local
423-
* agent can initiate transfers towards the remote agent.
424-
*
425-
* @param remote_metadata Serialized metadata blob to be loaded
426-
* @param agent_name [out] Agent name extracted from the loaded metadata blob
427-
* @return nixl_status_t Error code if call was not successful
428-
*/
429-
nixl_status_t
430-
loadRemoteMD (const nixl_blob_t &remote_metadata,
431-
std::string &agent_name);
432-
433-
/**
434-
* @brief Invalidate the remote agent metadata cached locally. This will
435-
* disconnect from that agent if already connected, and no more
436-
* transfers can be initiated towards that agent.
437-
*
438-
* @param remote_agent Remote agent name to invalidate its metadata blob
439-
* @return nixl_status_t Error code if call was not successful
440-
*/
441-
nixl_status_t
442-
invalidateRemoteMD (const std::string &remote_agent);
443447
};
444448

445449
#endif

src/api/python/_api.py

+31-26
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,29 @@ def get_partial_agent_metadata(
612612
handle_list.append(self.backends[backend_string])
613613
return self.agent.getLocalPartialMD(descs, inc_conn_info, handle_list)
614614

615+
"""
616+
@brief Add a remote agent using its metadata. After this call, current agent can
617+
initiate transfers towards the remote agent.
618+
619+
@param metadata Metadata of the remote agent, received out-of-band in bytes.
620+
@return Name of the added remote agent.
621+
"""
622+
623+
def add_remote_agent(self, metadata: bytes) -> str:
624+
agent_name = self.agent.loadRemoteMD(metadata)
625+
return agent_name
626+
627+
"""
628+
@brief Remove a remote agent. After this call, current agent cannot initiate
629+
transfers towards the remote agent specified in the call anymore.
630+
This call will also result in a disconnect between the two agents.
631+
632+
@param agent Name of the remote agent.
633+
"""
634+
635+
def remove_remote_agent(self, agent: str):
636+
self.agent.invalidateRemoteMD(agent)
637+
615638
"""
616639
@brief Send all of your metadata to a peer or central metadata server.
617640
@@ -674,42 +697,24 @@ def invalidate_local_metadata(
674697
self.agent.invalidateLocalMD(ip_addr, port)
675698

676699
"""
677-
@brief Check if the remote metadata for a specific agent is available
700+
@brief Check if the remote metadata for a specific agent is available.
701+
When partial metadata methods are used, the descriptor list in question can be specified.
678702
679703
@param agent Name of the remote agent.
680704
681705
@return True if available, False otherwise
682706
"""
683707

684-
def check_remote_metadata(self, agent: str) -> bool:
685-
if self.agent.checkRemoteMD(agent) == nixlBind.NIXL_SUCCESS:
708+
def check_remote_metadata(
709+
self, agent: str, descs: nixlBind.nixlXferDList = None
710+
) -> bool:
711+
if descs is None: # Just empty list, mem_type not important
712+
descs = nixlBind.nixlXferDList(nixlBind.DRAM_SEG)
713+
if self.agent.checkRemoteMD(agent, descs) == nixlBind.NIXL_SUCCESS:
686714
return True
687715
else:
688716
return False
689717

690-
"""
691-
@brief Add a remote agent using its metadata. After this call, current agent can
692-
initiate transfers towards the remote agent.
693-
694-
@param metadata Metadata of the remote agent, received out-of-band in bytes.
695-
@return Name of the added remote agent.
696-
"""
697-
698-
def add_remote_agent(self, metadata: bytes) -> str:
699-
agent_name = self.agent.loadRemoteMD(metadata)
700-
return agent_name
701-
702-
"""
703-
@brief Remove a remote agent. After this call, current agent cannot initiate
704-
transfers towards the remote agent specified in the call anymore.
705-
This call will also result in a disconnect between the two agents.
706-
707-
@param agent Name of the remote agent.
708-
"""
709-
710-
def remove_remote_agent(self, agent: str):
711-
self.agent.invalidateRemoteMD(agent)
712-
713718
"""
714719
@brief Get nixlXferDList from different input types:
715720
a) list of 3 element tuples (address, len, device ID) alongside a mandatory memory type

src/bindings/python/nixl_bindings.cpp

+8-8
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,13 @@ PYBIND11_MODULE(_bindings, m) {
504504
throw_nixl_exception(agent.getLocalPartialMD(descs, ret_str, &extra_params));
505505
return py::bytes(ret_str);
506506
}, py::arg("descs"), py::arg("inc_conn_info") = false, py::arg("backends") = std::vector<uintptr_t>({}))
507+
.def("loadRemoteMD", [](nixlAgent &agent, const std::string &remote_metadata) -> py::bytes {
508+
//python can only interpret text strings
509+
std::string remote_name("");
510+
throw_nixl_exception(agent.loadRemoteMD(remote_metadata, remote_name));
511+
return py::bytes(remote_name);
512+
})
513+
.def("invalidateRemoteMD", &nixlAgent::invalidateRemoteMD)
507514
.def("sendLocalMD", [](nixlAgent &agent, std::string ip_addr, int port){
508515
nixl_opt_args_t extra_params;
509516

@@ -542,12 +549,5 @@ PYBIND11_MODULE(_bindings, m) {
542549

543550
throw_nixl_exception(agent.invalidateLocalMD(&extra_params));
544551
}, py::arg("ip_addr") = std::string(""), py::arg("port") = 0 )
545-
.def("checkRemoteMD", &nixlAgent::checkRemoteMD)
546-
.def("loadRemoteMD", [](nixlAgent &agent, const std::string &remote_metadata) -> py::bytes {
547-
//python can only interpret text strings
548-
std::string remote_name("");
549-
throw_nixl_exception(agent.loadRemoteMD(remote_metadata, remote_name));
550-
return py::bytes(remote_name);
551-
})
552-
.def("invalidateRemoteMD", &nixlAgent::invalidateRemoteMD);
552+
.def("checkRemoteMD", &nixlAgent::checkRemoteMD);
553553
}

0 commit comments

Comments
 (0)