Skip to content

Commit 3810b36

Browse files
authored
Fix iteration over remoteSockets during invalidation (#195)
When an INVL command is received from a peer, the connection socket is closed and removed from the remoteSockets map. However, since command processing is performed inside an outer loop over remoteSockets, the removal also invalidates the outer loop's iterator, which causes a segfault[0]. Fix the issue by refactoring the loop to assign the new iterator value returned by map::erase() to the outer loop iterator. Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
1 parent 6e06af1 commit 3810b36

File tree

1 file changed

+18
-6
lines changed

1 file changed

+18
-6
lines changed

src/core/nixl_listener.cpp

+18-6
Original file line number | Diff line number | Diff line change
@@ -195,17 +195,22 @@ void nixlAgentData::commWorker(nixlAgent* myAgent){
195195
}
196196

197197
// third, do remote commands
198-
for (const auto& [sock_peer, socketClient] : remoteSockets ) {
198+
auto socket_iter = remoteSockets.begin();
199+
while (socket_iter != remoteSockets.end()) {
199200
std::string commands;
200201
std::vector<std::string> command_list;
201202
nixl_status_t ret;
202203

203-
ssize_t recv_bytes = recvCommMessage(socketClient, commands);
204+
ssize_t recv_bytes = recvCommMessage(socket_iter->second, commands);
204205

205-
if(recv_bytes == 0 || recv_bytes == -1) continue;
206+
if(recv_bytes == 0 || recv_bytes == -1) {
207+
socket_iter++;
208+
continue;
209+
}
206210

207211
command_list = str_split_substr(commands, "NIXLCOMM:");
208212

213+
bool invl = false;
209214
for(std::string command : command_list) {
210215

211216
if(command.size() < 4) continue;
@@ -225,14 +230,21 @@ void nixlAgentData::commWorker(nixlAgent* myAgent){
225230
nixl_blob_t my_MD;
226231
myAgent->getLocalMD(my_MD);
227232

228-
sendCommMessage(socketClient, std::string("NIXLCOMM:LOAD" + my_MD));
233+
sendCommMessage(socket_iter->second, std::string("NIXLCOMM:LOAD" + my_MD));
229234
} else if(header == "INVL") {
230-
close(socketClient);
231-
remoteSockets.erase(sock_peer);
235+
invl = true;
236+
break;
232237
} else {
233238
throw std::runtime_error("Received socket message with bad header" + header + ", critically failing\n");
234239
}
235240
}
241+
242+
if (invl) {
243+
close(socket_iter->second);
244+
socket_iter = remoteSockets.erase(socket_iter);
245+
} else {
246+
socket_iter++;
247+
}
236248
}
237249

238250
nixlTime::us_t start = nixlTime::getUs();

0 commit comments

Comments
 (0)