@@ -16,12 +16,14 @@ namespace serve {
16
16
* Aside from that, this action sends the computed KV data to remote
17
17
* instances after computing the KV data.
18
18
*/
19
- class NewRequestPrefillWithKVSendActionObj : public BatchPrefillBaseActionObj {
19
+ class DisaggRemoteSendActionObj : public BatchPrefillBaseActionObj {
20
20
public:
21
- explicit NewRequestPrefillWithKVSendActionObj (
22
- Array<Model> models, std::vector<ModelWorkspace> model_workspaces, EngineConfig engine_config,
23
- std::vector<picojson::object> model_configs, Optional<EventTraceRecorder> trace_recorder,
24
- FRequestStreamCallback request_stream_callback, Device device)
21
+ explicit DisaggRemoteSendActionObj (Array<Model> models,
22
+ std::vector<ModelWorkspace> model_workspaces,
23
+ EngineConfig engine_config,
24
+ std::vector<picojson::object> model_configs,
25
+ Optional<EventTraceRecorder> trace_recorder,
26
+ FRequestStreamCallback request_stream_callback, Device device)
25
27
: BatchPrefillBaseActionObj(std::move(models), std::move(engine_config),
26
28
std::move(model_configs), std::move(trace_recorder)),
27
29
model_workspaces_(std::move(model_workspaces)),
@@ -39,7 +41,7 @@ class NewRequestPrefillWithKVSendActionObj : public BatchPrefillBaseActionObj {
39
41
// - Find the requests in `waiting_queue` that can prefill in this step.
40
42
std::vector<PrefillInput> prefill_inputs;
41
43
{
42
- NVTXScopedRange nvtx_scope (" NewRequestPrefillWithKVSend getting requests" );
44
+ NVTXScopedRange nvtx_scope (" DisaggRemoteSend getting requests" );
43
45
prefill_inputs = GetRequestStateEntriesToPrefill (estate);
44
46
if (prefill_inputs.empty ()) {
45
47
return {};
@@ -48,7 +50,7 @@ class NewRequestPrefillWithKVSendActionObj : public BatchPrefillBaseActionObj {
48
50
49
51
int num_rsentries = prefill_inputs.size ();
50
52
{
51
- NVTXScopedRange nvtx_scope (" NewRequestPrefillWithKVSend matching prefix" );
53
+ NVTXScopedRange nvtx_scope (" DisaggRemoteSend matching prefix" );
52
54
for (int i = 0 ; i < num_rsentries; ++i) {
53
55
MatchPrefixCache (estate, &prefill_inputs[i]);
54
56
}
@@ -183,12 +185,12 @@ class NewRequestPrefillWithKVSendActionObj : public BatchPrefillBaseActionObj {
183
185
}
184
186
185
187
// Explicitly filter the waiting queue to only keep the requests
186
- // with disaggregation request kind "kRemotePrefill".
188
+ // with disaggregation request kind "kRemoteSend".
187
189
std::vector<Request> waiting_queue;
188
190
waiting_queue.reserve (estate->waiting_queue .size ());
189
191
for (Request request : estate->waiting_queue ) {
190
192
if (request->generation_cfg ->debug_config .disagg_config .kind ==
191
- DisaggRequestKind::kRemotePrefill ) {
193
+ DisaggRequestKind::kRemoteSend ) {
192
194
waiting_queue.push_back (request);
193
195
}
194
196
}
@@ -481,11 +483,11 @@ class NewRequestPrefillWithKVSendActionObj : public BatchPrefillBaseActionObj {
481
483
TVMStreamHandle compute_stream_ = nullptr ;
482
484
};
483
485
484
- EngineAction EngineAction::NewRequestPrefillWithKVSend (
486
+ EngineAction EngineAction::DisaggRemoteSend (
485
487
Array<Model> models, std::vector<ModelWorkspace> model_workspaces, EngineConfig engine_config,
486
488
std::vector<picojson::object> model_configs, Optional<EventTraceRecorder> trace_recorder,
487
489
FRequestStreamCallback request_stream_callback, Device device) {
488
- return EngineAction (make_object<NewRequestPrefillWithKVSendActionObj >(
490
+ return EngineAction (make_object<DisaggRemoteSendActionObj >(
489
491
std::move (models), std::move (model_workspaces), std::move (engine_config),
490
492
std::move (model_configs), std::move (trace_recorder), std::move (request_stream_callback),
491
493
device));
0 commit comments