@@ -199,6 +199,7 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptr<ov::npuw::Com
199
199
// Create infer requests
200
200
// Preallocate funcall tensors & substitute function call requests
201
201
bool failover_happened = false ;
202
+ bool has_spatial = false ;
202
203
for (size_t i = 0 ; i < m_num_submodels; i++) {
203
204
LOG_INFO (" Creating infer request for Subgraph[" << i << " ]..." );
204
205
LOG_BLOCK ();
@@ -221,6 +222,8 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptr<ov::npuw::Com
221
222
222
223
// Initialize the spatial IO placeholders, if required
223
224
if (proto_comp_model_desc.spatial ) {
225
+ has_spatial = true ;
226
+
224
227
m_spatial_io[real_idx].inputs .resize (proto_comp_model_desc.param_base );
225
228
m_spatial_io[real_idx].input_tails .resize (proto_comp_model_desc.param_base );
226
229
m_spatial_io[real_idx].outputs .resize (num_outputs);
@@ -399,6 +402,24 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptr<ov::npuw::Com
399
402
} // for(closure)
400
403
LOG_VERB (" DONE" );
401
404
}
405
+
406
+ // Handle spatial dynamic submission
407
+ if (has_spatial) {
408
+ if (m_npuw_model->m_cfg .get <::intel_npu::NPUW_SPATIAL_DYN>()) {
409
+ LOG_VERB (" Finding spatial features..." );
410
+ LOG_BLOCK ();
411
+ m_spatial_selector = runtime::spatial::AttentionMask::find (*this );
412
+ if (!m_spatial_selector) {
413
+ LOG_WARN (" Spatial capability is enabled, but no run-time features were found." );
414
+ // Fallback selector to ALL
415
+ m_spatial_selector.reset (new runtime::spatial::All ());
416
+ }
417
+ } else {
418
+ // Just force selector to ALL
419
+ m_spatial_selector.reset (new runtime::spatial::All ());
420
+ }
421
+ LOG_VERB (" Done" );
422
+ }
402
423
}
403
424
404
425
void ov::npuw::JustInferRequest::connect_subrequests () {
@@ -506,6 +527,11 @@ void ov::npuw::JustInferRequest::prepare_for_infer() {
506
527
LOG_DEBUG (" Pre-initializing weights for subgraph[" << id << " ]" );
507
528
unpack_closure (id, m_subrequests[id]);
508
529
}
530
+
531
+ // Adjust spatial input range, if supported
532
+ if (m_spatial_selector) {
533
+ m_spatial_selector->prepare ();
534
+ }
509
535
LOG_DEBUG (" Done" );
510
536
}
511
537
@@ -915,6 +941,7 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) {
915
941
// must be prepared in the m_spatial_io at this point
916
942
const auto & spatial = comp_model_desc.spatial .value ();
917
943
const auto num_outputs = comp_model_desc.compiled_model ->outputs ().size ();
944
+ NPUW_ASSERT (m_spatial_selector);
918
945
919
946
// Create a sparse vector with full input sizes.
920
947
// For the access simplicity, its size is aligned with function's
@@ -940,6 +967,10 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) {
940
967
941
968
std::size_t offset = 0u ;
942
969
for (std::size_t i = 0u ; i < spatial.nway_iters ; i++, offset += spatial.nway ) {
970
+ if (!m_spatial_selector->need_submit (offset, spatial.nway )) {
971
+ continue ;
972
+ }
973
+
943
974
// Collect spatial inputs for this offset
944
975
for (auto && param : spatial.params ) {
945
976
const auto & iport = comp_model_desc.compiled_model ->inputs ()[param.idx ];
@@ -963,7 +994,7 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) {
963
994
} // for(full_nway_times)
964
995
965
996
// Now process the tail, if required
966
- if (spatial.tail_size ) {
997
+ if (spatial.tail_size && m_spatial_selector-> need_submit (offset, spatial. tail_size ) ) {
967
998
// Copy the sub-ranges to spatial inputs
968
999
// NOTE: tails buffers are read from/written to at 0th offset!
969
1000
for (auto && param : spatial.params ) {
@@ -1085,7 +1116,7 @@ ov::npuw::TensorPtr ov::npuw::JustInferRequest::allocMem(const ov::element::Type
1085
1116
return ov::get_tensor_impl (ov::Tensor (type, shape));
1086
1117
}
1087
1118
1088
- std::lock_guard<std::mutex> guard (m_alloc_mutex);
1119
+ // Protect access to shared context(s) - at least among infer requests
1089
1120
auto remote_ctx = m_npuw_model->get_plugin ()->get_core ()->get_default_context (device)._ptr ;
1090
1121
auto remote_tensor = remote_ctx->create_host_tensor (type, shape);
1091
1122
return ov::get_tensor_impl (ov::make_tensor (remote_tensor));
0 commit comments