@@ -19,11 +19,12 @@ constexpr size_t BATCH_AXIS = 0;
 
 namespace intel_npu {
 
-SyncInferRequest::SyncInferRequest(const std::shared_ptr<const ICompiledModel>& compiledModel)
+SyncInferRequest::SyncInferRequest(const std::shared_ptr<const ICompiledModel>& compiledModel, const Config& config)
     : _compiledModel(compiledModel),
       _metadata(compiledModel->get_network_metadata()),
-      _userInputTensors(_metadata.inputs.size(), nullptr),
-      _userOutputTensors(_metadata.outputs.size(), nullptr) {
+      _logger("SyncInferRequest", config.get<LOG_LEVEL>()),
+      _userInputTensors(_metadata.inputs.size(), std::vector<ov::SoPtr<ov::ITensor>>(1, {nullptr})),
+      _userOutputTensors(_metadata.outputs.size(), {nullptr}) {
     OPENVINO_ASSERT(_compiledModel);
 
     if (get_outputs().empty()) {
@@ -121,7 +122,7 @@ ov::SoPtr<ov::ITensor> SyncInferRequest::get_tensor(const ov::Output<const ov::N
     OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port);
 
     if (foundPort.is_input()) {
-        return _userInputTensors.at(foundPort.idx);
+        return get_user_input(foundPort.idx);
     }
     return _userOutputTensors.at(foundPort.idx);
 }
@@ -138,17 +139,22 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const
     }
 
     if (foundPort.is_input()) {
-        _userInputTensors.at(foundPort.idx) = tensor._ptr;
+        get_user_input(foundPort.idx) = tensor;
     } else {
-        _userOutputTensors.at(foundPort.idx) = tensor._ptr;
+        _userOutputTensors.at(foundPort.idx) = tensor;
     }
 }
 
-std::vector<ov::SoPtr<ov::ITensor>> SyncInferRequest::get_tensors(const ov::Output<const ov::Node>& /*port*/) const {
+std::vector<ov::SoPtr<ov::ITensor>> SyncInferRequest::get_tensors(const ov::Output<const ov::Node>& port) const {
     OV_ITT_SCOPED_TASK(ov::itt::domains::Plugin, "get_tensors");
 
-    // Using batches of tensors is currently not supported by the NPU plugin. In this scenario, the OpenVINO API demands
-    // returning an empty vector.
+    auto foundPort = find_port(port);
+    OPENVINO_ASSERT(foundPort.found(), "Cannot find input tensors for port ", port);
+
+    if (foundPort.is_input() && is_batched_input(foundPort.idx)) {
+        return get_user_inputs(foundPort.idx);
+    }
+
     return {};
 }
 
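With this hunk, `get_tensors` returns the user-provided per-item tensors whenever more than one tensor is stored for an input port, instead of always returning an empty vector. Below is a minimal caller-side sketch of what this enables, assuming the public OpenVINO 2.0 C++ API; the model path, the "NPU" device string, and the batch size of 4 are illustrative assumptions, not part of the patch:

```cpp
// Caller-side sketch: feeding one tensor per batch item, which ends up stored
// per-port in _userInputTensors and validated by check_batched_tensors below.
// Model path, device name, and batch size are hypothetical example values.
#include <openvino/openvino.hpp>

#include <vector>

int main() {
    ov::Core core;
    auto compiled = core.compile_model("model.xml", "NPU");
    auto request = compiled.create_infer_request();

    // Each tensor must describe exactly one batch item: batch dimension 1,
    // identical shapes and element types, default (dense) strides.
    ov::Shape item_shape = compiled.input().get_shape();
    item_shape[0] = 1;
    std::vector<ov::Tensor> items;
    for (int i = 0; i < 4; ++i) {
        items.emplace_back(compiled.input().get_element_type(), item_shape);
    }

    // Presumably routed to the plugin's batched set_tensors path; get_tensors
    // then returns the same vector back for this port.
    request.set_input_tensors(0, items);
    request.infer();
    return 0;
}
```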
@@ -192,11 +198,89 @@ void SyncInferRequest::check_tensor(const ov::Output<const ov::Node>& port,
                     "Tensor data equal nullptr!");
 }
 
+void SyncInferRequest::check_batched_tensors(const ov::Output<const ov::Node>& port,
+                                             const std::vector<ov::SoPtr<ov::ITensor>>& tensors) const {
+    OPENVINO_ASSERT(!tensors.empty(), "set_input_tensors/set_tensors can't be called with empty tensors");
+    OPENVINO_ASSERT(
+        tensors.size() != 1,
+        "Internal error (plugin): check_batched_tensors is not allowed to have only one tensor inside batch");
+
+    auto layout = ov::layout::get_layout(port);
+
+    int64_t batch_idx;
+
+    if (layout.empty()) {
+        _logger.warning("set_input_tensors/set_tensors layout is not set, assuming batch dimension is found on 0 axis");
+        batch_idx = BATCH_AXIS;
+    } else {
+        OPENVINO_ASSERT(ov::layout::has_batch(layout),
+                        "set_input_tensors/set_tensors can be used only for inputs with N(batch) dimension"
+                        " 'layout' defined. Current layout is ",
+                        layout.to_string());
+        batch_idx = ov::layout::batch_idx(layout);
+    }
+
+    if (batch_idx < 0) {
+        batch_idx += static_cast<int64_t>(tensors[BATCH_AXIS]->get_shape().size());
+    }
+    OPENVINO_ASSERT(batch_idx == BATCH_AXIS,
+                    "set_input_tensors/set_tensors is not currently supported for batch dimension index ",
+                    batch_idx,
+                    " != 0");
+    std::for_each(tensors.begin(), tensors.end(), [&batch_idx](const ov::SoPtr<ov::ITensor>& item) {
+        OPENVINO_ASSERT(item, "Uninitialized tensor is provided!");
+        OPENVINO_ASSERT(item->get_shape()[batch_idx] == 1,
+                        "set_input_tensors/set_tensors. Tensors shall represent one item in a batch, ",
+                        item->get_shape()[batch_idx],
+                        " provided");
+    });
+    auto tensors_size = static_cast<int>(tensors.size());
+    if (port.get_partial_shape().rank().is_static()) {
+        OPENVINO_ASSERT(batch_idx >= 0 && batch_idx < port.get_partial_shape().rank().get_length(),
+                        "set_input_tensors/set_tensors error. Layout ",
+                        layout.to_string(),
+                        " is incorrect for operation with shape ",
+                        port.get_partial_shape());
+        auto batch = port.get_partial_shape()[batch_idx];
+
+        OPENVINO_ASSERT(batch.is_dynamic() || batch.get_length() == tensors_size,
+                        "set_input_tensors/set_tensors error. Input shape ",
+                        port.get_partial_shape(),
+                        " batch ",
+                        batch,
+                        " doesn't match with total blobs count: ",
+                        tensors_size);
+    }
+
+    auto batched_shape = tensors[BATCH_AXIS]->get_shape();
+    auto element_type = tensors[BATCH_AXIS]->get_element_type();
+    batched_shape[batch_idx] = tensors_size;
+    for (const auto& item : tensors) {
+        OPENVINO_ASSERT(item, "Uninitialized tensor is provided!");
+        auto item_shape = item->get_shape();
+        item_shape[batch_idx] = batched_shape[batch_idx];
+        OPENVINO_ASSERT(item_shape == batched_shape && item->get_element_type() == element_type,
+                        "set_input_tensors/set_tensors error. Tensor with element type ",
+                        item->get_element_type(),
+                        " and shape ",
+                        item_shape,
+                        " is not compatible with batched tensor with element type ",
+                        element_type,
+                        " and shape ",
+                        batched_shape);
+        OPENVINO_ASSERT(item->is_continuous(), "Strides for batched tensors should be default.");
+    }
+}
+
 void SyncInferRequest::check_tensors() const {
     const auto& inputs = _compiledModel->inputs();
     for (size_t i = 0; i < inputs.size(); i++) {
-        if (_userInputTensors.at(i)) {
-            check_tensor(inputs[i], _userInputTensors.at(i));
+        if (is_batched_input(i)) {
+            check_batched_tensors(inputs[i], get_user_inputs(i));
+            continue;
+        }
+        if (get_user_input(i)) {
+            check_tensor(inputs[i], get_user_input(i));
         }
     }
 
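The validation hunk above mirrors the batched-tensor checks used elsewhere in OpenVINO: the batch axis is taken from the port's `ov::Layout` (defaulting to axis 0 with a warning when no layout is set), normalized if negative, and currently restricted to axis 0. A standalone illustration of just that resolution step, using only the public layout utilities; the layout string and rank are hypothetical example values:

```cpp
// Standalone illustration of the batch-axis resolution in check_batched_tensors.
#include <openvino/core/layout.hpp>

#include <cstdint>
#include <iostream>

int main() {
    constexpr int64_t BATCH_AXIS = 0;  // same constant the patch relies on
    const ov::Layout layout("NCHW");
    const int64_t rank = 4;

    int64_t batch_idx = ov::layout::has_batch(layout) ? ov::layout::batch_idx(layout)
                                                      : BATCH_AXIS;  // no layout -> assume axis 0
    if (batch_idx < 0) {
        batch_idx += rank;  // e.g. a "...N" layout yields -1, i.e. the last axis
    }
    std::cout << "batch axis: " << batch_idx << '\n';  // "NCHW" -> 0, the only value accepted
    return 0;
}
```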
@@ -229,16 +313,16 @@ std::shared_ptr<ov::ITensor> SyncInferRequest::allocate_tensor(const IODescripto
         OPENVINO_ASSERT(descriptor.relatedDescriptorIndex.has_value(),
                         "The link between state descriptors is missing, state name: ",
                         descriptor.nameFromCompiler);
-        tensor = _userInputTensors.at(*descriptor.relatedDescriptorIndex);
+        tensor = get_user_input(*descriptor.relatedDescriptorIndex)._ptr;
     } else if (allocator) {
         tensor = ov::make_tensor(descriptor.precision, allocatedTensorShape, allocator);
     } else {
         tensor = ov::make_tensor(descriptor.precision, allocatedTensorShape);
     }
 
     if (isInput) {
-        if (_userInputTensors.at(index) == nullptr) {
-            _userInputTensors.at(index) = tensor;
+        if (get_user_input(index) == nullptr) {
+            get_user_input(index) = tensor;
         }
 
         if (descriptor.isStateInput) {
@@ -250,4 +334,17 @@ std::shared_ptr<ov::ITensor> SyncInferRequest::allocate_tensor(const IODescripto
 
     return tensor;
 }
+
+bool SyncInferRequest::is_batched_input(size_t idx) const {
+    return _userInputTensors.at(idx).size() > 1;
+}
+
+ov::SoPtr<ov::ITensor>& SyncInferRequest::get_user_input(size_t index) const {
+    return _userInputTensors.at(index).at(0);
+}
+
+std::vector<ov::SoPtr<ov::ITensor>>& SyncInferRequest::get_user_inputs(size_t index) const {
+    return _userInputTensors.at(index);
+}
+
 }  // namespace intel_npu
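The three helpers close the loop on the storage change made in the constructor: `_userInputTensors` is now a vector of per-port tensor vectors, so a slot holding a single element behaves like the old flat layout while more than one element marks a batched input. A minimal standalone model of that convention (types and names are simplified stand-ins for illustration, not plugin code):

```cpp
// Simplified model of the per-port storage convention from this patch:
// slot.size() == 1 -> plain tensor (possibly nullptr), > 1 -> batched input.
#include <cstddef>
#include <memory>
#include <vector>

struct Tensor {};                                   // stand-in for ov::SoPtr<ov::ITensor>
using Slot = std::vector<std::shared_ptr<Tensor>>;  // one slot per input port

class UserInputs {
public:
    explicit UserInputs(std::size_t ports) : _slots(ports, Slot(1, nullptr)) {}

    bool is_batched(std::size_t i) const { return _slots.at(i).size() > 1; }
    std::shared_ptr<Tensor>& single(std::size_t i) { return _slots.at(i).at(0); }
    Slot& batch(std::size_t i) { return _slots.at(i); }

private:
    std::vector<Slot> _slots;
};

int main() {
    UserInputs inputs(2);
    inputs.single(0) = std::make_shared<Tensor>();          // plain tensor, like set_tensor
    inputs.batch(1).assign(4, std::make_shared<Tensor>());  // batched, like set_tensors
    return inputs.is_batched(1) ? 0 : 1;
}
```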