
Commit 3d8d09b

razvanapetroaie authored and nikita-kud committed
Adding the BENCHMARK_INIT config option in order to allow running benchmark_app on the init part of the compiled model
1 parent f07228b commit 3d8d09b

File tree: 8 files changed, +116 -44 lines changed


samples/cpp/benchmark_app/utils.cpp (+5 -4)

@@ -287,11 +287,12 @@ size_t get_batch_size(const benchmark_app::InputsInfo& inputs_info) {
     size_t batch_size = 0;
     for (auto& info : inputs_info) {
         if (ov::layout::has_batch(info.second.layout)) {
-            if (batch_size == 0)
+            if (batch_size == 0) {
                 batch_size = info.second.batch();
-            else if (batch_size != info.second.batch())
-                throw std::logic_error("Can't deterimine batch size: batch is "
-                                       "different for different inputs!");
+            } else if (batch_size != info.second.batch()) {
+                batch_size = 0;
+                break;
+            }
         }
     }
     if (batch_size == 0) {
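
Written out in full, the reworked loop no longer throws when inputs disagree on the batch dimension; it simply gives up on deducing a batch. A minimal sketch of the function after this change, assuming the unchanged tail of the function (not shown in the hunk) falls back to a batch size of 1:

    // Sketch only: mirrors the hunk above; the final fallback to 1 is an assumption
    // about the code that follows the last context line of the hunk.
    size_t get_batch_size(const benchmark_app::InputsInfo& inputs_info) {
        size_t batch_size = 0;
        for (auto& info : inputs_info) {
            if (ov::layout::has_batch(info.second.layout)) {
                if (batch_size == 0) {
                    batch_size = info.second.batch();
                } else if (batch_size != info.second.batch()) {
                    batch_size = 0;  // inputs disagree: stop trying to deduce a batch
                    break;
                }
            }
        }
        if (batch_size == 0) {
            batch_size = 1;  // assumed default when no batch dimension could be determined
        }
        return batch_size;
    }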

src/plugins/intel_npu/src/al/include/intel_npu/config/common.hpp (+10)

@@ -275,6 +275,16 @@ struct SEPARATE_WEIGHTS final : OptionBase<SEPARATE_WEIGHTS, bool> {
     }
 };
 
+struct BENCHMARK_INIT final : OptionBase<BENCHMARK_INIT, bool> {
+    static std::string_view key() {
+        return ov::intel_npu::benchmark_init.name();
+    }
+
+    static bool defaultValue() {
+        return false;
+    }
+};
+
 }  // namespace intel_npu
 
 namespace ov {
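
Once registered (see common.cpp below), the option is consumed through the plugin's Config object. The rest of the commit relies on two accessors; a brief sketch of the pattern, with the surrounding plugin code elided:

    // Typed read: returns defaultValue() (false) when NPU_BENCHMARK_INIT was never set.
    if (_config.get<BENCHMARK_INIT>()) {
        // benchmark the init schedule instead of the main one
    }

    // String read: the form used by the property map added in plugin.cpp below.
    std::string asString = _config.getString<BENCHMARK_INIT>();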

src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp (+2)

@@ -349,6 +349,8 @@ static constexpr ov::Property<BatchMode> batch_mode{"NPU_BATCH_MODE"};
  */
 static constexpr ov::Property<bool> separate_weights{"NPU_SEPARATE_WEIGHTS"};
 
+static constexpr ov::Property<bool> benchmark_init{"NPU_BENCHMARK_INIT"};
+
 /**
  * @brief [Only for NPU Plugin]
  * Type: integer, default is 1
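
On the client side this is the property a benchmarking application would set. A sketch of enabling it when importing a pre-compiled blob; the blob path is hypothetical, and the include assumes the private header is reachable on the caller's include path:

    #include <fstream>

    #include "intel_npu/npu_private_properties.hpp"  // private header: assumed to be on the include path
    #include "openvino/runtime/core.hpp"

    int main() {
        ov::Core core;
        std::ifstream blob("model.blob", std::ios::binary);  // hypothetical blob, compiled with weights separation

        // With NPU_BENCHMARK_INIT enabled, the plugin wraps the blob's "init" schedule instead of
        // the main one, so the created infer requests execute (and therefore measure) init.
        auto compiled = core.import_model(blob, "NPU", {ov::intel_npu::benchmark_init(true)});

        auto request = compiled.create_infer_request();
        request.infer();
        return 0;
    }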

src/plugins/intel_npu/src/al/src/config/common.cpp (+1)

@@ -23,6 +23,7 @@ void intel_npu::registerCommonOptions(OptionsDesc& desc) {
     desc.add<LOADED_FROM_CACHE>();
     desc.add<BATCH_MODE>();
     desc.add<SEPARATE_WEIGHTS>();
+    desc.add<BENCHMARK_INIT>();
 }
 
 //

src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp (+4 -2)

@@ -586,8 +586,10 @@ void ZeroInferRequest::infer_async() {
     for (const auto& userTensor : _userInputTensors) {
         const IODescriptor inputDescriptor = _metadata.inputs.at(inputIndex);
 
-        OPENVINO_ASSERT(!inputDescriptor.isInitInputWeights,
-                        "This path should not be used for running inferences for the \"init\" model");
+        if (!_config.get<BENCHMARK_INIT>()) {
+            OPENVINO_ASSERT(!inputDescriptor.isInitInputWeights,
+                            "This path should not be used for running inferences for the \"init\" model");
+        }
 
         if (inputDescriptor.isShapeTensor) {
             OPENVINO_ASSERT(inputDescriptor.relatedDescriptorIndex.has_value(),

src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp (+5)

@@ -649,6 +649,11 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config,
                           << VALUE_DELIMITER;
     content = std::regex_replace(content, std::regex(separateWeightsStream.str()), "");
 
+    std::ostringstream benchmarkInitStream;
+    benchmarkInitStream << ov::intel_npu::benchmark_init.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+"
+                        << VALUE_DELIMITER;
+    content = std::regex_replace(content, std::regex(benchmarkInitStream.str()), "");
+
     // FINAL step to convert prefixes of remaining params, to ensure backwards compatibility
     // From 5.0.0, driver compiler start to use NPU_ prefix, the old version uses VPU_ prefix
     if (compilerVersion.major < 5) {
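
As with NPU_SEPARATE_WEIGHTS just above it, the new key is scrubbed from the serialized config before the string reaches the driver compiler, which would otherwise be handed an option it does not know. A self-contained sketch of what that regex pass does; the literal "=" and quote characters stand in for KEY_VALUE_SEPARATOR and VALUE_DELIMITER, and the sample config string is made up:

    #include <iostream>
    #include <regex>
    #include <sstream>
    #include <string>

    int main() {
        // Made-up serialized config; assumes entries look like KEY="VALUE" separated by spaces.
        std::string content = R"(NPU_PLATFORM="NPU3720" NPU_BENCHMARK_INIT="YES" PERF_COUNT="NO")";

        // Same shape as the pattern built in serializeConfig(): key, separator, quoted non-space value.
        std::ostringstream benchmarkInitStream;
        benchmarkInitStream << "NPU_BENCHMARK_INIT" << "=" << "\"" << "\\S+" << "\"";
        content = std::regex_replace(content, std::regex(benchmarkInitStream.str()), "");

        std::cout << content << std::endl;  // prints: NPU_PLATFORM="NPU3720"  PERF_COUNT="NO"
        return 0;
    }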

src/plugins/intel_npu/src/plugin/src/compiled_model.cpp (+4 -1)

@@ -66,7 +66,7 @@ std::shared_ptr<ov::IAsyncInferRequest> CompiledModel::create_infer_request() co
         _device->createInferRequest(shared_from_this(), _config);
     syncInferRequest->initialize_states();
 
-    if (_config.get<SEPARATE_WEIGHTS>()) {
+    if (_config.get<SEPARATE_WEIGHTS>() && _initGraph != nullptr) {
         if (!_config.get<CREATE_EXECUTOR>() || _config.get<DEFER_WEIGHTS_LOAD>()) {
             begin = std::chrono::steady_clock::now();
             _initGraph->initialize(_config);
@@ -88,6 +88,9 @@ std::shared_ptr<ov::IAsyncInferRequest> CompiledModel::create_infer_request() co
         end = std::chrono::steady_clock::now();
         std::cout << "set_weights_inputs() call "
                   << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() << "[ms]" << std::endl;
+    } else if (_config.get<SEPARATE_WEIGHTS>() && _initGraph == nullptr) {
+        _logger.warning("SEPARATE_WEIGHTS config option was set but no compiled model for the init schedule was found. "
+                        "run_init() will not run.");
     }
 
     return std::make_shared<AsyncInferRequest>(syncInferRequest,

src/plugins/intel_npu/src/plugin/src/plugin.cpp (+85 -37)

@@ -48,49 +48,85 @@ const char* NPU_PLUGIN_LIB_NAME = "openvino_intel_npu_plugin";
  * @returns The dummy "ov::Model" composed of "parameter" and "result" nodes built using the given descriptors.
  */
 std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& inputDescriptors,
-                                              const std::vector<IODescriptor>& outputDescriptors) {
+                                              const std::vector<IODescriptor>& outputDescriptors,
+                                              const bool benchmarkInit = false) {
     ov::ParameterVector parameters;
     ov::NodeVector results;
 
     for (const IODescriptor& inputDescriptor : inputDescriptors) {
-        if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
-            inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
-            continue;
-        }
+        if (!benchmarkInit) {
+            if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
+                inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
+                continue;
+            }
 
-        std::shared_ptr<ov::op::v0::Parameter> parameter = std::make_shared<ov::op::v0::Parameter>(
-            inputDescriptor.precision,
-            inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
-                                                         : inputDescriptor.shapeFromCompiler);
+            std::shared_ptr<ov::op::v0::Parameter> parameter = std::make_shared<ov::op::v0::Parameter>(
+                inputDescriptor.precision,
+                inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
+                                                             : inputDescriptor.shapeFromCompiler);
+            parameter->set_friendly_name(inputDescriptor.nodeFriendlyName);
+            parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames);
+            parameters.push_back(parameter);
+        } else {
+            if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
+                inputDescriptor.isMainInputWeights) {
+                continue;
+            }
 
-        parameter->set_friendly_name(inputDescriptor.nodeFriendlyName);
-        parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames);
-        parameters.push_back(parameter);
+            std::shared_ptr<ov::op::v0::Parameter> parameter = std::make_shared<ov::op::v0::Parameter>(
+                inputDescriptor.precision,
+                inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
+                                                             : inputDescriptor.shapeFromCompiler);
+            parameter->set_friendly_name(inputDescriptor.nameFromCompiler);
+            parameter->output(0).get_tensor().set_names(
+                std::unordered_set<std::string>{inputDescriptor.nameFromCompiler});
+            parameters.push_back(parameter);
+        }
     }
 
     // The "result" nodes require a parent node in order to satisfy the API conventions. Additionally, a dummy shape for
     // the "Constant" node was required since the specific constructor does not accept "ov::PartialShape" values (a
     // constant can't have dynamic shape). The dummy tensor was also brought in order to register the correct,
     // potentially dynamic, output shape.
     for (const IODescriptor& outputDescriptor : outputDescriptors) {
-        if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor ||
-            outputDescriptor.isInitOutputWeights) {
-            continue;
-        }
+        if (!benchmarkInit) {
+            if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor ||
+                outputDescriptor.isInitOutputWeights) {
+                continue;
+            }
+
+            std::shared_ptr<ov::Node> constantDummy =
+                std::make_shared<ov::op::v0::Constant>(outputDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE);
 
-        std::shared_ptr<ov::Node> constantDummy =
-            std::make_shared<ov::op::v0::Constant>(outputDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE);
+            const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy =
+                std::make_shared<ov::descriptor::Tensor>(outputDescriptor.precision,
+                                                         outputDescriptor.shapeFromCompiler,
+                                                         outputDescriptor.outputTensorNames);
 
-        const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy = std::make_shared<ov::descriptor::Tensor>(
-            outputDescriptor.precision,
-            outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
-                                                          : outputDescriptor.shapeFromCompiler,
-            outputDescriptor.outputTensorNames);
+            std::shared_ptr<ov::Node> result = std::make_shared<ov::op::v0::Result>(constantDummy);
+            result->output(0).set_tensor_ptr(tensorDummy);
+
+            result->set_friendly_name(outputDescriptor.nodeFriendlyName);
+            results.push_back(result);
+        } else {
+            if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor) {
+                continue;
+            }
 
-        std::shared_ptr<ov::Node> result = std::make_shared<ov::op::v0::Result>(constantDummy);
-        result->output(0).set_tensor_ptr(tensorDummy);
-        result->set_friendly_name(outputDescriptor.nodeFriendlyName);
-        results.push_back(result);
+            std::shared_ptr<ov::Node> constantDummy =
+                std::make_shared<ov::op::v0::Constant>(outputDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE);
+
+            const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy = std::make_shared<ov::descriptor::Tensor>(
+                outputDescriptor.precision,
+                outputDescriptor.shapeFromCompiler,
+                std::unordered_set<std::string>{outputDescriptor.nameFromCompiler});
+
+            std::shared_ptr<ov::Node> result = std::make_shared<ov::op::v0::Result>(constantDummy);
+            result->output(0).set_tensor_ptr(tensorDummy);
+
+            result->set_friendly_name(outputDescriptor.nameFromCompiler);
+            results.push_back(result);
+        }
     }
 
     return std::make_shared<ov::Model>(results, parameters);
@@ -589,6 +625,12 @@ Plugin::Plugin()
                      [](const Config& config) {
                          return config.getString<SEPARATE_WEIGHTS>();
                      }}},
+         {ov::intel_npu::benchmark_init.name(),
+          {false,
+           ov::PropertyMutability::RW,
+           [](const Config& config) {
+               return config.getString<BENCHMARK_INIT>();
+           }}},
     };
 
     for (auto& property : _properties) {
@@ -860,16 +902,22 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
             auto graph = compiler->parse(std::move(blob), localConfig);
             graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));
 
-            const std::shared_ptr<ov::Model> modelDummy =
-                create_dummy_model(graph->get_metadata().inputs, graph->get_metadata().outputs);
-
-            compiledModel = std::make_shared<CompiledModel>(modelDummy,
-                                                            shared_from_this(),
-                                                            device,
-                                                            graph,
-                                                            localConfig,
-                                                            initGraph,
-                                                            initModel);
+            if (!localConfig.get<BENCHMARK_INIT>()) {
+                const std::shared_ptr<ov::Model> modelDummy =
+                    create_dummy_model(graph->get_metadata().inputs, graph->get_metadata().outputs);
+                compiledModel = std::make_shared<CompiledModel>(modelDummy,
+                                                                shared_from_this(),
+                                                                device,
+                                                                graph,
+                                                                localConfig,
+                                                                initGraph,
+                                                                initModel);
+            } else {
+                const std::shared_ptr<ov::Model> modelDummy =
+                    create_dummy_model(initGraph->get_metadata().inputs, initGraph->get_metadata().outputs, true);
+                compiledModel =
+                    std::make_shared<CompiledModel>(modelDummy, shared_from_this(), device, initGraph, localConfig);
+            }
         }
     } catch (const std::exception& ex) {
        OPENVINO_THROW("Can't import network: ", ex.what());