Skip to content

Commit 324a282

Browse files
authored
NPUW: Regularized REP pipeline (openvinotoolkit#27089)
### Details: - Introduce a pattern-guided REG pipeline and make it the default. If there are no known patterns, it falls back to REP. ### Tickets: - *E-142355*
1 parent 15072ab commit 324a282

File tree

7 files changed

+82
-18
lines changed

7 files changed

+82
-18
lines changed

src/plugins/intel_npu/src/al/include/intel_npu/al/config/npuw.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ void registerNPUWOptions(OptionsDesc& desc);
3030
DEFINE_OPT(NPU_USE_NPUW, bool, false, use_npuw, CompileTime);
3131
DEFINE_OPT(NPUW_DEVICES, std::string, "NPU,CPU", npuw::devices, CompileTime);
3232
DEFINE_OPT(NPUW_SUBMODEL_DEVICE, std::string, "", npuw::submodel_device, CompileTime);
33-
DEFINE_OPT(NPUW_ONLINE_PIPELINE, std::string, "REP", npuw::partitioning::online::pipeline, CompileTime);
33+
DEFINE_OPT(NPUW_ONLINE_PIPELINE, std::string, "REG", npuw::partitioning::online::pipeline, CompileTime);
3434
DEFINE_OPT(NPUW_ONLINE_AVOID, std::string, "", npuw::partitioning::online::avoid, CompileTime);
3535
DEFINE_OPT(NPUW_ONLINE_ISOLATE, std::string, "", npuw::partitioning::online::isolate, CompileTime);
3636
DEFINE_OPT(NPUW_ONLINE_NO_FOLD, std::string, "", npuw::partitioning::online::nofold, CompileTime);

src/plugins/intel_npu/src/al/include/npuw_private_properties.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ namespace online {
6767
* @brief
6868
* Type: std::string.
6969
* Specify which partitioning pipeline to run.
70-
* Possible values: "NONE", "INIT", "JUST", "REP", "COMPUTE".
71-
* Default value: "REP".
70+
* Possible values: "NONE", "INIT", "JUST", "REP", "REG", "COMPUTE".
71+
* Default value: "REG".
7272
*/
7373
static constexpr ov::Property<std::string> pipeline{"NPUW_ONLINE_PIPELINE"};
7474

src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp

+47-9
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ std::vector<Isolate> getIsolates(const std::string& isolates_unparsed);
4040
std::vector<std::string> getNoFolds(::intel_npu::Config& cfg);
4141
std::vector<std::string> getNoFolds(const std::string& nofolds_unparsed);
4242
// Set default predefined values for COMPUTE pipeline
43-
void setComputeConfig(PassContext& ctx);
4443
void dump_partitioning(const ov::npuw::Ensemble& ens, const std::string& to);
4544

4645
size_t getMinGraphSize(::intel_npu::Config& cfg) {
@@ -204,12 +203,6 @@ std::vector<std::string> getNoFolds(const std::string& nofolds_unparsed) {
204203
return nofolds;
205204
}
206205

207-
void setComputeConfig(PassContext& ctx) {
208-
// FIXME: initialize via a dedicated function instead of parsing
209-
ctx.isolates = detail::getIsolates(ISOL_PRESETS.at("COMPUTE"));
210-
ctx.nofolds = detail::getNoFolds("compute");
211-
}
212-
213206
void dump_partitioning(const ov::npuw::Ensemble& ens, const std::string& to) {
214207
pugi::xml_document doc;
215208

@@ -277,10 +270,21 @@ class Compiler {
277270
NONE, // Partitioning will consist of a single group with all the Ops
278271
INIT, // Initialize only. The hardest mode, every group has just 1 layer inside
279272
JUST, // "justPartitioning" - combination of LHF + Remnants
280-
REP, // Repeated blocks pipeline - combination of repeatedBlocks and Remnants - default configuration
273+
REP, // Repeated blocks pipeline - combination of repeatedBlocks and Remnants
274+
REG, // Regularized repeated blocks pipeline - same as REP, but with some strong hints first
281275
COMPUTE // Separates non-foldable compute subgraphs from the model based on predefined rules + REP
282276
};
283277

278+
template <class C>
279+
void warn_unused() {
280+
const auto& val = m_cfg.get<C>();
281+
if (val != C::defaultValue()) {
282+
LOG_WARN("User-specified configuration {" << C::key() << " : " << val
283+
<< "} is ignored in the current pipeline "
284+
<< m_cfg.get<::intel_npu::NPUW_ONLINE_PIPELINE>());
285+
}
286+
}
287+
284288
Pipeline currentPipeline() {
285289
std::string pipeline_opt = m_cfg.getString<::intel_npu::NPUW_ONLINE_PIPELINE>();
286290
if (pipeline_opt == "NONE") {
@@ -291,6 +295,8 @@ class Compiler {
291295
return Pipeline::JUST;
292296
} else if (pipeline_opt == "REP") {
293297
return Pipeline::REP;
298+
} else if (pipeline_opt == "REG") {
299+
return Pipeline::REG;
294300
} else if (pipeline_opt == "COMPUTE") {
295301
return Pipeline::COMPUTE;
296302
} else {
@@ -346,6 +352,23 @@ class Compiler {
346352
LOG_INFO("Done");
347353
}
348354

355+
void reg() {
356+
LOG_INFO("Online partitioning: compiling regularized repeated blocks pipeline...");
357+
LOG_BLOCK();
358+
359+
m_snapshot->earlyAvoids();
360+
m_snapshot->earlyRegroup();
361+
m_snapshot->repeatedBlocks([&]() {
362+
// This callback is called when repeatingBlocks algorithm thinks it is done
363+
m_snapshot->stripTag("compute");
364+
});
365+
m_snapshot->repeat([&] {
366+
m_snapshot->fuseRemnantsExtended();
367+
});
368+
369+
LOG_INFO("Done");
370+
}
371+
349372
public:
350373
Compiler(const std::shared_ptr<ov::Model>& model, ::intel_npu::Config& cfg)
351374
: m_model(model),
@@ -384,9 +407,24 @@ class Compiler {
384407
case Pipeline::REP:
385408
rep();
386409
break;
410+
case Pipeline::REG:
411+
warn_unused<::intel_npu::NPUW_ONLINE_ISOLATE>();
412+
413+
// Only get isolates here.
414+
// NB: We ignore NO_FOLD everywhere except pipeline COMPUTE - this needs
415+
// to be aligned in the future
416+
ctx.isolates = detail::getIsolates(detail::ISOL_PRESETS.at("COMPUTE"));
417+
m_snapshot->setCtx(ctx);
418+
reg();
419+
break;
387420
case Pipeline::COMPUTE:
421+
warn_unused<::intel_npu::NPUW_ONLINE_ISOLATE>();
422+
warn_unused<::intel_npu::NPUW_ONLINE_NO_FOLD>();
423+
388424
// Manually set predefined isolates and nofolds then do rep() pipeline
389-
detail::setComputeConfig(ctx);
425+
// FIXME: initialize via a dedicated function instead of parsing
426+
ctx.isolates = detail::getIsolates(detail::ISOL_PRESETS.at("COMPUTE"));
427+
ctx.nofolds = detail::getNoFolds("compute");
390428
m_snapshot->setCtx(ctx);
391429
rep();
392430
break;

src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,10 @@ void Group::isolate(const std::string& tag) {
443443
m_isol_tag = tag;
444444
}
445445

446+
// Clears the isolation tag of this group, so that isolatedTag() returns an
// empty string afterwards.
void Group::dontIsolate() {
    m_isol_tag.clear();
}
449+
446450
// Returns a reference to the isolation tag stored in m_isol_tag
// (set by isolate(), reset to an empty string by dontIsolate()).
const std::string& Group::isolatedTag() const {
447451
return m_isol_tag;
448452
}

src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ class Group : public std::enable_shared_from_this<Group> {
7777
// FIXME: unify avoid and isolate
7878
void avoid(const std::string& device);
7979
void isolate(const std::string& tag);
80+
void dontIsolate();
8081
const std::set<std::string>& avoidedTargets() const;
8182
const std::string& isolatedTag() const;
8283
std::string specialTags() const;

src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp

+23-5
Original file line numberDiff line numberDiff line change
@@ -436,18 +436,27 @@ void Snapshot::earlyRegroup() {
436436
LOG_INFO("DONE.");
437437
}
438438

439-
void Snapshot::repeatedBlocks() {
439+
void Snapshot::repeatedBlocks(Snapshot::CB&& on_done) {
440440
LOG_INFO("Online partitioning: executing repeatedBlocks pass group...");
441441
LOG_BLOCK();
442442

443443
identifyUniques();
444444
repeat([&] {
445445
repeat([&] {
446-
mergeUniques();
446+
repeat([&] {
447+
mergeUniques();
448+
});
449+
mergeTriangles();
450+
markInternalCompute();
451+
resetExcludedRep();
447452
});
448-
mergeTriangles();
449-
markInternalCompute();
450-
resetExcludedRep();
453+
// While the current process is entirely done, let the caller
454+
// influence the partitioning - so the algorithm could continue.
455+
if (on_done) {
456+
on_done();
457+
} else {
458+
return; // FROM top-level repeat!
459+
}
451460
});
452461
cleanUpUniques();
453462

@@ -1086,3 +1095,12 @@ void Snapshot::repeat(detail::Pass&& pass) {
10861095
// Replaces the snapshot's pass context (m_ctx) with a copy of ctx.
void Snapshot::setCtx(const ov::npuw::online::PassContext& ctx) {
10871096
m_ctx = ctx;
10881097
}
1098+
1099+
void Snapshot::stripTag(const std::string& tag) {
1100+
for (auto&& nh : m_graph->nodes()) {
1101+
auto gptr = m_graph->meta(nh).get<Group::GPtr>();
1102+
if (gptr->isolatedTag() == tag) {
1103+
gptr->dontIsolate();
1104+
}
1105+
}
1106+
}

src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,13 @@ class Snapshot : public std::enable_shared_from_this<Snapshot> {
4646
void fuseInputs();
4747

4848
// Advanced passes for repeated blocks algorithm
49-
void repeatedBlocks();
49+
using CB = std::function<void()>;
50+
void repeatedBlocks(CB&& on_done = {});
5051
void earlyAvoids();
5152
void earlyRegroup();
5253

54+
void stripTag(const std::string& tag);
55+
5356
// Utility
5457
std::shared_ptr<own::ade::Graph> getGraph() const;
5558
const detail::OVPortsMap& getPortsMap() const;

0 commit comments

Comments
 (0)