Skip to content

Commit e306cbc

Browse files
[CPU] [DEBUG CAPS] Extension for snippets and other ngraph transformations (#14223)
1 parent 40e19de commit e306cbc

20 files changed

+1272
-712
lines changed

src/plugins/intel_cpu/src/config.cpp

+19-56
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@
1414
#include "ie_parallel.hpp"
1515
#include "ie_system_conf.h"
1616

17-
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
17+
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
1818
#include "openvino/core/type/element_type_traits.hpp"
1919
#include "openvino/runtime/properties.hpp"
20-
#include <cpu/x64/cpu_isa_traits.hpp>
20+
#include "utils/debug_capabilities.h"
21+
#include "cpu/x64/cpu_isa_traits.hpp"
2122

2223
namespace ov {
2324
namespace intel_cpu {
@@ -48,10 +49,24 @@ Config::Config() {
4849
if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16))
4950
enforceBF16 = false;
5051

51-
CPU_DEBUG_CAP_ENABLE(readDebugCapsProperties());
52+
CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
53+
5254
updateProperties();
5355
}
5456

57+
#ifdef CPU_DEBUG_CAPS
58+
/**
59+
* Debug capabilities configuration takes priority over the common one.
60+
* Some of the debug capabilities also require enabling some of the common
61+
* configuration properties
62+
*/
63+
void Config::applyDebugCapsProperties() {
64+
// always enable perf counters for verbose mode and performance summary
65+
if (!debugCaps.verbose.empty() || !debugCaps.summaryPerf.empty())
66+
collectPerfCounters = true;
67+
}
68+
#endif
69+
5570
void Config::readProperties(const std::map<std::string, std::string> &prop) {
5671
const auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
5772
const auto hintsConfigKeys = perfHintsConfig.SupportedKeys();
@@ -184,7 +199,7 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
184199
if (exclusiveAsyncRequests) // Exclusive request feature disables the streams
185200
streamExecutorConfig._streams = 1;
186201

187-
CPU_DEBUG_CAP_ENABLE(readDebugCapsProperties());
202+
CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
188203
updateProperties();
189204
}
190205

@@ -239,58 +254,6 @@ void Config::updateProperties() {
239254
_config.insert({PluginConfigParams::KEY_CACHE_DIR, cache_dir});
240255
}
241256

242-
#ifdef CPU_DEBUG_CAPS
243-
void Config::readDebugCapsProperties() {
244-
auto readEnv = [](const char* envVar) {
245-
return std::getenv(envVar);
246-
};
247-
248-
auto parseDumpFormat = [](const std::string& format) {
249-
if (format == "BIN")
250-
return FORMAT::BIN;
251-
else if (format == "TEXT")
252-
return FORMAT::TEXT;
253-
else
254-
IE_THROW() << "readDebugCapsProperties: Unknown dump format";
255-
};
256-
257-
const char* envVarValue = nullptr;
258-
259-
if (envVarValue = readEnv("OV_CPU_EXEC_GRAPH_PATH"))
260-
execGraphPath = envVarValue;
261-
262-
if (envVarValue = readEnv("OV_CPU_VERBOSE"))
263-
verbose = envVarValue;
264-
265-
if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_DIR"))
266-
blobDumpDir = envVarValue;
267-
268-
if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_FORMAT"))
269-
blobDumpFormat = parseDumpFormat(envVarValue);
270-
271-
if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_EXEC_ID"))
272-
blobDumpFilters[BY_EXEC_ID] = envVarValue;
273-
274-
if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_PORTS"))
275-
blobDumpFilters[BY_PORTS] = envVarValue;
276-
277-
if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_TYPE"))
278-
blobDumpFilters[BY_TYPE] = envVarValue;
279-
280-
if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_NAME"))
281-
blobDumpFilters[BY_NAME] = envVarValue;
282-
283-
if (envVarValue = readEnv("OV_CPU_SUMMARY_PERF")) {
284-
collectPerfCounters = true;
285-
summaryPerf = envVarValue;
286-
}
287-
288-
// always enable perf counters for verbose mode
289-
if (!verbose.empty())
290-
collectPerfCounters = true;
291-
}
292-
#endif // CPU_DEBUG_CAPS
293-
294257
} // namespace intel_cpu
295258
} // namespace ov
296259

src/plugins/intel_cpu/src/config.h

+8-24
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@
66

77
#include <threading/ie_istreams_executor.hpp>
88
#include <ie_performance_hints.hpp>
9-
#include "utils/debug_capabilities.h"
9+
#include <ie/ie_common.h>
10+
#include <openvino/util/common_util.hpp>
11+
#include "utils/debug_caps_config.h"
1012

13+
#include <bitset>
1114
#include <string>
1215
#include <map>
1316
#include <mutex>
@@ -57,31 +60,12 @@ struct Config {
5760

5861
std::map<std::string, std::string> _config;
5962

60-
#ifdef CPU_DEBUG_CAPS
61-
enum FILTER {
62-
BY_PORTS,
63-
BY_EXEC_ID,
64-
BY_TYPE,
65-
BY_NAME,
66-
};
67-
68-
enum class FORMAT {
69-
BIN,
70-
TEXT,
71-
};
72-
73-
std::string execGraphPath;
74-
std::string verbose;
75-
std::string blobDumpDir = "cpu_dump";
76-
FORMAT blobDumpFormat = FORMAT::TEXT;
77-
// std::hash<int> is necessary for Ubuntu-16.04 (gcc-5.4 and defect in C++11 standart)
78-
std::unordered_map<FILTER, std::string, std::hash<int>> blobDumpFilters;
79-
std::string summaryPerf = "";
63+
bool isNewApi = true;
8064

81-
void readDebugCapsProperties();
65+
#ifdef CPU_DEBUG_CAPS
66+
DebugCapsConfig debugCaps;
67+
void applyDebugCapsProperties();
8268
#endif
83-
84-
bool isNewApi = true;
8569
};
8670

8771
} // namespace intel_cpu

src/plugins/intel_cpu/src/docs/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Use the following cmake option to enable debug capabilities:
66
* [Verbose mode](verbose.md)
77
* [Blob dumping](blob_dumping.md)
88
* [Graph serialization](graph_serialization.md)
9+
* [Graph transformation disabling](feature_disabling.md#graph-transformations)
910

1011
## Debug log
1112

src/plugins/intel_cpu/src/docs/blob_dumping.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ Default is *cpu_dump*
2929
OV_CPU_BLOB_DUMP_FORMAT=<format> binary ...
3030
```
3131
Options are:
32-
* BIN (default)
33-
* TEXT
32+
* BIN
33+
* TEXT (default)
3434

3535
## Filter input / output blobs
3636
To dump only input / output blobs:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Filters
2+
3+
Filters described below have the following common format:
4+
```sh
5+
filter_name=<comma_separated_tokens>
6+
```
7+
Tokens are processed from left to right, and each one includes or excludes the corresponding value.\
8+
To exclude a value, prefix the token with a minus sign: *-token*\
9+
All tokens are case insensitive, and an empty token list is treated as *all*\
10+
So the filters below are equivalent:
11+
* filter_name
12+
* filter_name=all
13+
* filter_name=-all,ALL
14+
15+
## IR format filter
16+
17+
IR format filter is used to specify output IR formats, e.g. for [serialization](graph_serialization.md#graph-transformations).
18+
```sh
19+
formats=<comma_separated_tokens>
20+
```
21+
22+
The following tokens are supported:
23+
* all\
24+
equals to <xml,dot,svg>
25+
* xml (default)\
26+
IR in .xml file. Can be opened using, for example, *netron* app. (For now the option is Linux only)
27+
* xmlbin\
28+
IR in .xml and .bin files. Can be opened using, for example, *netron* app.
29+
* dot\
30+
IR in .dot file (.svg.dot file if svg is also specified). Can be inspected using, for example, *graphviz* tools.
31+
* svg\
32+
IR in .svg file. Requires *dot* tool to be installed on the host, not supported on Windows.\
33+
Generation is based on dot representation, so IR is additionally dumped to .svg.dot file.
34+
35+
## Transformation filter
36+
37+
Transformation filter is used to specify main graph transformation stages for different purposes,
38+
e.g. for [disabling](feature_disabling.md#graph-transformations) or [serialization](graph_serialization.md#graph-transformations).
39+
```sh
40+
transformations=<comma_separated_tokens>
41+
```
42+
43+
The following tokens are supported:
44+
* all (default)\
45+
equals to <preLpt,lpt,postLpt,snippets,specific>
46+
* common \
47+
equals to <preLpt,postLpt>
48+
* preLpt
49+
* lpt
50+
* postLpt
51+
* snippets
52+
* specific
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Feature disabling
2+
3+
The common way to disable a feature in the CPU plugin is to use the environment variable **OV_CPU_DISABLE**:
4+
```sh
5+
OV_CPU_DISABLE=<space_separated_options> binary ...
6+
```
7+
Option names are case insensitive and processed from left to right,\
8+
so the last one overrides the previous ones if an option is duplicated.
9+
10+
Examples:
11+
```sh
12+
OV_CPU_DISABLE="transformations" binary ...
13+
OV_CPU_DISABLE="transformations=lpt" binary ...
14+
OV_CPU_DISABLE="transformations=all,-common" binary ...
15+
```
16+
17+
By means of corresponding options **OV_CPU_DISABLE** controls disabling of the following features:
18+
19+
## Graph transformations
20+
21+
Graph transformation disabling is controlled by the following option inside **OV_CPU_DISABLE**:
22+
```sh
23+
transformations=<comma_separated_tokens>
24+
```
25+
Filter with main transformation stages to disable specified ones.\
26+
See [transformation filter](debug_caps_filters.md#transformation-filter) for more details.
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,43 @@
11
# Graph serialization
22

3-
The functionality allows to serialize execution graph using environment variable:
3+
Graph serialization is disabled by default and controlled by environment variables.
4+
5+
## Execution graph
6+
7+
The execution graph can be serialized using the environment variable **OV_CPU_EXEC_GRAPH_PATH**:
48
```sh
5-
OV_CPU_EXEC_GRAPH_PATH=<path> binary ...
9+
OV_CPU_EXEC_GRAPH_PATH=<option> binary ...
610
```
7-
811
Possible serialization options:
9-
* cout
12+
* cout\
13+
Serialize to console output.
14+
* \<path\>.xml\
15+
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app.
16+
* **TBD**: \<path\>.dot\
17+
Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.
1018

11-
Serialize to console output
12-
* \<path\>.xml
19+
## Graph transformations
20+
21+
Additionally, the IR can be serialized at specified stages using the environment variable **OV_CPU_DUMP_IR**:
22+
```sh
23+
OV_CPU_DUMP_IR=<space_separated_options> binary ...
24+
```
25+
26+
Examples:
27+
```sh
28+
OV_CPU_DUMP_IR="transformations" binary ...
29+
OV_CPU_DUMP_IR="transformations=snippets dir=path/dumpDir" binary ...
30+
OV_CPU_DUMP_IR="transformations=all,-common DIR=path/dumpdir formats=svg,xml" binary ...
31+
```
1332

14-
Serialize graph into .xml and .bin files. Can be opened using, for example, *netron* app
15-
* \<path\>.dot
33+
Option names are case insensitive, the following options are supported:
34+
* dir=\<path\>\
35+
Path to dumped IR files. If omitted, it defaults to *intel_cpu_dump*
36+
* formats=<comma_separated_tokens>\
37+
Filter with IR formats to dump. If omitted, it defaults to *xml*\
38+
See [IR format filter](debug_caps_filters.md#ir-format-filter) for more details.
39+
* transformations=<comma_separated_tokens>\
40+
Filter with main transformation stages to serialize graph before and after specified ones.\
41+
See [transformation filter](debug_caps_filters.md#transformation-filter) for more details.
1642

17-
TBD. Serialize graph into .dot file. Can be inspected using, for example, *graphviz* tools.
43+
Options are processed from left to right, so the last one overrides the previous ones if an option is duplicated.

src/plugins/intel_cpu/src/graph.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -1073,7 +1073,7 @@ void Graph::InferStatic(InferRequestBase* request) {
10731073
dnnl::stream stream(eng);
10741074

10751075
for (const auto& node : executableGraphNodes) {
1076-
VERBOSE(node, config.verbose);
1076+
VERBOSE(node, config.debugCaps.verbose);
10771077
PERF(node, config.collectPerfCounters);
10781078

10791079
if (request)
@@ -1160,7 +1160,7 @@ void Graph::InferDynamic(InferRequestBase* request) {
11601160
updateNodes(stopIndx);
11611161
for (; inferCounter < stopIndx; ++inferCounter) {
11621162
auto& node = executableGraphNodes[inferCounter];
1163-
VERBOSE(node, config.verbose);
1163+
VERBOSE(node, config.debugCaps.verbose);
11641164
PERF(node, config.collectPerfCounters);
11651165

11661166
if (request)
@@ -1171,7 +1171,7 @@ void Graph::InferDynamic(InferRequestBase* request) {
11711171
}
11721172

11731173
inline void Graph::ExecuteNode(const NodePtr& node, const dnnl::stream& stream) const {
1174-
DUMP(node, config, infer_count);
1174+
DUMP(node, config.debugCaps, infer_count);
11751175
OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, node->profiling.execute);
11761176

11771177
if (node->isDynamicNode()) {

src/plugins/intel_cpu/src/graph_dumper.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const Graph &graph
210210

211211
#ifdef CPU_DEBUG_CAPS
212212
void serialize(const Graph &graph) {
213-
const std::string& path = graph.getConfig().execGraphPath;
213+
const std::string& path = graph.getConfig().debugCaps.execGraphPath;
214214

215215
if (path.empty())
216216
return;
@@ -257,7 +257,7 @@ void serializeToCout(const Graph &graph) {
257257
}
258258

259259
void summary_perf(const Graph &graph) {
260-
const std::string& summaryPerf = graph.getConfig().summaryPerf;
260+
const std::string& summaryPerf = graph.getConfig().debugCaps.summaryPerf;
261261

262262
if (summaryPerf.empty())
263263
return;

src/plugins/intel_cpu/src/ngraph_transformations/convert_to_cpu_specific_opset.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ namespace intel_cpu {
2727

2828
inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphFunc) {
2929
RUN_ON_FUNCTION_SCOPE(ConvertToCPUSpecificOpset);
30+
3031
ngraph::pass::Manager manager;
3132
manager.register_pass<ConvertMatMulToFC>();
3233
manager.register_pass<AlignMatMulInputRanks>();

0 commit comments

Comments
 (0)