@@ -17,6 +17,7 @@ namespace intel_npu {
17
17
//
18
18
19
19
// Declaration only: takes the OptionsDesc to operate on by mutable reference.
// NOTE(review): presumably adds the NPUW option set to `desc`; the definition is not part of this chunk — confirm.
void registerNPUWOptions (OptionsDesc& desc);
20
// Declaration only: same signature as registerNPUWOptions, taking the OptionsDesc by mutable reference.
// NOTE(review): presumably adds the NPUW_LLM_* options to `desc`; the definition is not part of this chunk — confirm.
+ void registerNPUWLLMOptions (OptionsDesc& desc);
20
21
21
22
#define DEFINE_OPT (Name, Type, DefaultValue, PropertyKey, Mode ) \
22
23
struct Name final : OptionBase<Name, Type> { \
@@ -66,4 +67,110 @@ DEFINE_OPT(NPUW_DUMP_SUBS, std::string, "", npuw::dump::subgraphs, CompileTime);
66
67
// Each DEFINE_OPT(Name, Type, DefaultValue, PropertyKey, Mode) invocation below expands to
// `struct Name final : OptionBase<Name, Type>` (per the macro header earlier in this file).
// NOTE(review): the rest of the macro body is not visible here; presumably DefaultValue, PropertyKey
// and Mode feed the struct's default value, key and option mode — confirm against the full macro.
//
// Dump-related options (string / bool typed).
DEFINE_OPT (NPUW_DUMP_SUBS_ON_FAIL, std::string, " " , npuw::dump::subgraphs_on_fail, CompileTime);
67
68
DEFINE_OPT (NPUW_DUMP_IO, std::string, " " , npuw::dump::inputs_outputs, RunTime);
68
69
DEFINE_OPT (NPUW_DUMP_IO_ITERS, bool , false , npuw::dump::io_iters, RunTime);
70
// LLM-related options: a bool switch (default false) and two uint32_t lengths (defaults 1024 and 128),
// all declared with the CompileTime mode.
+ DEFINE_OPT (NPUW_LLM, bool , false , npuw::llm::enabled, CompileTime);
71
+ DEFINE_OPT (NPUW_LLM_MAX_PROMPT_LEN, uint32_t , 1024 , npuw::llm::max_prompt_len, CompileTime);
72
+ DEFINE_OPT (NPUW_LLM_MIN_RESPONSE_LEN, uint32_t , 128 , npuw::llm::min_response_len, CompileTime);
73
+
74
namespace npuw {
namespace llm {

/// Plain aggregate holding the three fields of the LLM model description option.
///
/// All members have a well-defined value after default construction: the strings
/// are empty and the head count is zero.
struct ModelDesc {
    std::string type;             ///< Free-form string; empty by default.
    std::string name_or_path;     ///< Free-form string; empty by default.
    int num_key_value_heads = 0;  ///< Explicitly zeroed so `ModelDesc d;` never holds an indeterminate int.
};

/// Generation hint values; exactly two enumerators are defined.
enum class GenerateHint { FAST_COMPILE, BEST_PERF };

}  // namespace llm
}  // namespace npuw
84
+
85
+ struct NPUW_LLM_MODEL_DESC final : OptionBase<NPUW_LLM_MODEL_DESC, ::intel_npu::npuw::llm::ModelDesc> {
86
+ static std::string_view key () {
87
+ return ov::intel_npu::npuw::llm::model_desc.name ();
88
+ }
89
+
90
+ static constexpr std::string_view getTypeName () {
91
+ return " ::intel_npu::npuw::llm::ModelDesc" ;
92
+ }
93
+
94
+ static ::intel_npu::npuw::llm::ModelDesc defaultValue () {
95
+ return {};
96
+ }
97
+
98
+ static ::intel_npu::npuw::llm::ModelDesc parse (std::string_view val) {
99
+ ::intel_npu::npuw::llm::ModelDesc res;
100
+ std::map<std::string, std::string> res_map = OptionParser<std::map<std::string, std::string>>::parse (val);
101
+ res.type = res_map[" type" ];
102
+ res.name_or_path = res_map[" name_or_path" ];
103
+ res.num_key_value_heads = std::stoi (res_map[" num_key_value_heads" ]);
104
+ return res;
105
+ }
106
+
107
+ static std::string toString (const ::intel_npu::npuw::llm::ModelDesc& val) {
108
+ std::string res;
109
+ std::map<std::string, std::string> res_map;
110
+ res_map[" type" ] = val.type ;
111
+ res_map[" name_or_path" ] = val.name_or_path ;
112
+ res_map[" num_key_value_heads" ] = std::to_string (val.num_key_value_heads );
113
+ return OptionPrinter<std::map<std::string, std::string>>::toString (res_map);
114
+ }
115
+
116
+ static OptionMode mode () {
117
+ return OptionMode::CompileTime;
118
+ }
119
+
120
+ static bool isPublic () {
121
+ return true ;
122
+ }
123
+ };
124
+
125
+ struct NPUW_LLM_GENERATE_HINT final : OptionBase<NPUW_LLM_GENERATE_HINT, ::intel_npu::npuw::llm::GenerateHint> {
126
+ static std::string_view key () {
127
+ return ov::intel_npu::npuw::llm::generate_hint.name ();
128
+ }
129
+
130
+ static constexpr std::string_view getTypeName () {
131
+ return " ::intel_npu::npuw::llm::GenerateHint" ;
132
+ }
133
+
134
+ static ::intel_npu::npuw::llm::GenerateHint defaultValue () {
135
+ return ::intel_npu::npuw::llm::GenerateHint::FAST_COMPILE;
136
+ }
137
+
138
+ static ::intel_npu::npuw::llm::GenerateHint parse (std::string_view val) {
139
+ ::intel_npu::npuw::llm::GenerateHint res;
140
+
141
+ if (val == " FAST_COMPILE" ) {
142
+ res = ::intel_npu::npuw::llm::GenerateHint::FAST_COMPILE;
143
+ } else if (val == " BEST_PERF" ) {
144
+ res = ::intel_npu::npuw::llm::GenerateHint::BEST_PERF;
145
+ } else {
146
+ OPENVINO_THROW (" Unsupported \" GENERATE_HINT\" provided: " ,
147
+ val,
148
+ " . Please select either \" FAST_COMPILE\" or \" BEST_PERF\" ." );
149
+ }
150
+ return res;
151
+ }
152
+
153
+ static std::string toString (const ::intel_npu::npuw::llm::GenerateHint& val) {
154
+ std::string res;
155
+ switch (val) {
156
+ case ::intel_npu::npuw::llm::GenerateHint::FAST_COMPILE:
157
+ res = " FAST_COMPILE" ;
158
+ break ;
159
+ case ::intel_npu::npuw::llm::GenerateHint::BEST_PERF:
160
+ res = " BEST_PERF" ;
161
+ break ;
162
+ default :
163
+ OPENVINO_THROW (" Can't convert provided \" GENERATE_HINT\" : " , int (val), " to string." );
164
+ }
165
+ return res;
166
+ }
167
+
168
+ static OptionMode mode () {
169
+ return OptionMode::CompileTime;
170
+ }
171
+
172
+ static bool isPublic () {
173
+ return true ;
174
+ }
175
+ };
69
176
} // namespace intel_npu
0 commit comments