|
6 | 6 | #include "intel_gpu/plugin/remote_context.hpp"
|
7 | 7 | #include "openvino/core/any.hpp"
|
8 | 8 | #include "openvino/core/model.hpp"
|
9 |
| -#include "openvino/op/concat.hpp" |
10 |
| -#include "openvino/op/convert.hpp" |
11 |
| -#include "openvino/op/gather.hpp" |
12 | 9 | #include "openvino/op/loop.hpp"
|
13 | 10 | #include "openvino/op/lstm_sequence.hpp"
|
14 |
| -#include "openvino/op/paged_attention.hpp" |
15 | 11 | #include "openvino/op/search_sorted.hpp"
|
16 | 12 | #include "openvino/op/stft.hpp"
|
17 |
| -#include "openvino/pass/pattern/matcher.hpp" |
18 |
| -#include "openvino/pass/pattern/op/label.hpp" |
19 |
| -#include "openvino/pass/pattern/op/or.hpp" |
20 |
| -#include "openvino/pass/pattern/op/wrap_type.hpp" |
21 | 13 | #include "ov_ops/dynamic_quantize.hpp"
|
22 | 14 | #include "openvino/runtime/internal_properties.hpp"
|
23 | 15 | #include "intel_gpu/runtime/internal_properties.hpp"
|
24 | 16 | #include "openvino/runtime/plugin_config.hpp"
|
25 | 17 | #include "openvino/runtime/properties.hpp"
|
| 18 | +#include "transformations/utils/utils.hpp" |
26 | 19 |
|
27 | 20 |
|
28 | 21 | namespace ov::intel_gpu {
|
@@ -86,32 +79,6 @@ bool requires_new_shape_infer(const std::shared_ptr<ov::Node>& op) {
|
86 | 79 | return false;
|
87 | 80 | }
|
88 | 81 |
|
89 |
| -bool is_llm(const ov::Model& model) { |
90 |
| - using namespace ov::pass::pattern; |
91 |
| - |
92 |
| - auto past = wrap_type<ov::op::v6::ReadValue>(); |
93 |
| - auto convert_past = wrap_type<ov::op::v0::Convert>({past}); |
94 |
| - auto gather_input = std::make_shared<ov::pass::pattern::op::Or>(OutputVector{past, convert_past}); |
95 |
| - auto beam_idx = wrap_type<ov::op::v0::Parameter>(); |
96 |
| - auto gather_past = wrap_type<ov::op::v8::Gather>({gather_input, beam_idx, wrap_type<ov::op::v0::Constant>()}); |
97 |
| - auto gather_convert = wrap_type<ov::op::v0::Convert>({gather_past}); |
98 |
| - auto concat_past_input = std::make_shared<ov::pass::pattern::op::Or>(OutputVector{past, convert_past, gather_past, gather_convert}); |
99 |
| - auto concat = wrap_type<ov::op::v0::Concat>({concat_past_input, any_input()}); |
100 |
| - auto convert_present = wrap_type<ov::op::v0::Convert>({concat}); |
101 |
| - auto present_input = std::make_shared<ov::pass::pattern::op::Or>(OutputVector{concat, convert_present}); |
102 |
| - auto present = wrap_type<ov::op::v6::Assign>({present_input}); |
103 |
| - |
104 |
| - auto kvcache_matcher = std::make_shared<ov::pass::pattern::Matcher>(present, "KVCacheMatcher"); |
105 |
| - |
106 |
| - for (auto& op : model.get_ordered_ops()) { |
107 |
| - if (kvcache_matcher->match(op) || ov::is_type<ov::op::PagedAttentionExtension>(op)) { |
108 |
| - return true; |
109 |
| - } |
110 |
| - } |
111 |
| - |
112 |
| - return false; |
113 |
| -} |
114 |
| - |
115 | 82 | } // namespace
|
116 | 83 |
|
117 | 84 | ExecutionConfig::ExecutionConfig() : ov::PluginConfig() { }
|
@@ -163,7 +130,7 @@ void ExecutionConfig::apply_rt_info(const IRemoteContext* context, const ov::RTM
|
163 | 130 | }
|
164 | 131 |
|
165 | 132 | void ExecutionConfig::apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) {
|
166 |
| - apply_rt_info(context, get_rt_info(model), is_llm(model)); |
| 133 | + apply_rt_info(context, get_rt_info(model), ov::op::util::is_large_language_model(model)); |
167 | 134 |
|
168 | 135 | const auto& ops = model.get_ops();
|
169 | 136 |
|
|
0 commit comments