-
Notifications
You must be signed in to change notification settings - Fork 2.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: [CPU][ARM] Weights compression f32->f16 is moved to CPU Plug-in side #21080
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,12 @@ TRANSFORMATIONS_API void unmark_as_decompression(const std::shared_ptr<Node>& no | |
|
||
TRANSFORMATIONS_API bool is_decompression(const std::shared_ptr<Node>& node); | ||
|
||
TRANSFORMATIONS_API void mark_as_compression(const std::shared_ptr<Node>& node); | ||
|
||
TRANSFORMATIONS_API void unmark_as_compression(const std::shared_ptr<Node>& node); | ||
|
||
TRANSFORMATIONS_API bool is_compression(const std::shared_ptr<Node>& node); | ||
|
||
/** | ||
* @ingroup ie_runtime_attr_api | ||
* @brief Decompression class represents runtime info attribute that marks operation | ||
|
@@ -43,4 +49,19 @@ class TRANSFORMATIONS_API Decompression : public RuntimeAttribute { | |
} | ||
}; | ||
|
||
// Runtime info attribute registered under the RTTI name "Compression" (version "0"). | ||
// It declares no data members of its own. | ||
// NOTE(review): presumably this is the marker attached by mark_as_compression() - confirm against the implementation. | ||
class TRANSFORMATIONS_API Compression : public RuntimeAttribute { | ||
public: | ||
OPENVINO_RTTI("Compression", "0"); | ||
|
||
Compression() = default; | ||
|
||
// Visits no attributes and always returns true. | ||
bool visit_attributes(AttributeVisitor& visitor) override { | ||
return true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it really necessary to store this rt_info to IR? |
||
} | ||
|
||
// Always returns false. | ||
// NOTE(review): presumably this keeps the mark from being propagated when runtime info is copied between nodes - confirm. | ||
bool is_copyable() const override { | ||
return false; | ||
} | ||
}; | ||
|
||
} // namespace ov |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ | |
#include "openvino/op/result.hpp" | ||
#include "openvino/op/util/precision_sensitive_attribute.hpp" | ||
#include "openvino/pass/constant_folding.hpp" | ||
#include "transformations/rt_info/decompression.hpp" | ||
#include "transformations/rt_info/disable_fp16_compression.hpp" | ||
|
||
using namespace ov; | ||
|
@@ -48,6 +49,7 @@ bool ov::pass::AlignMixedFP32FP16Types::run_on_model(const std::shared_ptr<ov::M | |
copy_runtime_info(incoming_node, convert); | ||
input.replace_source_output(convert); | ||
disable_fp16_compression(convert); | ||
mark_as_compression(convert); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These converts are decompression converts: they upcast to fp32 for precision-sensitive subgraphs. Is it possible to rename |
||
pass::disable_constant_folding(convert); | ||
is_changed = true; | ||
} | ||
|
@@ -76,6 +78,7 @@ bool ov::pass::AlignMixedFP32FP16Types::run_on_model(const std::shared_ptr<ov::M | |
auto init_name = node->get_friendly_name() + "_compressed_to_f16"; | ||
convert->set_friendly_name(generate_uniq_name(init_name)); | ||
out_inputs.replace_source_output(convert); | ||
mark_as_compression(convert); | ||
pass::disable_constant_folding(convert); | ||
is_changed = true; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please add a comment explaining why we need this rt_info? It should make clear why we cannot use the existing ones and need a new rt_info.