Commit 72268b5

[CPU] Add Clamp for FakeConvertDecomposition (openvinotoolkit#28651)
### Details:
- *The ngraph FakeConvert layer applies clamp for f8 (f8e4m3 applies clamp, f8e5m2 partially applies clamp), while the ngraph Convert layer doesn't apply clamp at all. The idea is therefore to add a Clamp layer in FakeConvertDecomposition, to ensure the clamp behavior of FakeConvert is still preserved for plugins after decomposition.*
- *The ngraph reference emulate_f8e4m3_on_fp16 applies clamp to overflowed values as well as to NaN (f8e4m3 has no INF in its specification). However, emulate_f8e5m2_on_fp16 only applies clamp to overflowed values (via the can_round flag), not to INF. To align the behavior of f8e4m3 and f8e5m2, clamp for INF is added to emulate_f8e5m2_on_fp16.*
- *Test cases are added that reproduce the issue beforehand.*

### Tickets:
- *[CVS-160719](https://jira.devtools.intel.com/browse/CVS-160719)*
- *[CVS-161459](https://jira.devtools.intel.com/browse/CVS-161459)*
1 parent 9d86b8c commit 72268b5

File tree

9 files changed: +84 -33 lines changed
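To make the fix concrete before the per-file diffs: a minimal plain-C++ sketch (illustrative names, not OpenVINO API; rounding near the range boundary is simplified away) of the behavior gap the Clamp closes. A bare Convert-style narrowing overflows to ±inf, while the FakeConvert reference saturates to the f8 finite range:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

// Largest finite f8e5m2 value per the FP8 spec (f8e4m3 has no INF at all,
// which is why its reference emulation already clamps overflow and NaN).
constexpr float F8E5M2_MAX = 57344.0f;

// Mimics a bare Convert: out-of-range values overflow to +/-inf.
float convert_no_clamp(float x) {
    return std::fabs(x) > F8E5M2_MAX ? std::copysign(INFINITY, x) : x;
}

// Mimics Clamp + Convert: out-of-range values saturate to the finite range.
float convert_with_clamp(float x) {
    return std::clamp(x, -F8E5M2_MAX, F8E5M2_MAX);
}

int main() {
    // Without the Clamp the decomposed graph emits inf; FakeConvert expects 57344.
    std::printf("%g vs %g\n", convert_no_clamp(1e6f), convert_with_clamp(1e6f));
}
```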

src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp

+13 -2

@@ -7,6 +7,7 @@
 #include "itt.hpp"
 #include "openvino/core/rt_info.hpp"
 #include "openvino/op/add.hpp"
+#include "openvino/op/clamp.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/convert.hpp"
 #include "openvino/op/divide.hpp"
@@ -41,20 +42,30 @@ ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() {
         data = decomp_ops.add(data.get_node_shared_ptr());
     }

+    // Align with clamp behavior of FakeConvert in ngraph reference
+    const auto lower_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3
+                                 ? static_cast<float>(std::numeric_limits<ov::float8_e4m3>::lowest())
+                                 : static_cast<float>(std::numeric_limits<ov::float8_e5m2>::lowest());
+    const auto upper_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3
+                                 ? static_cast<float>(std::numeric_limits<ov::float8_e4m3>::max())
+                                 : static_cast<float>(std::numeric_limits<ov::float8_e5m2>::max());
+
     std::shared_ptr<Node> result;
     const auto scale = decomp_ops.make<ov::op::v1::Multiply>(data, input_scale);
     if (fake_convert_node->get_input_size() == 2) {
+        const auto clamp = decomp_ops.make<ov::op::v0::Clamp>(scale, lower_bound, upper_bound);
         const auto downconvert =
-            decomp_ops.make<ov::op::v0::Convert>(scale, fake_convert_node->get_destination_element_type());
+            decomp_ops.make<ov::op::v0::Convert>(clamp, fake_convert_node->get_destination_element_type());
         const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);

         result = decomp_ops.make<ov::op::v1::Divide>(upconvert, input_scale);
     } else {
         const Output<Node> input_shift{fake_convert_node->input_value(2)};
         const auto shift = decomp_ops.make<ov::op::v1::Subtract>(scale, input_shift);

+        const auto clamp = decomp_ops.make<ov::op::v0::Clamp>(shift, lower_bound, upper_bound);
         const auto downconvert =
-            decomp_ops.make<ov::op::v0::Convert>(shift, fake_convert_node->get_destination_element_type());
+            decomp_ops.make<ov::op::v0::Convert>(clamp, fake_convert_node->get_destination_element_type());
         const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);

         const auto deshift = decomp_ops.make<ov::op::v1::Add>(upconvert, input_shift);
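For orientation, a hedged sketch (not code from the commit) of the subgraph the pass now emits for the two-input FakeConvert, built against the public OpenVINO C++ API with illustrative shapes and an f8e5m2 destination:

```cpp
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/op/clamp.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/divide.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/parameter.hpp"

// Multiply(scale) -> Clamp(f8 finite range) -> Convert(f8) -> Convert(back) -> Divide(scale)
std::shared_ptr<ov::Model> make_decomposed_fake_convert() {
    const auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{2, 4});
    const auto scale = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1}, {0.5f});

    const auto scaled = std::make_shared<ov::op::v1::Multiply>(data, scale);
    // The pass derives these bounds from std::numeric_limits<ov::float8_e5m2>; hard-coded here.
    const auto clamp = std::make_shared<ov::op::v0::Clamp>(scaled, -57344.0, 57344.0);
    const auto down = std::make_shared<ov::op::v0::Convert>(clamp, ov::element::f8e5m2);
    const auto up = std::make_shared<ov::op::v0::Convert>(down, ov::element::f32);
    const auto result = std::make_shared<ov::op::v1::Divide>(up, scale);

    return std::make_shared<ov::Model>(ov::OutputVector{result}, ov::ParameterVector{data});
}
```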

src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp

+11 -2

@@ -80,17 +80,26 @@ TEST_P(FakeConvertDecompositionTest, CompareFunctions) {
     params.push_back(input_data);
     std::shared_ptr<Node> data = input_data;

+    const auto lower_bound = dst_prec == ov::element::f8e4m3
+                                 ? static_cast<float>(std::numeric_limits<ov::float8_e4m3>::lowest())
+                                 : static_cast<float>(std::numeric_limits<ov::float8_e5m2>::lowest());
+    const auto upper_bound = dst_prec == ov::element::f8e4m3
+                                 ? static_cast<float>(std::numeric_limits<ov::float8_e4m3>::max())
+                                 : static_cast<float>(std::numeric_limits<ov::float8_e5m2>::max());
+
     std::shared_ptr<Node> result;
     const auto scale = std::make_shared<ov::op::v1::Multiply>(data, input_scale);
     if (default_shift) {
-        const auto downconvert = std::make_shared<ov::op::v0::Convert>(scale, dst_prec);
+        const auto clamp = std::make_shared<ov::op::v0::Clamp>(scale, lower_bound, upper_bound);
+        const auto downconvert = std::make_shared<ov::op::v0::Convert>(clamp, dst_prec);
         const auto upconvert = std::make_shared<ov::op::v0::Convert>(downconvert, data_prec);

         result = std::make_shared<ov::op::v1::Divide>(upconvert, input_scale);
     } else {
         const auto shift = std::make_shared<ov::op::v1::Subtract>(scale, input_shift);

-        const auto downconvert = std::make_shared<ov::op::v0::Convert>(shift, dst_prec);
+        const auto clamp = std::make_shared<ov::op::v0::Clamp>(shift, lower_bound, upper_bound);
+        const auto downconvert = std::make_shared<ov::op::v0::Convert>(clamp, dst_prec);
         const auto upconvert = std::make_shared<ov::op::v0::Convert>(downconvert, data_prec);

         const auto deshift = std::make_shared<ov::op::v1::Add>(upconvert, input_shift);

src/core/reference/include/openvino/reference/fake_convert.hpp

+6 -4

@@ -18,23 +18,25 @@ namespace func {
  *
  * @param arg_f Pointer to the input data.
  * @param out_f Pointer to the otuput data.
- * @param count Number of elements in the data input.
+ * @param count Number of elements in the data input.
+ * @param use_clamp If use clamp.
  */
-void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t count);
+void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t count, bool use_clamp = true);

 /**
  * @brief Emulation of conversion fp16 value to f8e4m3 format
  *
  * @param arg_f Pointer to the input data.
  * @param out_f Pointer to the otuput data.
- * @param count Number of elements in the data input.
+ * @param count Number of elements in the data input.
+ * @param use_clamp If use clamp.
  *
  * Exponent denormal values 0 -7
  * Exponent normal values 1..15 -6..8 (7 - exponent)
  * Exponent NaN values 15 8
  *
  */
-void emulate_f8e4m3_on_fp16(const float16* arg_f, float16* out_f, size_t count);
+void emulate_f8e4m3_on_fp16(const float16* arg_f, float16* out_f, size_t count, bool use_clamp = true);
 }  // namespace func

 namespace fake_convert_details {

src/core/reference/src/op/fake_convert.cpp

+17 -12

@@ -7,7 +7,15 @@
 namespace ov {
 namespace reference {
 namespace func {
-void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t count) {
+/**
+ * @brief Emulation of conversion fp16 value to f8e5m2 format
+ *
+ * @param arg_f Pointer to the input data.
+ * @param out_f Pointer to the otuput data.
+ * @param count Number of elements in the data input.
+ * @param use_clamp If use clamp.
+ */
+void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t count, bool use_clamp) {
     const auto arg_u = reinterpret_cast<const uint16_t*>(arg_f);
     auto out_u = reinterpret_cast<uint16_t*>(out_f);
     uint16_t val_bit_repr;
@@ -24,13 +32,6 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c
     for (size_t i = 0; i < count; ++i) {
         /// converts float number to half precision in round-to-nearest-even mode and returns half with converted value.
         val_bit_repr = arg_u[i];
-        /// 0x7c00 = 0111110000000000 - exponent mask
-        /// s 11111 xxx xxxx xxxx - is nan (if some x is 1) or inf (if all x is 0)
-        /// 0x7800 is 0111100000000000 and 0x400 is 0000010000000000
-        /// number is not normal if all exponent is 1 or 0
-        /// 0x7f00 is 0 11111 1100000000
-        /// 0x7b00 is 0 11110 1100000000
-        const bool can_round = ((val_bit_repr & 0x7F00) < 0x7B00) ? true : false;
         /// s 11111 xxx xxxx xxxx - is nan (if some x is 1) or inf (if all x is 0)
         const bool is_naninf = ((val_bit_repr & fp16_inf) == fp16_inf) ? true : false;
         /* nearest rounding masks */
@@ -39,7 +40,7 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c
         /// rne_tie - 0x180 is 0 00000 0110000000 or 384.0
         uint16_t rnmask_tie = (val_bit_repr & rne_tie);

-        if (!is_naninf && can_round) {
+        if (!is_naninf) {
             /* round to nearest even, if rne_mask is enabled */
             /* 0 00000 0010000000, find grs patterns */
             // 0xx - do nothing
@@ -48,6 +49,10 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c
             val_bit_repr += (((rnmask > 0x0080) || (rnmask_tie == rne_tie)) << lshift);
         }
         val_bit_repr &= mask_mant; /* truncation */
+        if (use_clamp) {
+            // clamp inf to max and -inf to lowest, S.11111.00 -> S.11110.11
+            val_bit_repr -= (((val_bit_repr & 0x7F00) == fp16_inf) << lshift);
+        }
         out_u[i] = val_bit_repr;
     }
 }
@@ -57,19 +62,19 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c
  *
  * @param arg_f Pointer to the input data.
  * @param out_f Pointer to the otuput data.
- * @param count Number of elements in the data input.
+ * @param count Number of elements in the data input.
+ * @param use_clamp If use clamp.
  *
  * Exponent denormal values 0 -7
  * Exponent normal values 1..15 -6..8 (7 - exponent)
 * Exponent NaN values 15 8
  *
  */
-void emulate_f8e4m3_on_fp16(const float16* arg_f, float16* out_f, size_t count) {
+void emulate_f8e4m3_on_fp16(const float16* arg_f, float16* out_f, size_t count, bool use_clamp) {
     const auto arg_u = reinterpret_cast<const uint16_t*>(arg_f);
     auto out_u = reinterpret_cast<uint16_t*>(out_f);
     uint16_t val_bit_repr;

-    constexpr auto use_clamp = true;
     constexpr auto exp_bits = 5;
     constexpr auto mbits = 9;
     constexpr auto non_mant_bits = exp_bits + 1; /// exponent + sign
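The new use_clamp branch is a pure bit manipulation on the fp16 representation. A standalone sketch of just that step, assuming the kernel's constants (fp16_inf = 0x7C00; lshift = 8, since f8e5m2 keeps only the top two of fp16's ten mantissa bits):

```cpp
#include <cstdint>
#include <cstdio>

constexpr uint16_t fp16_inf = 0x7C00;  // 0 11111 0000000000
constexpr int lshift = 8;              // 10 fp16 mantissa bits - 2 f8e5m2 mantissa bits

// After mantissa truncation +/-inf is S.11111.00; subtracting 1 << lshift turns it
// into S.11110.11, i.e. +/-57344, the f8e5m2 max/lowest. NaN keeps a nonzero kept
// mantissa, fails the mask test, and passes through unchanged.
uint16_t clamp_inf_to_max(uint16_t val_bit_repr) {
    val_bit_repr -= static_cast<uint16_t>(((val_bit_repr & 0x7F00) == fp16_inf) << lshift);
    return val_bit_repr;
}

int main() {
    std::printf("0x%04X\n", clamp_inf_to_max(0x7C00));  // +inf -> 0x7B00 (+57344)
    std::printf("0x%04X\n", clamp_inf_to_max(0xFC00));  // -inf -> 0xFB00 (-57344)
    std::printf("0x%04X\n", clamp_inf_to_max(0x7E00));  // NaN  -> 0x7E00, unchanged
}
```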

src/core/src/type/float8_e5m2.cpp

+1 -1

@@ -28,7 +28,7 @@ constexpr uint8_t f8e5m2_m_mask = 0x03;  // f8e5m2 mantissa bit mask

 uint8_t f32_to_f8e5m2_bits(const float value) {
     auto f16 = static_cast<float16>(value);
-    reference::func::emulate_f8e5m2_on_fp16(&f16, &f16, 1);
+    reference::func::emulate_f8e5m2_on_fp16(&f16, &f16, 1, false);
     return static_cast<uint8_t>((f16.to_bits() >> byte_shift));
 }
 }  // namespace
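Passing use_clamp = false here is deliberate: the scalar ov::float8_e5m2 type keeps IEEE-style overflow-to-infinity, and only FakeConvert saturates. A hedged usage sketch (the public header path is assumed):

```cpp
#include <cstdio>

#include "openvino/core/type/float8_e5m2.hpp"  // assumed public header path

int main() {
    // 65520.0f rounds to +inf in fp16, and with use_clamp == false the scalar
    // conversion keeps it: bits 0 11111 00 == 0x7C (see the tests below).
    const auto f8 = ov::float8_e5m2(65520.0f);
    std::printf("0x%02X\n", static_cast<unsigned>(f8.to_bits()));  // 0x7C
}
```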

src/core/tests/eval.cpp

+7 -4

@@ -3569,10 +3569,13 @@ TEST(eval, evaluate_fake_convert_f32_to_f8e5m2_big_scale_1) {
     EXPECT_EQ(result.get_element_type(), et);
     EXPECT_EQ(result.get_shape(), data_shape);

-    constexpr auto inf = std::numeric_limits<float>::infinity();
-    EXPECT_THAT(
-        read_vector<float>(result),
-        Pointwise(FloatEq(), std::vector<float>{fp8::MAX_F8E5M2 / 2.f, fp8::MAX_F8E5M2, fp8::MAX_F8E5M2, inf, inf}));
+    EXPECT_THAT(read_vector<float>(result),
+                Pointwise(FloatEq(),
+                          std::vector<float>{fp8::MAX_F8E5M2 / 2.f,
+                                             fp8::MAX_F8E5M2,
+                                             fp8::MAX_F8E5M2,
+                                             fp8::MAX_F8E5M2,
+                                             fp8::MAX_F8E5M2}));
 }

 TEST(eval, evaluate_fake_convert_f32_matching_f8_to_f8e5m2_scale_1) {

src/core/tests/float8_e5m2.cpp

+6 -6

@@ -191,28 +191,28 @@ TEST(F8E5M2Test, f8e5m2_num_limits_exp) {
     EXPECT_EQ(max_exp10, 4);
 }

-TEST(F8E5M2Test, f32_ge_f8_max_round_to_inf) {
+TEST(F8E5M2Test, f32_as_f16_inf_gt_f8_max_round_to_inf) {
     const auto f8 = ov::float8_e5m2(65520.0f);

     EXPECT_EQ(f8.to_bits(), 0b01111100);
 }

-TEST(F8E5M2Test, f32_ge_f8_max_round_to_max) {
+TEST(F8E5M2Test, f32_gt_f16_max_gt_f8_max_round_to_inf) {
     const auto f8 = ov::float8_e5m2(65519.9f);

-    EXPECT_EQ(f8.to_bits(), 0b01111011);
+    EXPECT_EQ(f8.to_bits(), 0b01111100);
 }

-TEST(F8E5M2Test, f32_ge_f8_max_round_to_minus_inf) {
+TEST(F8E5M2Test, f32_as_f16_minus_inf_lt_f8_lowest_round_to_minus_inf) {
     const auto f8 = ov::float8_e5m2(-65520.0f);

     EXPECT_EQ(f8.to_bits(), 0b11111100);
 }

-TEST(F8E5M2Test, f32_ge_f8_max_round_to_lowest) {
+TEST(F8E5M2Test, f32_lt_f16_lowest_lt_f8_lowest_round_to_minus_inf) {
     const auto f8 = ov::float8_e5m2(-65519.9f);

-    EXPECT_EQ(f8.to_bits(), 0b11111011);
+    EXPECT_EQ(f8.to_bits(), 0b11111100);
 }

 template <class TContainer>

src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp

+4

@@ -175,6 +175,9 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*smoke_TopK/TopKLayerTest.Inference.*_k=21_.*_sort=value_modelType=f16_trgDev=CPU.*)",
         // Issue: 121812
         R"(.*ConvertCPULayerTest.*outFmts=(nhwc|nChw8c|nChw16c).*)",
+        // Issue: 123320
+        // Input precision bf16 is converted to fp32 by logic in core_config.cpp during ngraph reference test.
+        R"(.*FakeConvertLayerTest.*dataPrecision=bf16.*)",
         // Need to generate sequence exactly in the i64 data type. Enable in scope of i64 enabling.
         R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)",
         // Issue: 123815 (Tests are sensintive to available thread count on testing machines)
@@ -532,6 +535,7 @@ std::vector<std::string> disabledTestPatterns() {
     retVector.emplace_back(R"(.*CompileModelWithCacheEncryptionTest.*CanImportModelWithoutException.*)");
     retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*f16.*)");
     retVector.emplace_back(R"(.*ConcatSDPTest.*f16.*)");
+    retVector.emplace_back(R"(.*FakeConvertLayerTest.*f16.*)");
     retVector.emplace_back(R"(.*CoreThreadingTestsWithCacheEnabled.*smoke_compiled_model_cache_enabled.*)");
     retVector.emplace_back(R"(.*CoreThreadingTestsWithIter.*smoke_CompileModel.*)");
     retVector.emplace_back(R"(.*CustomOpConvertI64CPUTest.*CompareWithRefs.*)");

src/tests/functional/shared_test_classes/src/single_op/fake_convert.cpp

+19 -2

@@ -4,6 +4,8 @@

 #include "shared_test_classes/single_op/fake_convert.hpp"

+#include <random>
+
 #include "openvino/opsets/opset1.hpp"
 #include "openvino/opsets/opset13.hpp"

@@ -52,9 +54,24 @@ void FakeConvertLayerTest::SetUp() {

     init_input_shapes(data_shapes);

+    std::vector<float> scale_values(ov::shape_size(scale_shape));
+    std::vector<float> shift_values(ov::shape_size(shift_shape));
+    std::mt19937 gen(0);
+    std::uniform_real_distribution<float> dis(0, static_cast<float>(ov::shape_size(scale_shape)));
+    for (auto& scale_value : scale_values)
+        scale_value = dis(gen);
+    for (auto& shift_value : shift_values)
+        shift_value = dis(gen);
+
+    if (data_prec == ov::element::f16) {
+        configuration.insert(ov::hint::inference_precision(ov::element::f16));
+    } else if (data_prec == ov::element::bf16) {
+        configuration.insert(ov::hint::inference_precision(ov::element::bf16));
+    }
+
     const auto data = std::make_shared<opset1::Parameter>(data_prec, inputDynamicShapes.front());
-    const auto scale = std::make_shared<opset1::Constant>(data_prec, scale_shape);
-    const auto shift = std::make_shared<opset1::Constant>(data_prec, shift_shape);
+    const auto scale = std::make_shared<opset1::Constant>(data_prec, scale_shape, scale_values);
+    const auto shift = std::make_shared<opset1::Constant>(data_prec, shift_shape, shift_values);

     const auto fake_convert = default_shift ? std::make_shared<opset13::FakeConvert>(data, scale, dst_prec)
                                             : std::make_shared<opset13::FakeConvert>(data, scale, shift, dst_prec);
