Skip to content

Commit 0a348d8

Browse files
authored
[core] Convert reference use intrinsic implementation for ChromeOS (#26870)
### Details:
- Add an intrinsics-based implementation for the convert reference

### Tickets:
- CVS-152654
1 parent 500284d commit 0a348d8

File tree

8 files changed

+406
-183
lines changed

8 files changed

+406
-183
lines changed

src/core/reference/CMakeLists.txt

+12
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,18 @@ add_library(${TARGET_NAME} STATIC ${LIBRARY_SRC} ${PUBLIC_HEADERS})
2121
add_library(openvino::reference ALIAS ${TARGET_NAME})
2222
set_target_properties(${TARGET_NAME} PROPERTIES EXPORT_NAME reference)
2323

24+
if(ENABLE_AVX2)
25+
ov_avx2_optimization_flags(avx2_flags)
26+
27+
set(OV_REFERENCE_X86_AVX2_SRC
28+
${CMAKE_CURRENT_SOURCE_DIR}/src/op/convert_x86_intrinsics.cpp
29+
)
30+
set_source_files_properties(${OV_REFERENCE_X86_AVX2_SRC} PROPERTIES COMPILE_OPTIONS "${avx2_flags}"
31+
SKIP_UNITY_BUILD_INCLUSION ON
32+
SKIP_PRECOMPILE_HEADERS ON)
33+
target_compile_definitions(${TARGET_NAME} PRIVATE HAVE_AVX2)
34+
endif()
35+
2436
ov_build_target_faster(${TARGET_NAME}
2537
UNITY
2638
PCH PRIVATE "src/precomp.hpp")

src/core/reference/include/openvino/reference/convert.hpp

+7-9
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@
1414
#include "openvino/core/type/nf4.hpp"
1515

1616
#if !defined(OS_CHROMEOS) && (defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64))
17-
# define OV_CORE_USE_XBYAK_JIT 1
18-
#else
19-
# define OV_CORE_USE_XBYAK_JIT 0
17+
# define OV_CORE_USE_XBYAK_JIT
18+
#endif
19+
20+
#if defined(OS_CHROMEOS) && defined(OPENVINO_ARCH_X86_64) && defined(HAVE_AVX2)
21+
# define OV_CORE_USE_INTRINSICS
2022
#endif
2123

2224
namespace ov {
@@ -33,12 +35,12 @@ namespace reference {
3335
namespace detail {
3436

3537
template <typename TI, typename TO>
36-
typename std::enable_if<!std::is_same<TO, char>::value, TO>::type convert(const TI v) {
38+
constexpr typename std::enable_if<!std::is_same<TO, char>::value, TO>::type convert(const TI v) {
3739
return static_cast<TO>(v);
3840
}
3941

4042
template <typename TI, typename TO>
41-
typename std::enable_if<std::is_same<TO, char>::value, TO>::type convert(const TI v) {
43+
constexpr typename std::enable_if<std::is_same<TO, char>::value, TO>::type convert(const TI v) {
4244
return static_cast<char>(static_cast<bool>(v));
4345
}
4446
} // namespace detail
@@ -62,8 +64,6 @@ void convert(const TI* arg, TO* out, const size_t count) {
6264
std::transform(arg, arg + count, out, detail::convert<TI, TO>);
6365
}
6466

65-
#if OV_CORE_USE_XBYAK_JIT
66-
6767
template <>
6868
void convert<uint8_t, float16>(const uint8_t* arg, float16* out, size_t count);
6969
template <>
@@ -79,8 +79,6 @@ void convert<bfloat16, float16>(const bfloat16* arg, float16* out, size_t count)
7979
template <>
8080
void convert<bfloat16, float>(const bfloat16* arg, float* out, size_t count);
8181

82-
#endif // OV_CORE_USE_XBYAK_JIT
83-
8482
template <>
8583
void convert<int32_t, float16>(const int32_t* arg, float16* out, size_t count);
8684

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// Copyright (C) 2018-2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#pragma once
6+
7+
#include <functional>
8+
#include <type_traits>
9+
10+
#include "openvino/reference/convert.hpp"
11+
12+
namespace ov {
13+
14+
// Forward declaration from the inference dev API (cannot be included)
15+
extern bool with_cpu_x86_avx2();
16+
17+
namespace reference {
18+
19+
struct NoClamp {
20+
static constexpr bool enabled = false;
21+
22+
// Generic implementation
23+
template <class T>
24+
static constexpr T apply(const T v) {
25+
return v;
26+
}
27+
28+
// Specialize for optimization
29+
template <class T, class R>
30+
static R apply(const T v);
31+
};
32+
33+
template <class TI, class TO>
34+
struct Clamp {
35+
static constexpr bool enabled = true;
36+
37+
// Generic implementation
38+
static constexpr TO apply(const TI v) {
39+
return (v < std::numeric_limits<TO>::lowest())
40+
? std::numeric_limits<TO>::lowest()
41+
: ((v > std::numeric_limits<TO>::max()) ? std::numeric_limits<TO>::max()
42+
: detail::convert<TI, TO>(v));
43+
}
44+
45+
// Specialize for optimization
46+
template <class T, class R>
47+
static R apply(const T v);
48+
};
49+
50+
template <class TI, class TO>
51+
struct Converter {
52+
static constexpr size_t vec_f32_size = 32 / sizeof(float);
53+
54+
// Generic implementation to convert tail elements
55+
template <class ClampMode>
56+
static void tail(const TI* in, TO* out, size_t n) {
57+
std::transform(in, in + n, out, [](const TI v) {
58+
return detail::convert<decltype(ClampMode::apply(v)), TO>(ClampMode::apply(v));
59+
});
60+
}
61+
62+
// Helper struct used to define an optimized version of the conversion
63+
template <class ClampMode>
64+
struct Optimized {
65+
static constexpr bool enabled = false;
66+
static void run(const TI* in, TO* out) {}
67+
};
68+
69+
// Generic implementation of conversion
70+
template <class ClampMode, typename std::enable_if<!Optimized<ClampMode>::enabled>::type* = nullptr>
71+
static void apply(const TI* in, TO* out, size_t n) {
72+
return tail<ClampMode>(in, out, n);
73+
}
74+
75+
// Enabled when a specialization of the Optimized struct is defined for this conversion
76+
template <class ClampMode, typename std::enable_if<Optimized<ClampMode>::enabled>::type* = nullptr>
77+
static void apply(const TI* in, TO* out, size_t n) {
78+
if (with_cpu_x86_avx2()) {
79+
for (; n >= vec_f32_size; n -= vec_f32_size, in += vec_f32_size, out += vec_f32_size) {
80+
Optimized<ClampMode>::run(in, out);
81+
}
82+
}
83+
tail<ClampMode>(in, out, n);
84+
}
85+
};
86+
87+
} // namespace reference
88+
} // namespace ov
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// Copyright (C) 2018-2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#pragma once
6+
7+
#ifdef OV_CORE_USE_INTRINSICS
8+
# include <immintrin.h>
9+
10+
# include "openvino/reference/utils/convert_util.hpp"
11+
12+
namespace ov {
13+
namespace reference {
14+
# ifdef HAVE_AVX2
15+
16+
// Clamp optimized specializations
17+
template <>
18+
__m128i NoClamp::apply<__m256i, __m128i>(const __m256i vec_i32);
19+
20+
template <>
21+
template <>
22+
__m256 Clamp<float, float16>::apply<__m256, __m256>(const __m256 vec_f32);
23+
24+
// Conversion optimized specializations
25+
// --- f32 -> other
26+
template <>
27+
template <>
28+
struct Converter<float, float16>::Optimized<NoClamp> {
29+
static constexpr bool enabled = true;
30+
static void run(const float* in, float16* out);
31+
};
32+
33+
template <>
34+
template <>
35+
struct Converter<float, float16>::Optimized<Clamp<float, float16>> {
36+
static constexpr bool enabled = true;
37+
static void run(const float* in, float16* out);
38+
};
39+
40+
template <>
41+
template <>
42+
struct Converter<float, int8_t>::Optimized<NoClamp> {
43+
static constexpr bool enabled = true;
44+
static void run(const float* in, int8_t* out);
45+
};
46+
47+
// --- f16 -> other
48+
template <>
49+
template <>
50+
struct Converter<float16, float>::Optimized<NoClamp> {
51+
static constexpr bool enabled = true;
52+
static void run(const float16* in, float* out);
53+
};
54+
55+
template <>
56+
template <>
57+
struct Converter<float16, int8_t>::Optimized<NoClamp> {
58+
static constexpr bool enabled = true;
59+
static void run(const float16* in, int8_t* out);
60+
};
61+
62+
// --- bf16 -> other
63+
template <>
64+
template <>
65+
struct Converter<bfloat16, float16>::Optimized<Clamp<float, float16>> {
66+
static constexpr bool enabled = true;
67+
static void run(const bfloat16* in, float16* out);
68+
};
69+
70+
template <>
71+
template <>
72+
struct Converter<bfloat16, float>::Optimized<NoClamp> {
73+
static constexpr bool enabled = true;
74+
static void run(const bfloat16* in, float* out);
75+
};
76+
77+
// --- u8 -> other
78+
template <>
79+
template <>
80+
struct Converter<uint8_t, float16>::Optimized<NoClamp> {
81+
static constexpr bool enabled = true;
82+
static void run(const uint8_t* in, float16* out);
83+
};
84+
# endif // HAVE_AVX2
85+
} // namespace reference
86+
} // namespace ov
87+
#endif

0 commit comments

Comments
 (0)