Skip to content

Commit 91a5518

Browse files
authored
[CPU][ARM] JIT Ceiling Operation (openvinotoolkit#27527)
### Details:
- Added JIT emitter for Eltwise Ceiling operation on ARM64 SIMD
- Implemented fp32 optimization replacing C++ Math implementation
- Modified ARM64 executor to support new JIT emitter
- Updated kernel files to include Ceiling in Eltwise operations
- Added test coverage for JIT implementation verification
- Transitioned operation type from Math to Eltwise for better performance

### Tickets:
- openvinotoolkit#27498
1 parent c801f4e commit 91a5518

File tree

9 files changed

+93
-4
lines changed

9 files changed

+93
-4
lines changed

src/plugins/intel_cpu/src/cpu_types.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ static const TypeToNameMap& get_type_to_name_tbl() {
191191
{"Atan", Type::Math},
192192
{"Atanh", Type::Math},
193193
{"Ceil", Type::Math},
194-
{"Ceiling", Type::Math},
194+
{"Ceiling", Type::Eltwise},
195195
{"Cos", Type::Math},
196196
{"Cosh", Type::Math},
197197
{"Floor", Type::Eltwise},
@@ -419,6 +419,7 @@ std::string algToString(const Algorithm alg) {
419419
CASE(EltwiseSubtract);
420420
CASE(EltwiseDivide);
421421
CASE(EltwiseFloor);
422+
CASE(EltwiseCeiling);
422423
CASE(EltwiseFloorMod);
423424
CASE(EltwiseMod);
424425
CASE(EltwiseMaximum);

src/plugins/intel_cpu/src/cpu_types.h

+1
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ enum class Algorithm {
165165
EltwiseSubtract,
166166
EltwiseDivide,
167167
EltwiseFloor,
168+
EltwiseCeiling,
168169
EltwiseFloorMod,
169170
EltwiseMod,
170171
EltwiseMaximum,

src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp

+45-1
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,50 @@ std::set<std::vector<element::Type>> jit_floor_emitter::get_supported_precisions
516516
return {{element::f32}};
517517
}
518518

519+
/// CEILING ///
520+
// Constructs the emitter from a graph node; the execution precision is derived from the node.
521+
jit_ceiling_emitter::jit_ceiling_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
522+
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
523+
const std::shared_ptr<ov::Node>& node)
524+
: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {
525+
}
526+
527+
// Constructs the emitter with an explicitly supplied execution precision (no node required).
528+
jit_ceiling_emitter::jit_ceiling_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
529+
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
530+
const ov::element::Type exec_prc) : jit_emitter(host, host_isa, exec_prc) {
531+
}
532+
533+
// Reports how many inputs the ceiling operation takes (exactly one).
534+
size_t jit_ceiling_emitter::get_inputs_count() const { return 1; }
535+
536+
//Main implementation method that emits the JIT code
537+
void jit_ceiling_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
538+
if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) {
539+
emit_isa<dnnl::impl::cpu::aarch64::asimd>(in_vec_idxs, out_vec_idxs);
540+
} else {
541+
OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel");
542+
}
543+
}
544+
545+
// Template method that generates actual instruction sequence for ceiling operation
546+
// h->frintp() rounds each f32 lane toward plus infinity to an integral value (the result stays floating-point).
547+
template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
548+
void jit_ceiling_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
549+
OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string());
550+
551+
using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits<isa>::TReg;
552+
TReg src = TReg(in_vec_idxs[0]);
553+
TReg dst = TReg(out_vec_idxs[0]);
554+
h->frintp(dst.s, src.s);
555+
}
556+
557+
// Returns the set of input precision combinations this emitter supports.
558+
// Currently only supports 32-bit floating point (f32)
559+
std::set<std::vector<element::Type>> jit_ceiling_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
560+
return {{element::f32}};
561+
}
562+
519563
/// GELU_ERF ///
520564
jit_gelu_erf_emitter::jit_gelu_erf_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
521565
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
@@ -2275,4 +2319,4 @@ std::set<std::vector<element::Type>> jit_tanh_emitter::get_supported_precisions(
22752319

22762320
} // namespace aarch64
22772321
} // namespace intel_cpu
2278-
} // namespace ov
2322+
} // namespace ov

src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp

+31-1
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,36 @@ class jit_floor_emitter : public jit_emitter {
214214
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
215215
};
216216

217+
class jit_ceiling_emitter : public jit_emitter {
218+
public:
219+
// Constructor with explicit precision
220+
jit_ceiling_emitter(dnnl::impl::cpu::aarch64::jit_generator *host,
221+
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
222+
const ov::element::Type exec_prc = ov::element::f32);
223+
224+
// Constructor from node
225+
jit_ceiling_emitter(dnnl::impl::cpu::aarch64::jit_generator *host,
226+
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
227+
const std::shared_ptr<ov::Node>& node);
228+
229+
// Get number of inputs
230+
size_t get_inputs_count() const override;
231+
232+
// Get supported precisions
233+
static std::set<std::vector<element::Type>> get_supported_precisions(
234+
const std::shared_ptr<ov::Node>& node = nullptr);
235+
236+
private:
237+
// Implementation of JIT code emission
238+
void emit_impl(const std::vector<size_t> &in_vec_idxs,
239+
const std::vector<size_t> &out_vec_idxs) const override;
240+
241+
// ISA-specific implementation
242+
template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
243+
void emit_isa(const std::vector<size_t> &in_vec_idxs,
244+
const std::vector<size_t> &out_vec_idxs) const;
245+
};
246+
217247
class jit_gelu_erf_emitter : public jit_emitter {
218248
public:
219249
jit_gelu_erf_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
@@ -943,4 +973,4 @@ class jit_tanh_emitter : public jit_emitter {
943973

944974
} // namespace aarch64
945975
} // namespace intel_cpu
946-
} // namespace ov
976+
} // namespace ov

src/plugins/intel_cpu/src/nodes/eltwise.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ std::set<std::vector<element::Type>> eltwise_precision_helper::get_supported_pre
257257
OV_CASE(Algorithm::EltwiseMultiply, jit_multiply_emitter),
258258
OV_CASE(Algorithm::EltwiseDivide, jit_divide_emitter),
259259
OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter),
260+
OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter),
260261
OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter),
261262
OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter),
262263
OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter),
@@ -636,6 +637,7 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener
636637
OV_CASE(Algorithm::EltwiseMultiply, jit_multiply_emitter),
637638
OV_CASE(Algorithm::EltwiseDivide, jit_divide_emitter),
638639
OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter),
640+
OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter),
639641
OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter),
640642
OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter),
641643
OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter),
@@ -1086,6 +1088,9 @@ const std::map<const ov::DiscreteTypeInfo, Eltwise::Initializer>& Eltwise::getIn
10861088
{ov::op::v1::Mod::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
10871089
node.algorithm = Algorithm::EltwiseMod;
10881090
}},
1091+
{ov::op::v0::Ceiling::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
1092+
node.algorithm = Algorithm::EltwiseCeiling;
1093+
}},
10891094
{ov::op::v0::Floor::get_type_info_static(), [](const std::shared_ptr<ov::Node>& op, Eltwise& node) {
10901095
node.algorithm = Algorithm::EltwiseFloor;
10911096
}},
@@ -1891,6 +1896,7 @@ class EltwiseRefExecutor : public EltwiseRefBaseExecutor<T> {
18911896
case Algorithm::EltwiseSubtract: *dst_ptr_f = src_f[0] - src_f[1]; break;
18921897
case Algorithm::EltwiseMultiply: *dst_ptr_f = src_f[0] * src_f[1]; break;
18931898
case Algorithm::EltwiseDivide: *dst_ptr_f = src_f[0] / src_f[1]; break;
1899+
case Algorithm::EltwiseCeiling: *dst_ptr_f = ceilf(src_f[0]); break;
18941900
case Algorithm::EltwiseFloor: *dst_ptr_f = floorf(src_f[0]); break;
18951901
case Algorithm::EltwiseFloorMod: *dst_ptr_f = src_f[0] - floorf(src_f[0] / src_f[1]) * src_f[1]; break;
18961902
case Algorithm::EltwiseMod: *dst_ptr_f = src_f[0] - truncf(src_f[0] / src_f[1]) * src_f[1]; break;
@@ -2098,6 +2104,7 @@ size_t Eltwise::getOpInputsNum() const {
20982104
case Algorithm::EltwiseRelu:
20992105
case Algorithm::EltwiseGeluErf:
21002106
case Algorithm::EltwiseGeluTanh:
2107+
case Algorithm::EltwiseCeiling:
21012108
case Algorithm::EltwiseFloor:
21022109
case Algorithm::EltwiseElu:
21032110
case Algorithm::EltwiseTanh:

src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ bool JitEltwiseExecutor::isSupported(
2626
Algorithm::EltwiseEqual,
2727
Algorithm::EltwiseExp,
2828
Algorithm::EltwiseFloor,
29+
Algorithm::EltwiseCeiling,
2930
Algorithm::EltwiseGeluErf,
3031
Algorithm::EltwiseGeluTanh,
3132
Algorithm::EltwiseGreater,

src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,7 @@ std::shared_ptr<jit_emitter> jit_uni_eltwise_generic<isa>::create_eltwise_emitte
648648
OV_CASE(Algorithm::EltwiseEqual, ov::intel_cpu::aarch64::jit_equal_emitter),
649649
OV_CASE(Algorithm::EltwiseExp, ov::intel_cpu::aarch64::jit_exp_emitter),
650650
OV_CASE(Algorithm::EltwiseFloor, ov::intel_cpu::aarch64::jit_floor_emitter),
651+
OV_CASE(Algorithm::EltwiseCeiling, ov::intel_cpu::aarch64::jit_ceiling_emitter),
651652
OV_CASE(Algorithm::EltwiseHswish, ov::intel_cpu::aarch64::jit_hswish_emitter),
652653
OV_CASE(Algorithm::EltwiseIsFinite, ov::intel_cpu::aarch64::jit_is_finite_emitter),
653654
OV_CASE(Algorithm::EltwiseIsInf, ov::intel_cpu::aarch64::jit_is_inf_emitter),
@@ -828,6 +829,7 @@ std::set<std::vector<element::Type>> eltwise_precision_helper::get_supported_pre
828829
OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter),
829830
OV_CASE(Algorithm::EltwiseExp, jit_exp_emitter),
830831
OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter),
832+
OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter),
831833
OV_CASE(Algorithm::EltwiseGeluErf, jit_gelu_erf_emitter),
832834
OV_CASE(Algorithm::EltwiseGeluTanh, jit_gelu_tanh_emitter),
833835
OV_CASE(Algorithm::EltwiseGreater, jit_greater_emitter),

src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationType
182182
(activation_type == utils::ActivationTypes::Elu) ||
183183
(activation_type == utils::ActivationTypes::Exp) ||
184184
(activation_type == utils::ActivationTypes::Floor) ||
185+
(activation_type == utils::ActivationTypes::Ceiling) ||
185186
(activation_type == utils::ActivationTypes::HSwish) ||
186187
(activation_type == utils::ActivationTypes::IsInf) ||
187188
(activation_type == utils::ActivationTypes::HardSigmoid) ||
@@ -206,6 +207,7 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationType
206207
}
207208
#endif
208209
if ((activation_type == utils::ActivationTypes::Floor) ||
210+
(activation_type == utils::ActivationTypes::Ceiling) ||
209211
(activation_type == utils::ActivationTypes::IsNaN) ||
210212
(activation_type == utils::ActivationTypes::IsFinite)) {
211213
return "ref";
@@ -246,6 +248,7 @@ const std::map<utils::ActivationTypes, std::vector<std::vector<float>>>& activat
246248
{Clamp, {{-2.0f, 2.0f}}},
247249
{Elu, {{0.1f}}},
248250
{Floor, {{}}},
251+
{Ceiling, {{}}},
249252
{Swish, {{0.1f}}},
250253
{HSwish, {{}}},
251254
{PReLu, {{-0.01f}}},

src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ std::vector<std::string> disabledTestPatterns() {
310310
};
311311

312312
// fp32 floor/ceiling for bf16 models: conversion issue
313-
retVector.emplace_back(R"(.*smoke.*ActivationLayerCPUTest.*CompareWithRefs/Floor_.*netPRC=bf16.*)");
313+
retVector.emplace_back(R"(.*smoke.*ActivationLayerCPUTest.*CompareWithRefs/(Floor|Ceiling)_.*netPRC=bf16.*)");
314314

315315
#if defined(OPENVINO_ARCH_X86)
316316
retVector.emplace_back(R"(.*DetectionOutputLayerTest.*)");

0 commit comments

Comments
 (0)