Skip to content

Commit 11db1e1

Browse files
authored
[CPU][ARM] JIT Floor Mod Operation (#27706)
### Details:
- Added a JIT emitter for the Eltwise FloorMod operation on ARM64 SIMD
- Implemented an fp32 optimization that replaces the C++ Math implementation
- Modified the ARM64 executor to support the new JIT emitter
- Updated the kernel files to include FloorMod in the Eltwise operations
- Added test coverage to verify the JIT implementation
- Transitioned the operation type from Math to Eltwise for better performance

@a-sidorova, could you please review the code? :)

### Tickets:
- #27501

![image](https://github.com/user-attachments/assets/b7501b8f-1c67-493d-9d18-5175d5de090d)
1 parent 9706b78 commit 11db1e1

File tree

5 files changed

+72
-0
lines changed

5 files changed

+72
-0
lines changed

src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp

+45
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,51 @@ std::set<std::vector<element::Type>> jit_floor_emitter::get_supported_precisions
516516
return {{element::f32}};
517517
}
518518

519+
/// FLOOR_MOD ///
520+
jit_floor_mod_emitter::jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host,
521+
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
522+
const std::shared_ptr<ov::Node>& node)
523+
: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {
524+
}
525+
526+
jit_floor_mod_emitter::jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host,
527+
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
528+
const ov::element::Type exec_prc): jit_emitter(host, host_isa, exec_prc) {
529+
}
530+
531+
size_t jit_floor_mod_emitter::get_inputs_count() const { return 2; }
532+
533+
size_t jit_floor_mod_emitter::get_aux_vecs_count() const { return 1; }
534+
535+
void jit_floor_mod_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
536+
if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) {
537+
emit_isa<dnnl::impl::cpu::aarch64::asimd>(in_vec_idxs, out_vec_idxs);
538+
} else {
539+
OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel");
540+
}
541+
}
542+
543+
template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
544+
void jit_floor_mod_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
545+
OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string());
546+
547+
using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits<isa>::TReg;
548+
549+
TReg dividend = TReg(in_vec_idxs[0]);
550+
TReg divisor = TReg(in_vec_idxs[1]);
551+
TReg r = TReg(out_vec_idxs[0]);
552+
TReg aux = TReg(aux_vec_idxs[0]);
553+
554+
h->fdiv(aux.s, dividend.s, divisor.s);
555+
h->frintm(aux.s, aux.s);
556+
h->fmul(aux.s, aux.s, divisor.s);
557+
h->fsub(r.s, dividend.s, aux.s);
558+
}
559+
560+
std::set<std::vector<element::Type>> jit_floor_mod_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
561+
return {{element::f32, element::f32}};
562+
}
563+
519564
/// CEILING ///
520565
//Initialization of the emitter, taking node as input
521566
jit_ceiling_emitter::jit_ceiling_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,

src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp

+21
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,28 @@ class jit_floor_emitter : public jit_emitter {
213213
template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
214214
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
215215
};
216+
class jit_floor_mod_emitter : public jit_emitter {
217+
public:
218+
jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host,
219+
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
220+
const ov::element::Type exec_prc = ov::element::f32);
221+
222+
jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host,
223+
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
224+
const std::shared_ptr<ov::Node>& node);
225+
226+
size_t get_inputs_count() const override;
227+
228+
size_t get_aux_vecs_count() const override;
229+
230+
static std::set<std::vector<element::Type>> get_supported_precisions(const std::shared_ptr<ov::Node>& node = nullptr);
216231

232+
private:
233+
void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
234+
235+
template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
236+
void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
237+
};
217238
class jit_ceiling_emitter : public jit_emitter {
218239
public:
219240
// Constructor with explicit precision

src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ bool JitEltwiseExecutor::isSupported(
2626
Algorithm::EltwiseEqual,
2727
Algorithm::EltwiseExp,
2828
Algorithm::EltwiseFloor,
29+
Algorithm::EltwiseFloorMod,
2930
Algorithm::EltwiseCeiling,
3031
Algorithm::EltwiseGeluErf,
3132
Algorithm::EltwiseGeluTanh,

src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,7 @@ std::shared_ptr<jit_emitter> jit_uni_eltwise_generic<isa>::create_eltwise_emitte
648648
OV_CASE(Algorithm::EltwiseEqual, ov::intel_cpu::aarch64::jit_equal_emitter),
649649
OV_CASE(Algorithm::EltwiseExp, ov::intel_cpu::aarch64::jit_exp_emitter),
650650
OV_CASE(Algorithm::EltwiseFloor, ov::intel_cpu::aarch64::jit_floor_emitter),
651+
OV_CASE(Algorithm::EltwiseFloorMod, ov::intel_cpu::aarch64::jit_floor_mod_emitter),
651652
OV_CASE(Algorithm::EltwiseCeiling, ov::intel_cpu::aarch64::jit_ceiling_emitter),
652653
OV_CASE(Algorithm::EltwiseHswish, ov::intel_cpu::aarch64::jit_hswish_emitter),
653654
OV_CASE(Algorithm::EltwiseIsFinite, ov::intel_cpu::aarch64::jit_is_finite_emitter),
@@ -830,6 +831,7 @@ std::set<std::vector<element::Type>> eltwise_precision_helper::get_supported_pre
830831
OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter),
831832
OV_CASE(Algorithm::EltwiseExp, jit_exp_emitter),
832833
OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter),
834+
OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter),
833835
OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter),
834836
OV_CASE(Algorithm::EltwiseGeluErf, jit_gelu_erf_emitter),
835837
OV_CASE(Algorithm::EltwiseGeluTanh, jit_gelu_tanh_emitter),

src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ std::string EltwiseLayerCPUTest::getPrimitiveType(const utils::EltwiseTypes& elt
259259
(eltwise_type == utils::EltwiseTypes::MULTIPLY) ||
260260
(eltwise_type == utils::EltwiseTypes::SUBTRACT) ||
261261
(eltwise_type == utils::EltwiseTypes::DIVIDE) ||
262+
(eltwise_type == utils::EltwiseTypes::FLOOR_MOD) ||
262263
(eltwise_type == utils::EltwiseTypes::MOD)) {
263264
return "jit";
264265
}
@@ -317,6 +318,8 @@ const std::vector<utils::EltwiseTypes>& eltwiseOpTypesBinInp() {
317318
utils::EltwiseTypes::SUBTRACT, // TODO: Fix CVS-105430
318319
utils::EltwiseTypes::DIVIDE, // TODO: Fix CVS-105430
319320
utils::EltwiseTypes::FLOOR_MOD, // TODO: Fix CVS-111875
321+
#elif defined(OPENVINO_ARCH_ARM64)
322+
utils::EltwiseTypes::FLOOR_MOD,
320323
#endif
321324
utils::EltwiseTypes::SQUARED_DIFF,
322325
utils::EltwiseTypes::MOD,

0 commit comments

Comments
 (0)