Skip to content

Commit fb1838f

Browse files
[CPU][ARM64] Implement JIT Emitter for Eltwise Less Operation (openvinotoolkit#28494)
### Details:
- Implemented and added the `jit_less_emitter` derived class for the element-wise Less operation
- Added `Algorithm::EltwiseLess` to the supported algorithms in executors/aarch64
- Added entries to `get_supported_precisions` and `create_eltwise_emitters` in kernels/aarch64

### Tests
- Passed local tests using `./bin/aarch64/Release/ov_cpu_func_tests --gtest_filter='*smoke*ComparisonLayerTest*Less*'`

<img width="625" alt="Screenshot 2025-01-16 at 7 23 39 PM" src="https://github.com/user-attachments/assets/3688c211-aa0c-421f-9916-02a8777cf8af" />

### Tickets:
- Closes openvinotoolkit#24415
1 parent c3bdeaf commit fb1838f

File tree

4 files changed

+90
-0
lines changed

4 files changed

+90
-0
lines changed

src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp

+59
Original file line numberDiff line numberDiff line change
@@ -1363,6 +1363,65 @@ void jit_is_nan_emitter::register_table_entries() {
13631363
push_arg_entry_of("zero", 0x00000000, true);
13641364
}
13651365

1366+
/// LESS ///
1367+
// Node-based constructor: forwards host, host_isa and node to the jit_emitter base and uses
// get_arithmetic_binary_exec_precision(node) as the execution precision.
jit_less_emitter::jit_less_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
1368+
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
1369+
const std::shared_ptr<ov::Node>& node)
1370+
: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {
1371+
// NOTE(review): presumably builds the constant table from register_table_entries() - confirm in jit_emitter.
prepare_table();
1372+
}
1373+
1374+
// Precision-based constructor: forwards host, host_isa and the explicitly supplied exec_prc
// to the jit_emitter base (no graph node is involved in this overload).
jit_less_emitter::jit_less_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
1375+
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
1376+
const ov::element::Type exec_prc)
1377+
: jit_emitter(host, host_isa, exec_prc) {
1378+
// NOTE(review): presumably builds the constant table from register_table_entries() - confirm in jit_emitter.
prepare_table();
1379+
}
1380+
1381+
// Less is a binary operation: emit_isa reads exactly two input vector registers
// (in_vec_idxs[0] and in_vec_idxs[1]).
size_t jit_less_emitter::get_inputs_count() const {
1382+
return 2;
1383+
}
1384+
1385+
// One auxiliary vector register is requested: emit_isa uses aux_vec_idxs[0] to hold the
// broadcast "one" constant.
size_t jit_less_emitter::get_aux_vecs_count() const {
1386+
return 1;
1387+
}
1388+
1389+
// Requests one auxiliary general-purpose register.
// NOTE(review): emit_isa does not reference an aux GPR directly; presumably the base class
// needs it for constant-table addressing - confirm against jit_emitter.
size_t jit_less_emitter::get_aux_gprs_count() const {
1390+
return 1;
1391+
}
1392+
1393+
// Dispatches code emission by host ISA. Only asimd is handled; any other ISA is reported
// through OV_CPU_JIT_EMITTER_THROW.
void jit_less_emitter::emit_impl(const std::vector<size_t>& in_vec_idxs,
1394+
const std::vector<size_t>& out_vec_idxs) const {
1395+
if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) {
1396+
emit_isa<dnnl::impl::cpu::aarch64::asimd>(in_vec_idxs, out_vec_idxs);
1397+
} else {
1398+
OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel");
1399+
}
1400+
}
1401+
1402+
// Emits the vector instructions for element-wise `src1 < src2` on f32 lanes.
// The result register holds the bit pattern of 1.0f in lanes where the comparison is true
// and 0 in all other lanes.
//   in_vec_idxs[0], in_vec_idxs[1] - left and right operands
//   out_vec_idxs[0]                - destination
template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
1403+
void jit_less_emitter::emit_isa(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const {
1404+
// Only f32 execution precision is implemented.
OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string());
1405+
1406+
using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits<isa>::TReg;
1407+
const TReg src1 = TReg(in_vec_idxs[0]);
1408+
const TReg src2 = TReg(in_vec_idxs[1]);
1409+
const TReg dst = TReg(out_vec_idxs[0]);
1410+
const TReg aux = TReg(aux_vec_idxs[0]);
1411+
1412+
// Operands are swapped on purpose: "src2 > src1" is "src1 < src2".
// FCMGT produces an all-ones lane mask where the comparison holds, all-zeros otherwise.
h->fcmgt(dst.s, src2.s, src1.s);
1413+
// Load the "one" table constant (0x3f800000, i.e. 1.0f) replicated into every lane of aux.
h->ld1r(aux.s, table_val2("one"));
1414+
// mask & 1.0f-bits: true lanes become 1.0f, false lanes become 0.
h->and_(dst.b16, dst.b16, aux.b16);
1415+
}
1416+
1417+
// Registers the constants used by emit_isa. 0x3f800000 is the IEEE-754 single-precision
// bit pattern of 1.0f, stored under the key "one".
// NOTE(review): the trailing `true` presumably requests a broadcast entry - confirm in jit_emitter.
void jit_less_emitter::register_table_entries() {
1418+
push_arg_entry_of("one", 0x3f800000, true);
1419+
}
1420+
1421+
// Returns the supported input precision combinations: a single entry, f32 for both inputs.
// The node argument is not consulted.
std::set<std::vector<element::Type>> jit_less_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
1422+
return {{element::f32, element::f32}};
1423+
}
1424+
13661425
/// LESS_EQUAL ///
13671426
jit_less_equal_emitter::jit_less_equal_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
13681427
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,

src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp

+28
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,34 @@ class jit_is_inf_emitter : public jit_emitter {
608608
bool detect_positive;
609609
};
610610

611+
// JIT emitter for the element-wise Less comparison (input0 < input1) on AArch64.
// The implementation in jit_eltwise_emitters.cpp supports f32 on the asimd ISA only.
class jit_less_emitter : public jit_emitter {
612+
public:
613+
// Constructs the emitter with an explicit execution precision (f32 by default).
jit_less_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
614+
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
615+
const ov::element::Type exec_prc = ov::element::f32);
616+
617+
// Constructs the emitter from a graph node; the execution precision is derived from the node.
jit_less_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
618+
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
619+
const std::shared_ptr<ov::Node>& n);
620+
621+
// Number of input vector registers consumed (2 for this binary operation).
size_t get_inputs_count() const override;
622+
623+
// Number of auxiliary vector registers requested (1).
size_t get_aux_vecs_count() const override;
624+
625+
// Number of auxiliary general-purpose registers requested (1).
size_t get_aux_gprs_count() const override;
626+
627+
// Supported input precision combinations; the node argument is optional.
static std::set<std::vector<element::Type>> get_supported_precisions(
628+
const std::shared_ptr<ov::Node>& node = nullptr);
629+
630+
private:
631+
// Entry point for code emission; dispatches to emit_isa for the host ISA.
void emit_impl(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const override;
632+
633+
// ISA-specific code emission.
template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
634+
void emit_isa(const std::vector<size_t>& in_vec_idxs, const std::vector<size_t>& out_vec_idxs) const;
635+
636+
// Registers the constants used by the emitted code.
void register_table_entries() override;
637+
};
638+
611639
class jit_less_equal_emitter : public jit_emitter {
612640
public:
613641
jit_less_equal_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,

src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ bool JitEltwiseExecutor::isSupported(const Algorithm& algorithm,
3838
Algorithm::EltwiseIsFinite,
3939
Algorithm::EltwiseIsInf,
4040
Algorithm::EltwiseIsNaN,
41+
Algorithm::EltwiseLess,
4142
Algorithm::EltwiseLessEqual,
4243
Algorithm::EltwiseLogicalAnd,
4344
Algorithm::EltwiseLogicalOr,

src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,7 @@ std::shared_ptr<jit_emitter> jit_uni_eltwise_generic<isa>::create_eltwise_emitte
670670
OV_CASE(Algorithm::EltwiseHswish, ov::intel_cpu::aarch64::jit_hswish_emitter),
671671
OV_CASE(Algorithm::EltwiseIsFinite, ov::intel_cpu::aarch64::jit_is_finite_emitter),
672672
OV_CASE(Algorithm::EltwiseIsInf, ov::intel_cpu::aarch64::jit_is_inf_emitter),
673+
OV_CASE(Algorithm::EltwiseLess, ov::intel_cpu::aarch64::jit_less_emitter),
673674
OV_CASE(Algorithm::EltwiseLessEqual, ov::intel_cpu::aarch64::jit_less_equal_emitter),
674675
OV_CASE(Algorithm::EltwiseLogicalAnd, ov::intel_cpu::aarch64::jit_logical_and_emitter),
675676
OV_CASE(Algorithm::EltwiseLogicalOr, ov::intel_cpu::aarch64::jit_logical_or_emitter),
@@ -863,6 +864,7 @@ std::set<std::vector<element::Type>> eltwise_precision_helper::get_supported_pre
863864
OV_CASE(Algorithm::EltwiseIsFinite, jit_is_finite_emitter),
864865
OV_CASE(Algorithm::EltwiseIsInf, jit_is_inf_emitter),
865866
OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter),
867+
OV_CASE(Algorithm::EltwiseLess, jit_less_emitter),
866868
OV_CASE(Algorithm::EltwiseLessEqual, jit_less_equal_emitter),
867869
OV_CASE(Algorithm::EltwiseLogicalAnd, jit_logical_and_emitter),
868870
OV_CASE(Algorithm::EltwiseLogicalOr, jit_logical_or_emitter),

0 commit comments

Comments
 (0)