Skip to content

Commit 0df5391

Browse files
AlexPeskov authored and azhai219 committed
[FIX] Add several uni instruction wrappers into jit_generator
Remove redundancy of two unified instructions.

3.5 squash below:

[FIX] Add uni_vhsubps and uni_vaddsubps into jit_generator (openvinotoolkit#142)
* Add uni_vhsubps and uni_vaddsubps into jit_generator.hpp
* Add missing namespace
* Adjust indentation and add braces
* Improve formatting
* Replace assertion in uni_vhsubps and uni_vhaddps with possible workaround for SSE 4.1

[FIX] Added uni_vcvttps2dq
1 parent a557905 commit 0df5391

File tree

1 file changed

+60
-0
lines changed

1 file changed

+60
-0
lines changed

src/cpu/x64/jit_generator.hpp

+60
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,18 @@ class jit_generator : public Xbyak::MmapAllocator,
767767
}
768768
}
769769

770+
// Emits a horizontal packed-single subtract for Xmm operands.
// When AVX is available the three-operand vhsubps(x, x2, op) is emitted.
// Otherwise x2 is copied into x (unless isEqualIfNotInherited reports they
// already match) and the two-operand hsubps(x, op) is emitted.
// Note: on the non-AVX path the copy into x is emitted before op is used, so
// op must not refer to x when x and x2 differ.
void uni_vhsubps(const Xbyak::Xmm &x, const Xbyak::Xmm &x2,
        const Xbyak::Operand &op) {
    if (is_valid_isa(avx)) {
        vhsubps(x, x2, op);
        return;
    }

    const bool needs_copy = !x.isEqualIfNotInherited(x2);
    if (needs_copy) { movups(x, x2); }
    hsubps(x, op);
}
781+
770782
void uni_vpsignd(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
771783
const Xbyak::Operand &op) {
772784
if (is_valid_isa(avx))
@@ -867,6 +879,18 @@ class jit_generator : public Xbyak::MmapAllocator,
867879
vsubps(x, op1, op2);
868880
}
869881

882+
// Emits a packed-single add/subtract (addsubps family) into x.
// When AVX is available the three-operand vaddsubps(x, op1, op2) is emitted.
// Otherwise op1 is copied into x (unless isEqualIfNotInherited reports they
// already match) and the two-operand addsubps(x, op2) is emitted.
// Note: on the non-AVX path the copy into x is emitted before op2 is used, so
// op2 must not refer to x when x and op1 differ.
void uni_vaddsubps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
        const Xbyak::Operand &op2) {
    if (is_valid_isa(avx)) {
        vaddsubps(x, op1, op2);
        return;
    }

    const bool needs_copy = !x.isEqualIfNotInherited(op1);
    if (needs_copy) { movups(x, op1); }
    addsubps(x, op2);
}
893+
870894
void uni_vpmulld(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
871895
const Xbyak::Operand &op) {
872896
if (is_valid_isa(avx)) {
@@ -1525,6 +1549,16 @@ class jit_generator : public Xbyak::MmapAllocator,
15251549
vcmpps(x1, x2, op, cmp_predicate);
15261550
}
15271551

1552+
// Emits a packed-single "not equal" compare of x2 against op into x1.
// Without AVX, x2 is first moved into x1 when their register indices differ
// and the two-operand cmpneqps(x1, op) is emitted; with AVX the
// three-operand vcmpneqps(x1, x2, op) is emitted directly.
void uni_cmpneqps(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
        const Xbyak::Operand &op) {
    if (!is_valid_isa(avx)) {
        const bool same_register = x1.getIdx() == x2.getIdx();
        if (!same_register) { uni_vmovups(x1, x2); }
        cmpneqps(x1, op);
        return;
    }

    vcmpneqps(x1, x2, op);
}
1561+
15281562
void uni_vtestps(const Xbyak::Xmm &x1, const Xbyak::Operand &op) {
15291563
if (is_valid_isa(avx))
15301564
vtestps(x1, op);
@@ -1637,6 +1671,17 @@ class jit_generator : public Xbyak::MmapAllocator,
16371671
vcvtps2ph(x1, x2, _op_mxcsr);
16381672
}
16391673

1674+
// Emits cvttps2dq from op into x for Xmm destinations, choosing the
// vcvttps2dq form when AVX is available and the plain cvttps2dq form
// otherwise.
void uni_vcvttps2dq(const Xbyak::Xmm &x, const Xbyak::Operand &op) {
    if (!is_valid_isa(avx)) {
        cvttps2dq(x, op);
        return;
    }

    vcvttps2dq(x, op);
}
1680+
1681+
// Ymm overload: always emits vcvttps2dq(x, op); no ISA check is performed
// here.
void uni_vcvttps2dq(
        const Xbyak::Ymm &x, const Xbyak::Operand &op) {
    vcvttps2dq(x, op);
}
1684+
16401685
// Emits movmskps from x2 into the register obtained by calling cvt64() on
// x1.
void uni_vmovmskps(const Xbyak::Reg &x1, const Xbyak::Xmm &x2) {
    const auto dst = x1.cvt64();
    movmskps(dst, x2);
}
@@ -1995,6 +2040,10 @@ class jit_generator : public Xbyak::MmapAllocator,
19952040
}
19962041
}
19972042

2043+
// Ymm overload: always emits vpminsd(x1, x2, op); no ISA check is performed
// here.
void uni_vpminsd(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
        const Xbyak::Operand &op) {
    vpminsd(x1, x2, op);
}
2046+
19982047
void uni_movshdup(const Xbyak::Xmm &x, const Xbyak::Operand &op) {
19992048
if (is_valid_isa(avx))
20002049
vmovshdup(x, op);
@@ -2021,6 +2070,17 @@ class jit_generator : public Xbyak::MmapAllocator,
20212070

20222071
// End of custom instructions section.
20232072

2073+
// Packed-single "greater than" compare for Xmm operands.
// Emits the two-operand cmpps(x1, op, _cmp_nle_us), where x1 is both the
// first source and the destination. If x2 is a different register it is
// copied into x1 first; copying instead of merely asserting x1 == x2 keeps
// builds with NDEBUG correct, where an assert would vanish and x2 would be
// silently ignored.
// Callers that already pass x1 == x2 get exactly the same emitted code.
// NOTE(review): the Ymm overload emits vcmpgtps rather than cmpps with
// _cmp_nle_us - confirm both predicates agree for NaN inputs.
void uni_vcmpgtps(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
        const Xbyak::Operand &op) {
    if (x1.getIdx() != x2.getIdx()) {
        // The copy overwrites x1 before op is read, so op must not be x1.
        assert(!x1.isEqualIfNotInherited(op));
        movups(x1, x2);
    }
    cmpps(x1, op, _cmp_nle_us);
}
2078+
2079+
// Ymm overload: always emits the three-operand vcmpgtps(x1, x2, op); no ISA
// check is performed here.
// NOTE(review): the Xmm overload emits cmpps with _cmp_nle_us instead of
// vcmpgtps - confirm both predicates agree for NaN inputs.
void uni_vcmpgtps(
        const Xbyak::Ymm &x1, const Xbyak::Ymm &x2, const Xbyak::Operand &op) {
    vcmpgtps(x1, x2, op);
}
2083+
20242084
void mul_by_const(
20252085
const Xbyak::Reg &out, const Xbyak::Reg64 &tmp, int value) {
20262086
// Generates a shift + add sequence for multiplying contents of the

0 commit comments

Comments
 (0)