@@ -767,6 +767,18 @@ class jit_generator : public Xbyak::MmapAllocator,
767
767
}
768
768
}
769
769
770
+ void uni_vhsubps (const Xbyak::Xmm &x, const Xbyak::Xmm &x2,
771
+ const Xbyak::Operand &op) {
772
+ if (is_valid_isa (avx)) {
773
+ vhsubps (x, x2, op);
774
+ } else {
775
+ if (!x.isEqualIfNotInherited (x2)) {
776
+ movups (x, x2);
777
+ }
778
+ hsubps (x, op);
779
+ }
780
+ }
781
+
770
782
void uni_vpsignd (const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
771
783
const Xbyak::Operand &op) {
772
784
if (is_valid_isa (avx))
@@ -867,6 +879,18 @@ class jit_generator : public Xbyak::MmapAllocator,
867
879
vsubps (x, op1, op2);
868
880
}
869
881
882
+ void uni_vaddsubps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
883
+ const Xbyak::Operand &op2) {
884
+ if (is_valid_isa (avx)) {
885
+ vaddsubps (x, op1, op2);
886
+ } else {
887
+ if (!x.isEqualIfNotInherited (op1)) {
888
+ movups (x, op1);
889
+ }
890
+ addsubps (x, op2);
891
+ }
892
+ }
893
+
870
894
void uni_vpmulld (const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
871
895
const Xbyak::Operand &op) {
872
896
if (is_valid_isa (avx)) {
@@ -1525,6 +1549,16 @@ class jit_generator : public Xbyak::MmapAllocator,
1525
1549
vcmpps (x1, x2, op, cmp_predicate);
1526
1550
}
1527
1551
1552
+ void uni_cmpneqps (const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
1553
+ const Xbyak::Operand &op) {
1554
+ if (is_valid_isa (avx))
1555
+ vcmpneqps (x1, x2, op);
1556
+ else {
1557
+ if (x1.getIdx () != x2.getIdx ()) uni_vmovups (x1, x2);
1558
+ cmpneqps (x1, op);
1559
+ }
1560
+ }
1561
+
1528
1562
void uni_vtestps (const Xbyak::Xmm &x1, const Xbyak::Operand &op) {
1529
1563
if (is_valid_isa (avx))
1530
1564
vtestps (x1, op);
@@ -1637,6 +1671,17 @@ class jit_generator : public Xbyak::MmapAllocator,
1637
1671
vcvtps2ph (x1, x2, _op_mxcsr);
1638
1672
}
1639
1673
1674
+ void uni_vcvttps2dq (const Xbyak::Xmm &x, const Xbyak::Operand &op) {
1675
+ if (is_valid_isa (avx))
1676
+ vcvttps2dq (x, op);
1677
+ else
1678
+ cvttps2dq (x, op);
1679
+ }
1680
+
1681
+ void uni_vcvttps2dq (const Xbyak::Ymm &x, const Xbyak::Operand &op) {
1682
+ vcvttps2dq (x, op);
1683
+ }
1684
+
1640
1685
// Extracts the sign-bit mask of x2 into a general-purpose register.
// The destination is widened to its 64-bit form before the instruction is
// emitted.
void uni_vmovmskps(const Xbyak::Reg &x1, const Xbyak::Xmm &x2) {
    const auto dst64 = x1.cvt64();
    movmskps(dst64, x2);
}
@@ -1995,6 +2040,10 @@ class jit_generator : public Xbyak::MmapAllocator,
1995
2040
}
1996
2041
}
1997
2042
2043
+ void uni_vpminsd (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2, const Xbyak::Operand &op) {
2044
+ vpminsd (x1, x2, op);
2045
+ }
2046
+
1998
2047
void uni_movshdup (const Xbyak::Xmm &x, const Xbyak::Operand &op) {
1999
2048
if (is_valid_isa (avx))
2000
2049
vmovshdup (x, op);
@@ -2021,6 +2070,17 @@ class jit_generator : public Xbyak::MmapAllocator,
2021
2070
2022
2071
// End of custom instructions section.
2023
2072
2073
+ void uni_vcmpgtps (const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
2074
+ const Xbyak::Operand &op) {
2075
+ assert (x1.getIdx () == x2.getIdx ());
2076
+ cmpps (x1, op, _cmp_nle_us);
2077
+ }
2078
+
2079
+ void uni_vcmpgtps (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
2080
+ const Xbyak::Operand &op) {
2081
+ vcmpgtps (x1, x2, op);
2082
+ }
2083
+
2024
2084
void mul_by_const (
2025
2085
const Xbyak::Reg &out, const Xbyak::Reg64 &tmp, int value) {
2026
2086
// Generates a shift + add sequence for multiplying contents of the
0 commit comments