Skip to content

Commit b5a919f

Browse files
Fixes for uni_vaddps, uni_vsubps, uni_vpmulld, uni_vmulps, uni_vmaxps, uni_vminps and uni_vcmpps operations
1 parent be3458a commit b5a919f

File tree

1 file changed

+78
-30
lines changed

1 file changed

+78
-30
lines changed

src/cpu/x64/jit_generator.hpp

+78-30
Original file line numberDiff line numberDiff line change
@@ -657,16 +657,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
657657
vdivps(x, op1, op2);
658658
}
659659

660-
void uni_vaddps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
660+
void uni_vaddps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
661661
const Xbyak::Operand &op2) {
662662
if (is_valid_isa(avx))
663663
vaddps(x, op1, op2);
664664
else {
665-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
666-
addps(x, op2);
665+
if (x.getIdx() == op1.getIdx()) {
666+
addps(x, op2);
667+
} else if (x.isEqualIfNotInherited(op2)) {
668+
addps(x, op1);
669+
} else {
670+
movups(x, op1);
671+
addps(x, op2);
672+
}
667673
}
668674
}
669-
void uni_vaddps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
675+
void uni_vaddps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
670676
const Xbyak::Operand &op2) {
671677
vaddps(x, op1, op2);
672678
}
@@ -775,16 +781,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
775781
vsubss(x, Xbyak::Xmm(op1.getIdx()), Xbyak::Xmm(op2.getIdx()));
776782
}
777783

778-
void uni_vsubps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
784+
void uni_vsubps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
779785
const Xbyak::Operand &op2) {
780786
if (is_valid_isa(avx))
781787
vsubps(x, op1, op2);
782788
else {
783-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
784-
subps(x, op2);
789+
if (x.getIdx() == op1.getIdx()) {
790+
subps(x, op2);
791+
} else if (x.isEqualIfNotInherited(op2)) {
792+
push(op1);
793+
subps(op1, op2);
794+
movups(x, op1);
795+
pop(op1);
796+
} else {
797+
movups(x, op1);
798+
subps(x, op2);
799+
}
785800
}
786801
}
787-
void uni_vsubps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
802+
void uni_vsubps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
788803
const Xbyak::Operand &op2) {
789804
vsubps(x, op1, op2);
790805
}
@@ -818,30 +833,42 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
818833
}
819834
}
820835

821-
void uni_vpmulld(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
822-
const Xbyak::Operand &op) {
836+
void uni_vpmulld(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
837+
const Xbyak::Operand &op2) {
823838
if (is_valid_isa(avx)) {
824-
vpmulld(x1, x2, op);
839+
vpmulld(x, op1, op2);
825840
} else {
826-
if (x1.getIdx() != x2.getIdx()) movdqa(x1, x2);
827-
pmulld(x1, op);
841+
if (x.getIdx() == op1.getIdx()) {
842+
pmulld(x, op2);
843+
} else if (x.isEqualIfNotInherited(op2)) {
844+
pmulld(x, op1);
845+
} else {
846+
movdqa(x, op1);
847+
pmulld(x, op2);
848+
}
828849
}
829850
}
830851
void uni_vpmulld(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
831852
const Xbyak::Operand &op) {
832853
vpmulld(x1, x2, op);
833854
}
834855

835-
void uni_vmulps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
856+
void uni_vmulps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
836857
const Xbyak::Operand &op2) {
837858
if (is_valid_isa(avx))
838859
vmulps(x, op1, op2);
839860
else {
840-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
841-
mulps(x, op2);
861+
if (x.getIdx() == op1.getIdx()) {
862+
mulps(x, op2);
863+
} else if (x.isEqualIfNotInherited(op2)) {
864+
mulps(x, op1);
865+
} else {
866+
movups(x, op1);
867+
mulps(x, op2);
868+
}
842869
}
843870
}
844-
void uni_vmulps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
871+
void uni_vmulps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
845872
const Xbyak::Operand &op2) {
846873
vmulps(x, op1, op2);
847874
}
@@ -1243,16 +1270,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
12431270
vpsrld(x, op, imm);
12441271
}
12451272

1246-
void uni_vmaxps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1273+
void uni_vmaxps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
12471274
const Xbyak::Operand &op2) {
12481275
if (is_valid_isa(avx))
12491276
vmaxps(x, op1, op2);
12501277
else {
1251-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
1252-
maxps(x, op2);
1278+
if (x.getIdx() == op1.getIdx()) {
1279+
maxps(x, op2);
1280+
} else if (x.isEqualIfNotInherited(op2)) {
1281+
maxps(x, op1);
1282+
} else {
1283+
movups(x, op1);
1284+
maxps(x, op2);
1285+
}
12531286
}
12541287
}
1255-
void uni_vmaxps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1288+
void uni_vmaxps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
12561289
const Xbyak::Operand &op2) {
12571290
vmaxps(x, op1, op2);
12581291
}
@@ -1267,17 +1300,23 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
12671300
}
12681301
}
12691302

1270-
void uni_vminps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1303+
void uni_vminps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
12711304
const Xbyak::Operand &op2) {
12721305
if (is_valid_isa(avx))
12731306
vminps(x, op1, op2);
12741307
else {
1275-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
1276-
minps(x, op2);
1308+
if (x.getIdx() == op1.getIdx()) {
1309+
minps(x, op2);
1310+
} else if (x.isEqualIfNotInherited(op2)) {
1311+
minps(x, op1);
1312+
} else {
1313+
movups(x, op1);
1314+
minps(x, op2);
1315+
}
12771316
}
12781317
}
12791318

1280-
void uni_vminps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1319+
void uni_vminps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
12811320
const Xbyak::Operand &op2) {
12821321
vminps(x, op1, op2);
12831322
}
@@ -1314,13 +1353,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
13141353
vpmovzxbd(y, op);
13151354
}
13161355

1317-
void uni_vcmpps(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
1318-
const Xbyak::Operand &op, int cmp_predicate) {
1356+
void uni_vcmpps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
1357+
const Xbyak::Operand &op2, int cmp_predicate) {
13191358
if (is_valid_isa(avx))
1320-
vcmpps(x1, x2, op, cmp_predicate);
1359+
vcmpps(x, op1, op2, cmp_predicate);
13211360
else {
1322-
if (x1.getIdx() != x2.getIdx()) uni_vmovups(x1, x2);
1323-
cmpps(x1, op, cmp_predicate);
1361+
if (x.getIdx() == op1.getIdx()) {
1362+
cmpps(x, op2, cmp_predicate);
1363+
} else if (x.isEqualIfNotInherited(op2)) {
1364+
push(op1);
1365+
cmpps(op1, op2, cmp_predicate);
1366+
movups(x, op1);
1367+
pop(op1);
1368+
} else {
1369+
movups(x, op1);
1370+
cmpps(x, op2, cmp_predicate);
1371+
}
13241372
}
13251373
}
13261374
void uni_vcmpps(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,

0 commit comments

Comments
 (0)