Skip to content

Commit a39dca1

Browse files
Fixes for uni_vaddps, uni_vsubps, uni_vpmulld, uni_vmulps, uni_vmaxps, uni_vminps and uni_vcmpps operations
1 parent ca8b340 commit a39dca1

File tree

1 file changed: +78 -30 lines changed

src/cpu/x64/jit_generator.hpp

+78 -30
Original file line number | Diff line number | Diff line change
@@ -713,16 +713,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
713713
vdivps(x, op1, op2);
714714
}
715715

716-
void uni_vaddps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
716+
void uni_vaddps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
717717
const Xbyak::Operand &op2) {
718718
if (is_valid_isa(avx))
719719
vaddps(x, op1, op2);
720720
else {
721-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
722-
addps(x, op2);
721+
if (x.getIdx() == op1.getIdx()) {
722+
addps(x, op2);
723+
} else if (x.isEqualIfNotInherited(op2)) {
724+
addps(x, op1);
725+
} else {
726+
movups(x, op1);
727+
addps(x, op2);
728+
}
723729
}
724730
}
725-
void uni_vaddps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
731+
void uni_vaddps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
726732
const Xbyak::Operand &op2) {
727733
vaddps(x, op1, op2);
728734
}
@@ -831,16 +837,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
831837
vsubss(x, Xbyak::Xmm(op1.getIdx()), Xbyak::Xmm(op2.getIdx()));
832838
}
833839

834-
void uni_vsubps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
840+
void uni_vsubps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
835841
const Xbyak::Operand &op2) {
836842
if (is_valid_isa(avx))
837843
vsubps(x, op1, op2);
838844
else {
839-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
840-
subps(x, op2);
845+
if (x.getIdx() == op1.getIdx()) {
846+
subps(x, op2);
847+
} else if (x.isEqualIfNotInherited(op2)) {
848+
push(op1);
849+
subps(op1, op2);
850+
movups(x, op1);
851+
pop(op1);
852+
} else {
853+
movups(x, op1);
854+
subps(x, op2);
855+
}
841856
}
842857
}
843-
void uni_vsubps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
858+
void uni_vsubps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
844859
const Xbyak::Operand &op2) {
845860
vsubps(x, op1, op2);
846861
}
@@ -874,30 +889,42 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
874889
}
875890
}
876891

877-
void uni_vpmulld(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
878-
const Xbyak::Operand &op) {
892+
void uni_vpmulld(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
893+
const Xbyak::Operand &op2) {
879894
if (is_valid_isa(avx)) {
880-
vpmulld(x1, x2, op);
895+
vpmulld(x, op1, op2);
881896
} else {
882-
if (x1.getIdx() != x2.getIdx()) movdqa(x1, x2);
883-
pmulld(x1, op);
897+
if (x.getIdx() == op1.getIdx()) {
898+
pmulld(x, op2);
899+
} else if (x.isEqualIfNotInherited(op2)) {
900+
pmulld(x, op1);
901+
} else {
902+
movdqa(x, op1);
903+
pmulld(x, op2);
904+
}
884905
}
885906
}
886907
void uni_vpmulld(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
887908
const Xbyak::Operand &op) {
888909
vpmulld(x1, x2, op);
889910
}
890911

891-
void uni_vmulps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
912+
void uni_vmulps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
892913
const Xbyak::Operand &op2) {
893914
if (is_valid_isa(avx))
894915
vmulps(x, op1, op2);
895916
else {
896-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
897-
mulps(x, op2);
917+
if (x.getIdx() == op1.getIdx()) {
918+
mulps(x, op2);
919+
} else if (x.isEqualIfNotInherited(op2)) {
920+
mulps(x, op1);
921+
} else {
922+
movups(x, op1);
923+
mulps(x, op2);
924+
}
898925
}
899926
}
900-
void uni_vmulps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
927+
void uni_vmulps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
901928
const Xbyak::Operand &op2) {
902929
vmulps(x, op1, op2);
903930
}
@@ -1299,16 +1326,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
12991326
vpsrld(x, op, imm);
13001327
}
13011328

1302-
void uni_vmaxps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1329+
void uni_vmaxps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
13031330
const Xbyak::Operand &op2) {
13041331
if (is_valid_isa(avx))
13051332
vmaxps(x, op1, op2);
13061333
else {
1307-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
1308-
maxps(x, op2);
1334+
if (x.getIdx() == op1.getIdx()) {
1335+
maxps(x, op2);
1336+
} else if (x.isEqualIfNotInherited(op2)) {
1337+
maxps(x, op1);
1338+
} else {
1339+
movups(x, op1);
1340+
maxps(x, op2);
1341+
}
13091342
}
13101343
}
1311-
void uni_vmaxps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1344+
void uni_vmaxps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
13121345
const Xbyak::Operand &op2) {
13131346
vmaxps(x, op1, op2);
13141347
}
@@ -1323,17 +1356,23 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
13231356
}
13241357
}
13251358

1326-
void uni_vminps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1359+
void uni_vminps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
13271360
const Xbyak::Operand &op2) {
13281361
if (is_valid_isa(avx))
13291362
vminps(x, op1, op2);
13301363
else {
1331-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
1332-
minps(x, op2);
1364+
if (x.getIdx() == op1.getIdx()) {
1365+
minps(x, op2);
1366+
} else if (x.isEqualIfNotInherited(op2)) {
1367+
minps(x, op1);
1368+
} else {
1369+
movups(x, op1);
1370+
minps(x, op2);
1371+
}
13331372
}
13341373
}
13351374

1336-
void uni_vminps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1375+
void uni_vminps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
13371376
const Xbyak::Operand &op2) {
13381377
vminps(x, op1, op2);
13391378
}
@@ -1370,13 +1409,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
13701409
vpmovzxbd(y, op);
13711410
}
13721411

1373-
void uni_vcmpps(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
1374-
const Xbyak::Operand &op, int cmp_predicate) {
1412+
void uni_vcmpps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
1413+
const Xbyak::Operand &op2, int cmp_predicate) {
13751414
if (is_valid_isa(avx))
1376-
vcmpps(x1, x2, op, cmp_predicate);
1415+
vcmpps(x, op1, op2, cmp_predicate);
13771416
else {
1378-
if (x1.getIdx() != x2.getIdx()) uni_vmovups(x1, x2);
1379-
cmpps(x1, op, cmp_predicate);
1417+
if (x.getIdx() == op1.getIdx()) {
1418+
cmpps(x, op2, cmp_predicate);
1419+
} else if (x.isEqualIfNotInherited(op2)) {
1420+
push(op1);
1421+
cmpps(op1, op2, cmp_predicate);
1422+
movups(x, op1);
1423+
pop(op1);
1424+
} else {
1425+
movups(x, op1);
1426+
cmpps(x, op2, cmp_predicate);
1427+
}
13801428
}
13811429
}
13821430
void uni_vcmpps(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,

0 commit comments

Comments
 (0)