Skip to content

Commit 53fa0a4

Browse files
Bugfix of some operations
1 parent cd818eb commit 53fa0a4

File tree

1 file changed

+84
-30
lines changed

1 file changed

+84
-30
lines changed

src/cpu/x64/jit_generator.hpp

+84-30
Original file line number | Diff line number | Diff line change
@@ -713,16 +713,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
713713
vdivps(x, op1, op2);
714714
}
715715

716-
void uni_vaddps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
716+
void uni_vaddps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
717717
const Xbyak::Operand &op2) {
718718
if (is_valid_isa(avx))
719719
vaddps(x, op1, op2);
720720
else {
721-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
722-
addps(x, op2);
721+
if (x.getIdx() == op1.getIdx()) {
722+
addps(x, op2);
723+
} else if (x.isEqualIfNotInherited(op2)) {
724+
addps(x, op1);
725+
} else {
726+
movups(x, op1);
727+
addps(x, op2);
728+
}
723729
}
724730
}
725-
void uni_vaddps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
731+
void uni_vaddps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
726732
const Xbyak::Operand &op2) {
727733
vaddps(x, op1, op2);
728734
}
@@ -817,16 +823,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
817823
vsubss(x, Xbyak::Xmm(op1.getIdx()), Xbyak::Xmm(op2.getIdx()));
818824
}
819825

820-
void uni_vsubps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
826+
void uni_vsubps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
821827
const Xbyak::Operand &op2) {
822828
if (is_valid_isa(avx))
823829
vsubps(x, op1, op2);
824830
else {
825-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
826-
subps(x, op2);
831+
if (x.getIdx() == op1.getIdx()) {
832+
subps(x, op2);
833+
} else if (x.isEqualIfNotInherited(op2)) {
834+
push(op1);
835+
subps(op1, op2);
836+
movups(x, op1);
837+
pop(op1);
838+
} else {
839+
movups(x, op1);
840+
subps(x, op2);
841+
}
827842
}
828843
}
829-
void uni_vsubps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
844+
void uni_vsubps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
830845
const Xbyak::Operand &op2) {
831846
vsubps(x, op1, op2);
832847
}
@@ -848,30 +863,42 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
848863
vsubps(x, op1, op2);
849864
}
850865

851-
void uni_vpmulld(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
852-
const Xbyak::Operand &op) {
866+
void uni_vpmulld(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
867+
const Xbyak::Operand &op2) {
853868
if (is_valid_isa(avx)) {
854-
vpmulld(x1, x2, op);
869+
vpmulld(x, op1, op2);
855870
} else {
856-
if (x1.getIdx() != x2.getIdx()) movdqa(x1, x2);
857-
pmulld(x1, op);
871+
if (x.getIdx() == op1.getIdx()) {
872+
pmulld(x, op2);
873+
} else if (x.isEqualIfNotInherited(op2)) {
874+
pmulld(x, op1);
875+
} else {
876+
movdqa(x, op1);
877+
pmulld(x, op2);
878+
}
858879
}
859880
}
860881
void uni_vpmulld(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
861882
const Xbyak::Operand &op) {
862883
vpmulld(x1, x2, op);
863884
}
864885

865-
void uni_vmulps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
886+
void uni_vmulps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
866887
const Xbyak::Operand &op2) {
867888
if (is_valid_isa(avx))
868889
vmulps(x, op1, op2);
869890
else {
870-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
871-
mulps(x, op2);
891+
if (x.getIdx() == op1.getIdx()) {
892+
mulps(x, op2);
893+
} else if (x.isEqualIfNotInherited(op2)) {
894+
mulps(x, op1);
895+
} else {
896+
movups(x, op1);
897+
mulps(x, op2);
898+
}
872899
}
873900
}
874-
void uni_vmulps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
901+
void uni_vmulps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
875902
const Xbyak::Operand &op2) {
876903
vmulps(x, op1, op2);
877904
}
@@ -1273,16 +1300,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
12731300
vpsrld(x, op, imm);
12741301
}
12751302

1276-
void uni_vmaxps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1303+
void uni_vmaxps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
12771304
const Xbyak::Operand &op2) {
12781305
if (is_valid_isa(avx))
12791306
vmaxps(x, op1, op2);
12801307
else {
1281-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
1282-
maxps(x, op2);
1308+
if (x.getIdx() == op1.getIdx()) {
1309+
maxps(x, op2);
1310+
} else if (x.isEqualIfNotInherited(op2)) {
1311+
push(op1);
1312+
maxps(op1, op2);
1313+
movups(x, op1);
1314+
pop(op1);
1315+
} else {
1316+
movups(x, op1);
1317+
maxps(x, op2);
1318+
}
12831319
}
12841320
}
1285-
void uni_vmaxps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1321+
void uni_vmaxps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
12861322
const Xbyak::Operand &op2) {
12871323
vmaxps(x, op1, op2);
12881324
}
@@ -1297,17 +1333,26 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
12971333
}
12981334
}
12991335

1300-
void uni_vminps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1336+
void uni_vminps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
13011337
const Xbyak::Operand &op2) {
13021338
if (is_valid_isa(avx))
13031339
vminps(x, op1, op2);
13041340
else {
1305-
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
1306-
minps(x, op2);
1341+
if (x.getIdx() == op1.getIdx()) {
1342+
minps(x, op2);
1343+
} else if (x.isEqualIfNotInherited(op2)) {
1344+
push(op1);
1345+
minps(op1, op2);
1346+
movups(x, op1);
1347+
pop(op1);
1348+
} else {
1349+
movups(x, op1);
1350+
minps(x, op2);
1351+
}
13071352
}
13081353
}
13091354

1310-
void uni_vminps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1355+
void uni_vminps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
13111356
const Xbyak::Operand &op2) {
13121357
vminps(x, op1, op2);
13131358
}
@@ -1344,13 +1389,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
13441389
vpmovzxbd(y, op);
13451390
}
13461391

1347-
void uni_vcmpps(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
1348-
const Xbyak::Operand &op, int cmp_predicate) {
1392+
void uni_vcmpps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
1393+
const Xbyak::Operand &op2, int cmp_predicate) {
13491394
if (is_valid_isa(avx))
1350-
vcmpps(x1, x2, op, cmp_predicate);
1395+
vcmpps(x, op1, op2, cmp_predicate);
13511396
else {
1352-
if (x1.getIdx() != x2.getIdx()) uni_vmovups(x1, x2);
1353-
cmpps(x1, op, cmp_predicate);
1397+
if (x.getIdx() == op1.getIdx()) {
1398+
cmpps(x, op2, cmp_predicate);
1399+
} else if (x.isEqualIfNotInherited(op2)) {
1400+
push(op1);
1401+
cmpps(op1, op2, cmp_predicate);
1402+
movups(x, op1);
1403+
pop(op1);
1404+
} else {
1405+
movups(x, op1);
1406+
cmpps(x, op2, cmp_predicate);
1407+
}
13541408
}
13551409
}
13561410
void uni_vcmpps(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,

0 commit comments

Comments
 (0)