@@ -713,16 +713,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
713
713
vdivps (x, op1, op2);
714
714
}
715
715
716
- void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
716
+ void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
717
717
const Xbyak::Operand &op2) {
718
718
if (is_valid_isa (avx))
719
719
vaddps (x, op1, op2);
720
720
else {
721
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
722
- addps (x, op2);
721
+ if (x.getIdx () == op1.getIdx ()) {
722
+ addps (x, op2);
723
+ } else if (x.isEqualIfNotInherited (op2)) {
724
+ addps (x, op1);
725
+ } else {
726
+ movups (x, op1);
727
+ addps (x, op2);
728
+ }
723
729
}
724
730
}
725
- void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
731
+ void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
726
732
const Xbyak::Operand &op2) {
727
733
vaddps (x, op1, op2);
728
734
}
@@ -817,16 +823,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
817
823
vsubss (x, Xbyak::Xmm (op1.getIdx ()), Xbyak::Xmm (op2.getIdx ()));
818
824
}
819
825
820
- void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
826
+ void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
821
827
const Xbyak::Operand &op2) {
822
828
if (is_valid_isa (avx))
823
829
vsubps (x, op1, op2);
824
830
else {
825
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
826
- subps (x, op2);
831
+ if (x.getIdx () == op1.getIdx ()) {
832
+ subps (x, op2);
833
+ } else if (x.isEqualIfNotInherited (op2)) {
834
+ push (op1);
835
+ subps (op1, op2);
836
+ movups (x, op1);
837
+ pop (op1);
838
+ } else {
839
+ movups (x, op1);
840
+ subps (x, op2);
841
+ }
827
842
}
828
843
}
829
- void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
844
+ void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
830
845
const Xbyak::Operand &op2) {
831
846
vsubps (x, op1, op2);
832
847
}
@@ -848,30 +863,42 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
848
863
vsubps (x, op1, op2);
849
864
}
850
865
851
- void uni_vpmulld (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
852
- const Xbyak::Operand &op ) {
866
+ void uni_vpmulld (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
867
+ const Xbyak::Operand &op2 ) {
853
868
if (is_valid_isa (avx)) {
854
- vpmulld (x1, x2, op );
869
+ vpmulld (x, op1, op2 );
855
870
} else {
856
- if (x1.getIdx () != x2.getIdx ()) movdqa (x1, x2);
857
- pmulld (x1, op);
871
+ if (x.getIdx () == op1.getIdx ()) {
872
+ pmulld (x, op2);
873
+ } else if (x.isEqualIfNotInherited (op2)) {
874
+ pmulld (x, op1);
875
+ } else {
876
+ movdqa (x, op1);
877
+ pmulld (x, op2);
878
+ }
858
879
}
859
880
}
860
881
/**
 * Ymm overload of uni_vpmulld: x1 = x2 * op.
 *
 * Unconditionally emits the three-operand vpmulld; no ISA check is made here.
 */
void uni_vpmulld(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
        const Xbyak::Operand &op) {
    this->vpmulld(x1, x2, op);
}
864
885
865
- void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
886
+ void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
866
887
const Xbyak::Operand &op2) {
867
888
if (is_valid_isa (avx))
868
889
vmulps (x, op1, op2);
869
890
else {
870
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
871
- mulps (x, op2);
891
+ if (x.getIdx () == op1.getIdx ()) {
892
+ mulps (x, op2);
893
+ } else if (x.isEqualIfNotInherited (op2)) {
894
+ mulps (x, op1);
895
+ } else {
896
+ movups (x, op1);
897
+ mulps (x, op2);
898
+ }
872
899
}
873
900
}
874
- void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
901
+ void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
875
902
const Xbyak::Operand &op2) {
876
903
vmulps (x, op1, op2);
877
904
}
@@ -1273,16 +1300,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
1273
1300
vpsrld (x, op, imm);
1274
1301
}
1275
1302
1276
- void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1303
+ void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
1277
1304
const Xbyak::Operand &op2) {
1278
1305
if (is_valid_isa (avx))
1279
1306
vmaxps (x, op1, op2);
1280
1307
else {
1281
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1282
- maxps (x, op2);
1308
+ if (x.getIdx () == op1.getIdx ()) {
1309
+ maxps (x, op2);
1310
+ } else if (x.isEqualIfNotInherited (op2)) {
1311
+ maxps (x, op1);
1312
+ } else {
1313
+ movups (x, op1);
1314
+ maxps (x, op2);
1315
+ }
1283
1316
}
1284
1317
}
1285
- void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1318
+ void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
1286
1319
const Xbyak::Operand &op2) {
1287
1320
vmaxps (x, op1, op2);
1288
1321
}
@@ -1297,17 +1330,23 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
1297
1330
}
1298
1331
}
1299
1332
1300
- void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1333
+ void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
1301
1334
const Xbyak::Operand &op2) {
1302
1335
if (is_valid_isa (avx))
1303
1336
vminps (x, op1, op2);
1304
1337
else {
1305
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1306
- minps (x, op2);
1338
+ if (x.getIdx () == op1.getIdx ()) {
1339
+ minps (x, op2);
1340
+ } else if (x.isEqualIfNotInherited (op2)) {
1341
+ minps (x, op1);
1342
+ } else {
1343
+ movups (x, op1);
1344
+ minps (x, op2);
1345
+ }
1307
1346
}
1308
1347
}
1309
1348
1310
- void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1349
+ void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
1311
1350
const Xbyak::Operand &op2) {
1312
1351
vminps (x, op1, op2);
1313
1352
}
@@ -1344,13 +1383,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
1344
1383
vpmovzxbd (y, op);
1345
1384
}
1346
1385
1347
- void uni_vcmpps (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
1348
- const Xbyak::Operand &op , int cmp_predicate) {
1386
+ void uni_vcmpps (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
1387
+ const Xbyak::Operand &op2 , int cmp_predicate) {
1349
1388
if (is_valid_isa (avx))
1350
- vcmpps (x1, x2, op , cmp_predicate);
1389
+ vcmpps (x, op1, op2 , cmp_predicate);
1351
1390
else {
1352
- if (x1.getIdx () != x2.getIdx ()) uni_vmovups (x1, x2);
1353
- cmpps (x1, op, cmp_predicate);
1391
+ if (x.getIdx () == op1.getIdx ()) {
1392
+ cmpps (x, op2, cmp_predicate);
1393
+ } else if (x.isEqualIfNotInherited (op2)) {
1394
+ push (op1);
1395
+ cmpps (op1, op2, cmp_predicate);
1396
+ movups (x, op1);
1397
+ pop (op1);
1398
+ } else {
1399
+ movups (x, op1);
1400
+ cmpps (x, op2, cmp_predicate);
1401
+ }
1354
1402
}
1355
1403
}
1356
1404
void uni_vcmpps (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
0 commit comments