@@ -713,16 +713,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
713
713
vdivps (x, op1, op2);
714
714
}
715
715
716
- void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
716
+ void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
717
717
const Xbyak::Operand &op2) {
718
718
if (is_valid_isa (avx))
719
719
vaddps (x, op1, op2);
720
720
else {
721
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
722
- addps (x, op2);
721
+ if (x.getIdx () == op1.getIdx ()) {
722
+ addps (x, op2);
723
+ } else if (x.isEqualIfNotInherited (op2)) {
724
+ addps (x, op1);
725
+ } else {
726
+ movups (x, op1);
727
+ addps (x, op2);
728
+ }
723
729
}
724
730
}
725
- void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
731
+ void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
726
732
const Xbyak::Operand &op2) {
727
733
vaddps (x, op1, op2);
728
734
}
@@ -817,16 +823,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
817
823
vsubss (x, Xbyak::Xmm (op1.getIdx ()), Xbyak::Xmm (op2.getIdx ()));
818
824
}
819
825
820
- void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
826
+ void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
821
827
const Xbyak::Operand &op2) {
822
828
if (is_valid_isa (avx))
823
829
vsubps (x, op1, op2);
824
830
else {
825
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
826
- subps (x, op2);
831
+ if (x.getIdx () == op1.getIdx ()) {
832
+ subps (x, op2);
833
+ } else if (x.isEqualIfNotInherited (op2)) {
834
+ push (op1);
835
+ subps (op1, op2);
836
+ movups (x, op1);
837
+ pop (op1);
838
+ } else {
839
+ movups (x, op1);
840
+ subps (x, op2);
841
+ }
827
842
}
828
843
}
829
- void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
844
+ void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
830
845
const Xbyak::Operand &op2) {
831
846
vsubps (x, op1, op2);
832
847
}
@@ -848,30 +863,42 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
848
863
vsubps (x, op1, op2);
849
864
}
850
865
851
- void uni_vpmulld (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
852
- const Xbyak::Operand &op ) {
866
+ void uni_vpmulld (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
867
+ const Xbyak::Operand &op2 ) {
853
868
if (is_valid_isa (avx)) {
854
- vpmulld (x1, x2, op );
869
+ vpmulld (x, op1, op2 );
855
870
} else {
856
- if (x1.getIdx () != x2.getIdx ()) movdqa (x1, x2);
857
- pmulld (x1, op);
871
+ if (x.getIdx () == op1.getIdx ()) {
872
+ pmulld (x, op2);
873
+ } else if (x.isEqualIfNotInherited (op2)) {
874
+ pmulld (x, op1);
875
+ } else {
876
+ movdqa (x, op1);
877
+ pmulld (x, op2);
878
+ }
858
879
}
859
880
}
860
881
void uni_vpmulld (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
861
882
const Xbyak::Operand &op) {
862
883
vpmulld (x1, x2, op);
863
884
}
864
885
865
- void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
886
+ void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
866
887
const Xbyak::Operand &op2) {
867
888
if (is_valid_isa (avx))
868
889
vmulps (x, op1, op2);
869
890
else {
870
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
871
- mulps (x, op2);
891
+ if (x.getIdx () == op1.getIdx ()) {
892
+ mulps (x, op2);
893
+ } else if (x.isEqualIfNotInherited (op2)) {
894
+ mulps (x, op1);
895
+ } else {
896
+ movups (x, op1);
897
+ mulps (x, op2);
898
+ }
872
899
}
873
900
}
874
- void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
901
+ void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
875
902
const Xbyak::Operand &op2) {
876
903
vmulps (x, op1, op2);
877
904
}
@@ -1273,16 +1300,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
1273
1300
vpsrld (x, op, imm);
1274
1301
}
1275
1302
1276
- void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1303
+ void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
1277
1304
const Xbyak::Operand &op2) {
1278
1305
if (is_valid_isa (avx))
1279
1306
vmaxps (x, op1, op2);
1280
1307
else {
1281
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1282
- maxps (x, op2);
1308
+ if (x.getIdx () == op1.getIdx ()) {
1309
+ maxps (x, op2);
1310
+ } else if (x.isEqualIfNotInherited (op2)) {
1311
+ push (op1);
1312
+ maxps (op1, op2);
1313
+ movups (x, op1);
1314
+ pop (op1);
1315
+ } else {
1316
+ movups (x, op1);
1317
+ maxps (x, op2);
1318
+ }
1283
1319
}
1284
1320
}
1285
- void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1321
+ void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
1286
1322
const Xbyak::Operand &op2) {
1287
1323
vmaxps (x, op1, op2);
1288
1324
}
@@ -1297,17 +1333,26 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
1297
1333
}
1298
1334
}
1299
1335
1300
- void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1336
+ void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
1301
1337
const Xbyak::Operand &op2) {
1302
1338
if (is_valid_isa (avx))
1303
1339
vminps (x, op1, op2);
1304
1340
else {
1305
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1306
- minps (x, op2);
1341
+ if (x.getIdx () == op1.getIdx ()) {
1342
+ minps (x, op2);
1343
+ } else if (x.isEqualIfNotInherited (op2)) {
1344
+ push (op1);
1345
+ minps (op1, op2);
1346
+ movups (x, op1);
1347
+ pop (op1);
1348
+ } else {
1349
+ movups (x, op1);
1350
+ minps (x, op2);
1351
+ }
1307
1352
}
1308
1353
}
1309
1354
1310
- void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1355
+ void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
1311
1356
const Xbyak::Operand &op2) {
1312
1357
vminps (x, op1, op2);
1313
1358
}
@@ -1344,13 +1389,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
1344
1389
vpmovzxbd (y, op);
1345
1390
}
1346
1391
1347
- void uni_vcmpps (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
1348
- const Xbyak::Operand &op , int cmp_predicate) {
1392
+ void uni_vcmpps (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
1393
+ const Xbyak::Operand &op2 , int cmp_predicate) {
1349
1394
if (is_valid_isa (avx))
1350
- vcmpps (x1, x2, op , cmp_predicate);
1395
+ vcmpps (x, op1, op2 , cmp_predicate);
1351
1396
else {
1352
- if (x1.getIdx () != x2.getIdx ()) uni_vmovups (x1, x2);
1353
- cmpps (x1, op, cmp_predicate);
1397
+ if (x.getIdx () == op1.getIdx ()) {
1398
+ cmpps (x, op2, cmp_predicate);
1399
+ } else if (x.isEqualIfNotInherited (op2)) {
1400
+ push (op1);
1401
+ cmpps (op1, op2, cmp_predicate);
1402
+ movups (x, op1);
1403
+ pop (op1);
1404
+ } else {
1405
+ movups (x, op1);
1406
+ cmpps (x, op2, cmp_predicate);
1407
+ }
1354
1408
}
1355
1409
}
1356
1410
void uni_vcmpps (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
0 commit comments