@@ -713,16 +713,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
713
713
vdivps (x, op1, op2);
714
714
}
715
715
716
- void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
716
+ void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
717
717
const Xbyak::Operand &op2) {
718
718
if (is_valid_isa (avx))
719
719
vaddps (x, op1, op2);
720
720
else {
721
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
722
- addps (x, op2);
721
+ if (x.getIdx () == op1.getIdx ()) {
722
+ addps (x, op2);
723
+ } else if (x.isEqualIfNotInherited (op2)) {
724
+ addps (x, op1);
725
+ } else {
726
+ movups (x, op1);
727
+ addps (x, op2);
728
+ }
723
729
}
724
730
}
725
- void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
731
+ void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
726
732
const Xbyak::Operand &op2) {
727
733
vaddps (x, op1, op2);
728
734
}
@@ -831,16 +837,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
831
837
vsubss (x, Xbyak::Xmm (op1.getIdx ()), Xbyak::Xmm (op2.getIdx ()));
832
838
}
833
839
834
- void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
840
+ void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
835
841
const Xbyak::Operand &op2) {
836
842
if (is_valid_isa (avx))
837
843
vsubps (x, op1, op2);
838
844
else {
839
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
840
- subps (x, op2);
845
+ if (x.getIdx () == op1.getIdx ()) {
846
+ subps (x, op2);
847
+ } else if (x.isEqualIfNotInherited (op2)) {
848
+ push (op1);
849
+ subps (op1, op2);
850
+ movups (x, op1);
851
+ pop (op1);
852
+ } else {
853
+ movups (x, op1);
854
+ subps (x, op2);
855
+ }
841
856
}
842
857
}
843
- void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
858
+ void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
844
859
const Xbyak::Operand &op2) {
845
860
vsubps (x, op1, op2);
846
861
}
@@ -874,30 +889,42 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
874
889
}
875
890
}
876
891
877
- void uni_vpmulld (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
878
- const Xbyak::Operand &op ) {
892
+ void uni_vpmulld (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
893
+ const Xbyak::Operand &op2 ) {
879
894
if (is_valid_isa (avx)) {
880
- vpmulld (x1, x2, op );
895
+ vpmulld (x, op1, op2 );
881
896
} else {
882
- if (x1.getIdx () != x2.getIdx ()) movdqa (x1, x2);
883
- pmulld (x1, op);
897
+ if (x.getIdx () == op1.getIdx ()) {
898
+ pmulld (x, op2);
899
+ } else if (x.isEqualIfNotInherited (op2)) {
900
+ pmulld (x, op1);
901
+ } else {
902
+ movdqa (x, op1);
903
+ pmulld (x, op2);
904
+ }
884
905
}
885
906
}
886
907
// Ymm overload of uni_vpmulld: always emits the three-operand vpmulld,
// there is no two-operand fallback on this path.
void uni_vpmulld(const Xbyak::Ymm &x1,
        const Xbyak::Ymm &x2,
        const Xbyak::Operand &op)
{
    vpmulld(x1, x2, op);
}
890
911
891
- void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
912
+ void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
892
913
const Xbyak::Operand &op2) {
893
914
if (is_valid_isa (avx))
894
915
vmulps (x, op1, op2);
895
916
else {
896
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
897
- mulps (x, op2);
917
+ if (x.getIdx () == op1.getIdx ()) {
918
+ mulps (x, op2);
919
+ } else if (x.isEqualIfNotInherited (op2)) {
920
+ mulps (x, op1);
921
+ } else {
922
+ movups (x, op1);
923
+ mulps (x, op2);
924
+ }
898
925
}
899
926
}
900
- void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
927
+ void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
901
928
const Xbyak::Operand &op2) {
902
929
vmulps (x, op1, op2);
903
930
}
@@ -1299,16 +1326,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
1299
1326
vpsrld (x, op, imm);
1300
1327
}
1301
1328
1302
- void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1329
+ void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
1303
1330
const Xbyak::Operand &op2) {
1304
1331
if (is_valid_isa (avx))
1305
1332
vmaxps (x, op1, op2);
1306
1333
else {
1307
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1308
- maxps (x, op2);
1334
+ if (x.getIdx () == op1.getIdx ()) {
1335
+ maxps (x, op2);
1336
+ } else if (x.isEqualIfNotInherited (op2)) {
1337
+ maxps (x, op1);
1338
+ } else {
1339
+ movups (x, op1);
1340
+ maxps (x, op2);
1341
+ }
1309
1342
}
1310
1343
}
1311
- void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1344
+ void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
1312
1345
const Xbyak::Operand &op2) {
1313
1346
vmaxps (x, op1, op2);
1314
1347
}
@@ -1323,17 +1356,23 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
1323
1356
}
1324
1357
}
1325
1358
1326
- void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1359
+ void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
1327
1360
const Xbyak::Operand &op2) {
1328
1361
if (is_valid_isa (avx))
1329
1362
vminps (x, op1, op2);
1330
1363
else {
1331
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1332
- minps (x, op2);
1364
+ if (x.getIdx () == op1.getIdx ()) {
1365
+ minps (x, op2);
1366
+ } else if (x.isEqualIfNotInherited (op2)) {
1367
+ minps (x, op1);
1368
+ } else {
1369
+ movups (x, op1);
1370
+ minps (x, op2);
1371
+ }
1333
1372
}
1334
1373
}
1335
1374
1336
- void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1375
+ void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
1337
1376
const Xbyak::Operand &op2) {
1338
1377
vminps (x, op1, op2);
1339
1378
}
@@ -1370,13 +1409,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
1370
1409
vpmovzxbd (y, op);
1371
1410
}
1372
1411
1373
- void uni_vcmpps (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
1374
- const Xbyak::Operand &op , int cmp_predicate) {
1412
+ void uni_vcmpps (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
1413
+ const Xbyak::Operand &op2 , int cmp_predicate) {
1375
1414
if (is_valid_isa (avx))
1376
- vcmpps (x1, x2, op , cmp_predicate);
1415
+ vcmpps (x, op1, op2 , cmp_predicate);
1377
1416
else {
1378
- if (x1.getIdx () != x2.getIdx ()) uni_vmovups (x1, x2);
1379
- cmpps (x1, op, cmp_predicate);
1417
+ if (x.getIdx () == op1.getIdx ()) {
1418
+ cmpps (x, op2, cmp_predicate);
1419
+ } else if (x.isEqualIfNotInherited (op2)) {
1420
+ push (op1);
1421
+ cmpps (op1, op2, cmp_predicate);
1422
+ movups (x, op1);
1423
+ pop (op1);
1424
+ } else {
1425
+ movups (x, op1);
1426
+ cmpps (x, op2, cmp_predicate);
1427
+ }
1380
1428
}
1381
1429
}
1382
1430
void uni_vcmpps (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
0 commit comments