@@ -657,16 +657,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
657
657
vdivps (x, op1, op2);
658
658
}
659
659
660
- void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
660
+ void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
661
661
const Xbyak::Operand &op2) {
662
662
if (is_valid_isa (avx))
663
663
vaddps (x, op1, op2);
664
664
else {
665
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
666
- addps (x, op2);
665
+ if (x.getIdx () == op1.getIdx ()) {
666
+ addps (x, op2);
667
+ } else if (x.isEqualIfNotInherited (op2)) {
668
+ addps (x, op1);
669
+ } else {
670
+ movups (x, op1);
671
+ addps (x, op2);
672
+ }
667
673
}
668
674
}
669
- void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
675
+ void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
670
676
const Xbyak::Operand &op2) {
671
677
vaddps (x, op1, op2);
672
678
}
@@ -775,16 +781,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
775
781
vsubss (x, Xbyak::Xmm (op1.getIdx ()), Xbyak::Xmm (op2.getIdx ()));
776
782
}
777
783
778
- void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
784
+ void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
779
785
const Xbyak::Operand &op2) {
780
786
if (is_valid_isa (avx))
781
787
vsubps (x, op1, op2);
782
788
else {
783
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
784
- subps (x, op2);
789
+ if (x.getIdx () == op1.getIdx ()) {
790
+ subps (x, op2);
791
+ } else if (x.isEqualIfNotInherited (op2)) {
792
+ push (op1);
793
+ subps (op1, op2);
794
+ movups (x, op1);
795
+ pop (op1);
796
+ } else {
797
+ movups (x, op1);
798
+ subps (x, op2);
799
+ }
785
800
}
786
801
}
787
- void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
802
+ void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
788
803
const Xbyak::Operand &op2) {
789
804
vsubps (x, op1, op2);
790
805
}
@@ -818,30 +833,42 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
818
833
}
819
834
}
820
835
821
- void uni_vpmulld (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
822
- const Xbyak::Operand &op ) {
836
+ void uni_vpmulld (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
837
+ const Xbyak::Operand &op2 ) {
823
838
if (is_valid_isa (avx)) {
824
- vpmulld (x1, x2, op );
839
+ vpmulld (x, op1, op2 );
825
840
} else {
826
- if (x1.getIdx () != x2.getIdx ()) movdqa (x1, x2);
827
- pmulld (x1, op);
841
+ if (x.getIdx () == op1.getIdx ()) {
842
+ pmulld (x, op2);
843
+ } else if (x.isEqualIfNotInherited (op2)) {
844
+ pmulld (x, op1);
845
+ } else {
846
+ movdqa (x, op1);
847
+ pmulld (x, op2);
848
+ }
828
849
}
829
850
}
830
851
void uni_vpmulld (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
831
852
const Xbyak::Operand &op) {
832
853
vpmulld (x1, x2, op);
833
854
}
834
855
835
- void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
856
+ void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
836
857
const Xbyak::Operand &op2) {
837
858
if (is_valid_isa (avx))
838
859
vmulps (x, op1, op2);
839
860
else {
840
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
841
- mulps (x, op2);
861
+ if (x.getIdx () == op1.getIdx ()) {
862
+ mulps (x, op2);
863
+ } else if (x.isEqualIfNotInherited (op2)) {
864
+ mulps (x, op1);
865
+ } else {
866
+ movups (x, op1);
867
+ mulps (x, op2);
868
+ }
842
869
}
843
870
}
844
- void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
871
+ void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
845
872
const Xbyak::Operand &op2) {
846
873
vmulps (x, op1, op2);
847
874
}
@@ -1243,16 +1270,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
1243
1270
vpsrld (x, op, imm);
1244
1271
}
1245
1272
1246
- void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1273
+ void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
1247
1274
const Xbyak::Operand &op2) {
1248
1275
if (is_valid_isa (avx))
1249
1276
vmaxps (x, op1, op2);
1250
1277
else {
1251
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1252
- maxps (x, op2);
1278
+ if (x.getIdx () == op1.getIdx ()) {
1279
+ maxps (x, op2);
1280
+ } else if (x.isEqualIfNotInherited (op2)) {
1281
+ maxps (x, op1);
1282
+ } else {
1283
+ movups (x, op1);
1284
+ maxps (x, op2);
1285
+ }
1253
1286
}
1254
1287
}
1255
- void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1288
+ void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
1256
1289
const Xbyak::Operand &op2) {
1257
1290
vmaxps (x, op1, op2);
1258
1291
}
@@ -1267,17 +1300,23 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
1267
1300
}
1268
1301
}
1269
1302
1270
- void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1303
+ void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
1271
1304
const Xbyak::Operand &op2) {
1272
1305
if (is_valid_isa (avx))
1273
1306
vminps (x, op1, op2);
1274
1307
else {
1275
- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1276
- minps (x, op2);
1308
+ if (x.getIdx () == op1.getIdx ()) {
1309
+ minps (x, op2);
1310
+ } else if (x.isEqualIfNotInherited (op2)) {
1311
+ minps (x, op1);
1312
+ } else {
1313
+ movups (x, op1);
1314
+ minps (x, op2);
1315
+ }
1277
1316
}
1278
1317
}
1279
1318
1280
- void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1319
+ void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
1281
1320
const Xbyak::Operand &op2) {
1282
1321
vminps (x, op1, op2);
1283
1322
}
@@ -1314,13 +1353,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
1314
1353
vpmovzxbd (y, op);
1315
1354
}
1316
1355
1317
- void uni_vcmpps (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
1318
- const Xbyak::Operand &op , int cmp_predicate) {
1356
+ void uni_vcmpps (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
1357
+ const Xbyak::Operand &op2 , int cmp_predicate) {
1319
1358
if (is_valid_isa (avx))
1320
- vcmpps (x1, x2, op , cmp_predicate);
1359
+ vcmpps (x, op1, op2 , cmp_predicate);
1321
1360
else {
1322
- if (x1.getIdx () != x2.getIdx ()) uni_vmovups (x1, x2);
1323
- cmpps (x1, op, cmp_predicate);
1361
+ if (x.getIdx () == op1.getIdx ()) {
1362
+ cmpps (x, op2, cmp_predicate);
1363
+ } else if (x.isEqualIfNotInherited (op2)) {
1364
+ push (op1);
1365
+ cmpps (op1, op2, cmp_predicate);
1366
+ movups (x, op1);
1367
+ pop (op1);
1368
+ } else {
1369
+ movups (x, op1);
1370
+ cmpps (x, op2, cmp_predicate);
1371
+ }
1324
1372
}
1325
1373
}
1326
1374
void uni_vcmpps (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
0 commit comments