@@ -161,6 +161,18 @@ class X87StackOptimization final : public Pass {
   // Helpers
   Ref RotateRight8(uint32_t V, Ref Amount);
 
+  // Helper to check if a Ref is a Zero constant
+  bool IsZero(Ref Node) {
+    auto Header = IR->GetOp<IR::IROp_Header>(Node);
+    if (Header->Op != OP_CONSTANT) {
+      return false;
+    }
+
+    auto Const = Header->C<IROp_Constant>();
+    return Const->Constant == 0;
+  }
+
+
   // Handles a Unary operation.
   // Takes the op we are handling, the Node for the reduced precision case and the node for the normal case.
   // Depending on the type of Op64, we might need to pass a couple of extra constant arguments, this happens
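For readers unfamiliar with the IR helpers, the following is a minimal, self-contained sketch of the check the new IsZero helper performs: a node counts as zero only if it is a constant op whose value is 0, which lets the pass later skip emitting a redundant address add. The Op and Node types here are hypothetical stand-ins, not FEX's real IROp_Header/IROp_Constant API.

// Standalone sketch with hypothetical types; compile and run as-is.
#include <cstdint>
#include <cstdio>

enum class Op { Constant, Add, Load };

struct Node {
  Op Kind;
  uint64_t Value; // Only meaningful when Kind == Op::Constant.
};

// Returns true only for nodes that are provably the constant 0.
bool IsZero(const Node& N) {
  if (N.Kind != Op::Constant) {
    return false;
  }
  return N.Value == 0;
}

int main() {
  Node Zero{Op::Constant, 0};
  Node Eight{Op::Constant, 8};
  Node Dynamic{Op::Load, 0};

  // A pass can use such a check to skip emitting `Addr + Offset` when Offset is 0.
  std::printf("%d %d %d\n", IsZero(Zero), IsZero(Eight), IsZero(Dynamic));
  return 0;
}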
@@ -245,6 +257,7 @@ class X87StackOptimization final : public Pass {
   bool SlowPath = false;
   // Keeping IREmitter not to pass arguments around
   IREmitter* IREmit = nullptr;
+  IRListView* IR;
 };
 
 inline void X87StackOptimization::InvalidateCaches() {
@@ -537,6 +550,7 @@ void X87StackOptimization::Run(IREmitter* Emit) {
 
   // Initialize IREmit member
   IREmit = Emit;
+  IR = &CurrentIR;
 
   // Run optimization proper
   for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) {
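The member added in the previous hunk is initialized here so that helpers such as IsZero can reach the current IR view without it being threaded through every signature. Below is a minimal sketch of that pattern using hypothetical stand-in types (not FEX's actual Pass/IRListView/IREmitter API).

// Standalone sketch of caching per-Run() state in pass members.
#include <cstdio>

struct IRView { /* stand-in for IRListView */ };
struct Emitter { /* stand-in for IREmitter */ };

class ExamplePass {
public:
  void Run(Emitter* Emit, IRView* View) {
    IREmit = Emit; // cached once per Run() invocation
    IR = View;
    Helper();      // helpers read the members instead of taking extra arguments
  }

private:
  void Helper() { std::printf("IR view cached: %p\n", static_cast<void*>(IR)); }

  Emitter* IREmit = nullptr;
  IRView* IR = nullptr;
};

int main() {
  IRView View;
  Emitter Emit;
  ExamplePass{}.Run(&Emit, &View);
  return 0;
}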
@@ -780,11 +794,12 @@ void X87StackOptimization::Run(IREmitter* Emit) {
       break;
     }
 
-    case OP_STORESTACKMEMORY: {
-      const auto* Op = IROp->C<IROp_StoreStackMemory>();
+    case OP_STORESTACKMEM: {
+      const auto* Op = IROp->C<IROp_StoreStackMem>();
       const auto& Value = MigrateToSlowPath_IfInvalid();
       Ref StackNode = SlowPath ? LoadStackValueAtOffset_Slow() : Value->StackDataNode;
       Ref AddrNode = CurrentIR.GetNode(Op->Addr);
+      Ref Offset = CurrentIR.GetNode(Op->Offset);
 
       // On the fast path we can optimize memory copies.
       // If we are doing:
@@ -796,45 +811,49 @@ void X87StackOptimization::Run(IREmitter* Emit) {
       // or similar. As long as the source size and dest size are one and the same.
       // This will avoid any conversions between source and stack element size and conversion back.
       if (!SlowPath && Value->Source && Value->Source->first == Op->StoreSize && Value->InterpretAsFloat) {
-        IREmit->_StoreMem(Value->InterpretAsFloat ? FPRClass : GPRClass, Op->StoreSize, AddrNode, Value->Source->second);
+        IREmit->_StoreMem(Value->InterpretAsFloat ? FPRClass : GPRClass, Op->StoreSize, Value->Source->second, AddrNode, Offset,
+                          OpSize::iInvalid, MEM_OFFSET_SXTX, 1);
       } else {
         if (ReducedPrecisionMode) {
           switch (Op->StoreSize) {
-          case OpSize::i32Bit: {
-            StackNode = IREmit->_Float_FToF(OpSize::i32Bit, OpSize::i64Bit, StackNode);
-            IREmit->_StoreMem(FPRClass, OpSize::i32Bit, AddrNode, StackNode);
-            break;
-          }
+          case OpSize::i32Bit:
           case OpSize::i64Bit: {
-            IREmit->_StoreMem(FPRClass, OpSize::i64Bit, AddrNode, StackNode);
+            if (Op->StoreSize == OpSize::i32Bit) {
+              StackNode = IREmit->_Float_FToF(OpSize::i32Bit, OpSize::i64Bit, StackNode);
+            }
+            IREmit->_StoreMem(FPRClass, Op->StoreSize, StackNode, AddrNode, Offset, OpSize::iInvalid, MEM_OFFSET_SXTX, 1);
             break;
           }
           case OpSize::f80Bit: {
             StackNode = IREmit->_F80CVTTo(StackNode, OpSize::i64Bit);
-            IREmit->_StoreMem(FPRClass, OpSize::i64Bit, AddrNode, StackNode);
+            IREmit->_StoreMem(FPRClass, OpSize::i64Bit, StackNode, AddrNode, Offset, OpSize::iInvalid, MEM_OFFSET_SXTX, 1);
             auto Upper = IREmit->_VExtractToGPR(OpSize::i128Bit, OpSize::i64Bit, StackNode, 1);
-            IREmit->_StoreMem(GPRClass, OpSize::i16Bit, Upper, AddrNode, GetConstant(8), OpSize::i64Bit, MEM_OFFSET_SXTX, 1);
+            auto NewOffset = IREmit->_Add(OpSize::i64Bit, Offset, GetConstant(8));
+            IREmit->_StoreMem(GPRClass, OpSize::i16Bit, Upper, AddrNode, NewOffset, OpSize::i64Bit, MEM_OFFSET_SXTX, 1);
             break;
           }
           default: ERROR_AND_DIE_FMT("Unsupported x87 size");
           }
-        } else {
+        } else { // !ReducedPrecisionMode
          if (Op->StoreSize != OpSize::f80Bit) { // if it's not 80bits then convert
            StackNode = IREmit->_F80CVT(Op->StoreSize, StackNode);
          }
          if (Op->StoreSize == OpSize::f80Bit) { // Part of code from StoreResult_WithOpSize()
            if (Features.SupportsSVE128 || Features.SupportsSVE256) {
              auto PReg = IREmit->_InitPredicate(OpSize::i16Bit, FEXCore::ToUnderlying(ARMEmitter::PredicatePattern::SVE_VL5));
+              if (!IsZero(Offset)) {
+                AddrNode = IREmit->_Add(OpSize::i64Bit, AddrNode, Offset);
+              }
              IREmit->_StoreMemPredicate(OpSize::i128Bit, OpSize::i16Bit, StackNode, PReg, AddrNode);
            } else {
              // For X87 extended doubles, split before storing
-              IREmit->_StoreMem(FPRClass, OpSize::i64Bit, AddrNode, StackNode);
+              IREmit->_StoreMem(FPRClass, OpSize::i64Bit, StackNode, AddrNode, Offset, OpSize::iInvalid, MEM_OFFSET_SXTX, 1);
              auto Upper = IREmit->_VExtractToGPR(OpSize::i128Bit, OpSize::i64Bit, StackNode, 1);
-              auto DestAddr = IREmit->_Add(OpSize::i64Bit, AddrNode, GetConstant(8));
-              IREmit->_StoreMem(GPRClass, OpSize::i16Bit, DestAddr, Upper, OpSize::i64Bit);
+              auto NewOffset = IREmit->_Add(OpSize::i64Bit, Offset, GetConstant(8));
+              IREmit->_StoreMem(GPRClass, OpSize::i16Bit, Upper, AddrNode, NewOffset, OpSize::i64Bit, MEM_OFFSET_SXTX, 1);
            }
          } else {
-            IREmit->_StoreMem(FPRClass, Op->StoreSize, AddrNode, StackNode);
+            IREmit->_StoreMem(FPRClass, Op->StoreSize, StackNode, AddrNode, Offset, OpSize::iInvalid, MEM_OFFSET_SXTX, 1);
          }
        }
      }
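To make the addressing change concrete: after this patch the 80-bit store path no longer materializes a separate `AddrNode + 8` address; it keeps the base address and threads an offset through `_StoreMem` (and offset + 8 for the upper 16 bits). The sketch below is a plain C++ model of the resulting memory layout only, not FEX code; the StoreF80 helper and X80 struct are hypothetical.

// Standalone sketch of the split store: low 64 bits at base + offset,
// 16-bit sign/exponent word at base + offset + 8.
#include <cstdint>
#include <cstdio>
#include <cstring>

struct X80 {
  uint64_t Mantissa;
  uint16_t SignExp;
};

void StoreF80(uint8_t* Base, uint64_t Offset, const X80& Value) {
  std::memcpy(Base + Offset, &Value.Mantissa, sizeof(Value.Mantissa));   // low 64 bits
  std::memcpy(Base + Offset + 8, &Value.SignExp, sizeof(Value.SignExp)); // upper 16 bits
}

int main() {
  uint8_t Memory[32] = {};
  StoreF80(Memory, 4, {0x8000000000000000ULL, 0x3FFF}); // 1.0 as an x87 extended double
  for (unsigned i = 0; i < sizeof(Memory); ++i) {
    std::printf("%02X ", Memory[i]);
  }
  std::printf("\n");
  return 0;
}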