@@ -3440,13 +3440,17 @@ void ThreadState::StepNext(ShaderDebugState *state, DebugAPIWrapper *apiWrapper,
3440
3440
{
3441
3441
data += dataOffset;
3442
3442
3443
+ int boundsClampedComps = 4 ;
3444
+
3443
3445
uint32_t srcIdx = 1 ;
3444
3446
if (op.operation == OPCODE_LD_STRUCTURED)
3445
3447
{
3446
3448
srcIdx = 2 ;
3447
3449
fmt.byteWidth = 4 ;
3448
3450
3449
3451
fmt.numComps = 4 ;
3452
+ boundsClampedComps = int ((stride - structOffset) / sizeof (uint32_t ));
3453
+ fmt.numComps = RDCMIN (fmt.numComps , boundsClampedComps);
3450
3454
3451
3455
if (op.operands [0 ].comps [0 ] != 0xff && op.operands [0 ].comps [1 ] == 0xff &&
3452
3456
op.operands [0 ].comps [2 ] == 0xff && op.operands [0 ].comps [3 ] == 0xff )
@@ -3468,7 +3472,8 @@ void ThreadState::StepNext(ShaderDebugState *state, DebugAPIWrapper *apiWrapper,
3468
3472
fmt.numComps = 4 ;
3469
3473
3470
3474
// do not allow writing beyond the stride (we don't expect fxc to emit writes like this anyway)
3471
- fmt.numComps = RDCMIN (fmt.numComps , int ((stride - structOffset) / sizeof (uint32_t )));
3475
+ boundsClampedComps = int ((stride - structOffset) / sizeof (uint32_t ));
3476
+ fmt.numComps = RDCMIN (fmt.numComps , boundsClampedComps);
3472
3477
3473
3478
for (int c = 0 ; c < 4 ; c++)
3474
3479
{
@@ -3489,7 +3494,8 @@ void ThreadState::StepNext(ShaderDebugState *state, DebugAPIWrapper *apiWrapper,
3489
3494
fmt.numComps = 4 ;
3490
3495
3491
3496
// clamp to out of bounds based on numElems
3492
- fmt.numComps = RDCMIN (fmt.numComps , int (numElems - elemIdx) / 4 );
3497
+ boundsClampedComps = int (numElems - elemIdx) / 4 ;
3498
+ fmt.numComps = RDCMIN (fmt.numComps , boundsClampedComps);
3493
3499
3494
3500
if (op.operands [0 ].comps [0 ] != 0xff && op.operands [0 ].comps [1 ] == 0xff &&
3495
3501
op.operands [0 ].comps [2 ] == 0xff && op.operands [0 ].comps [3 ] == 0xff )
@@ -3510,7 +3516,7 @@ void ThreadState::StepNext(ShaderDebugState *state, DebugAPIWrapper *apiWrapper,
3510
3516
fmt.numComps = 4 ;
3511
3517
3512
3518
// clamp to out of bounds based on numElems
3513
- int boundsClampedComps = int (numElems - elemIdx) / 4 ;
3519
+ boundsClampedComps = int (numElems - elemIdx) / 4 ;
3514
3520
fmt.numComps = RDCMIN (fmt.numComps , boundsClampedComps);
3515
3521
3516
3522
for (int c = 0 ; c < boundsClampedComps; c++)
@@ -3528,10 +3534,17 @@ void ThreadState::StepNext(ShaderDebugState *state, DebugAPIWrapper *apiWrapper,
3528
3534
{
3529
3535
ShaderVariable result = TypedUAVLoad (fmt, data);
3530
3536
3537
+ // clamp the result to any out of bounds loads so that we don't fill in with w=1
3538
+ for (int c = boundsClampedComps; c < 4 ; c++)
3539
+ result.value .u32v [c] = 0 ;
3540
+
3531
3541
// apply the swizzle on the resource operand
3532
3542
ShaderVariable fetch (" " , 0U , 0U , 0U , 0U );
3533
3543
3534
- for (int c = 0 ; c < fmt.numComps ; c++)
3544
+ // always process all 4 components, as this is applying a swizzle to the returned resource
3545
+ // data, and we could swizzle a 1-component texture result into .y with .yxzw if we then
3546
+ // go on to scalar-assign it to .y of the output
3547
+ for (int c = 0 ; c < 4 ; c++)
3535
3548
{
3536
3549
uint8_t comp = resComps[c];
3537
3550
if (comp == 0xff )
0 commit comments