Skip to content

Commit fb50fab

Browse files
authored
Fix RWTexture issues on CUDA (shader-slang#1876)
* `#include` of an absolute path didn't work, because paths were always taken to be relative. * Re-enable CUDA RWTexture tests. Re-enable the RWTexture1D test. Make sure tests have only a single mip for RWTexture (required for CUDA). * Fix issue with reading a CUDA surface. Re-enable the working CUDA RWTexture test. Enable the 1D case.
1 parent 5974f3e commit fb50fab

7 files changed

+35
-31
lines changed

source/slang/core.meta.slang

+15-7
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,12 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
934934
{
935935
sb << '.' << char(i + 'x');
936936
}
937+
938+
// Surface access is *byte* addressed in x in CUDA
939+
if (i == 0)
940+
{
941+
sb << " * $E";
942+
}
937943
}
938944
if (isArray)
939945
{
@@ -949,7 +955,9 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
949955
sb << "Layered";
950956
}
951957
sb << "read";
952-
sb << "<$T0>($0, ($1).x, ($1).y, ($1).z";
958+
959+
// Surface access is *byte* addressed in x in CUDA
960+
sb << "<$T0>($0, ($1).x * $E, ($1).y, ($1).z";
953961
if (isArray)
954962
{
955963
sb << ", int(($1).w)";
@@ -1090,12 +1098,12 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
10901098
sb << ", ($1)";
10911099
if (vecCount > 1)
10921100
{
1093-
sb << '.' << char(i + 'x');
1094-
// Surface access is *byte* addressed in x in CUDA
1095-
if (i == 0)
1096-
{
1097-
sb << " * $E";
1098-
}
1101+
sb << '.' << char(i + 'x');
1102+
}
1103+
// Surface access is *byte* addressed in x in CUDA
1104+
if (i == 0)
1105+
{
1106+
sb << " * $E";
10991107
}
11001108
}
11011109

tests/compute/half-rw-texture-convert.slang

+3-9
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,9 @@
1111
//DIABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj
1212

1313
// Note that this test is a little silly. The output does not confirm that the write actually worked.
14-
// This is because it's not trivial on CUDA to do so. If there is a write conversion, the RWTexture
15-
// is backed by a surface. There is a texture converting write (in sust.p) but not in reading.
16-
//
17-
// In practice if you wanted a texture read, you'd either only be able to read *without* format
18-
// conversion, or via a separate binding of the same surface as a Texture.
19-
// There's no simple way to describe either, so this test just confirms it outputs PTX that can
20-
// be executed, and unfortunately doesn't test if the write conversion actually *worked*
21-
22-
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj -render-features half
14+
// half-rw-texture-convert2.slang tests this
15+
16+
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj -render-features half
2317

2418
//TEST_INPUT: RWTexture2D(format=R_Float16, size=4, content = one, mipMaps = 1):name rwt2D
2519
[format("r16f")]

tests/compute/half-rw-texture-simple.slang

+4-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
//DIABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj
1111
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj -render-features half
1212

13+
//TEST_INPUT: RWTexture1D(format=R_Float16, size=4, content = one, mipMaps = 1):name rwt1D
14+
RWTexture1D<half> rwt1D;
15+
1316
//TEST_INPUT: RWTexture2D(format=R_Float16, size=4, content = one, mipMaps = 1):name rwt2D
1417
RWTexture2D<half> rwt2D;
1518

@@ -26,7 +29,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
2629

2730
float val = 0.0f;
2831

29-
//val += rwt1D[idx];
32+
val += rwt1D[idx];
3033

3134
half h0 = rwt2D[uint2(idx, idx)];
3235

Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
type: float
2-
5.000000
32
6.000000
43
7.000000
54
8.000000
5+
9.000000

tests/compute/rw-texture-simple.slang

+6-10
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@
66
//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -output-using-type -shaderobj
77
// TODO(JS): Doesn't work on vk currently, because createTextureView not implemented on vk renderer
88
//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -output-using-type -shaderobj
9-
// TODO(JS): Doesn't work on certain CI systems.
10-
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj
119

12-
//TEST_INPUT: RWTexture1D(format=R_Float32, size=4, content = one):name rwt1D
10+
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -output-using-type -shaderobj
11+
12+
//TEST_INPUT: RWTexture1D(format=R_Float32, size=4, content = one, mipMaps = 1):name rwt1D
1313
RWTexture1D<float> rwt1D;
14-
//TEST_INPUT: RWTexture2D(format=R_Float32, size=4, content = one):name rwt2D
14+
//TEST_INPUT: RWTexture2D(format=R_Float32, size=4, content = one, mipMaps = 1):name rwt2D
1515
RWTexture2D<float> rwt2D;
16-
//TEST_INPUT: RWTexture3D(format=R_Float32, size=4, content = one):name rwt3D
16+
//TEST_INPUT: RWTexture3D(format=R_Float32, size=4, content = one, mipMaps = 1):name rwt3D
1717
RWTexture3D<float> rwt3D;
1818

1919
//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
@@ -26,15 +26,11 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
2626

2727
float val = 0.0f;
2828

29-
//val += rwt1D.Load(idx);
3029
val += rwt1D[idx];
3130
val += rwt2D[uint2(idx, idx)];
3231
val += rwt3D[uint3(idx, idx, idx)];
3332

34-
// NOTE! This is disabled because on CUDA, whilst this has an effect it is not what is expected.
35-
// The value read back has changed but seems to always be 1.
36-
// rwt1D[idx] = idx;
37-
33+
rwt1D[idx] = idx;
3834
rwt2D[uint2(idx, idx)] = idx;
3935
rwt3D[uint3(idx, idx, idx)] = idx;
4036

Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
type: float
2-
4.000000
2+
3.000000
33
6.000000
4-
8.000000
5-
10.000000
4+
9.000000
5+
12.000000

tools/gfx/cuda/render-cuda.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -1625,6 +1625,9 @@ class CUDADevice : public RendererBase
16251625
//
16261626
if (desc.allowedStates.contains(ResourceState::UnorderedAccess))
16271627
{
1628+
// On CUDA surfaces only support a single MIP map
1629+
SLANG_ASSERT(desc.numMipLevels == 1);
1630+
16281631
SLANG_CUDA_RETURN_ON_FAIL(cuSurfObjectCreate(&tex->m_cudaSurfObj, &resDesc));
16291632
}
16301633

0 commit comments

Comments
 (0)