Skip to content

Commit e510a28

Browse files
authored
Half texture support (shader-slang#1836)
* #include an absolute path didn't work - because paths were taken to always be relative. * Split out StringEscapeUtil. * Added StringEscapeUtil. * Fix typo in unix quoting type. * Small comment improvements. * Try to fix linux linking issue. * Fix typo. * Attempt to fix linux link issue. * Update VS proj even though nothing really changed. * Fix another typo issue. * Fix for windows issue. Fixed bug. * Make separate Utils for escaping. * Fix typo. * Split out into StringEscapeHandler. * Windows shell does handle removing quotes (so remove code to remove them). * Handle unescaping if not initiating using the shell. * Slight improvement around shell like decoding. * Simplify command extraction. * Add shared-library category type. * Fix bug in command extraction. * Typo in transcendental category. * Enable unit-test on in smoke test category. * Make parsing failing output as a failing test. * Fixes for transcendental tests. Disable tests that do not work. * Changed category parsing. * Removed the TestResult parameter from _gatherTestsForFile. Made testsList only output. * Remove testing if all tests were disabled. * Make args of CommandLine always unescaped. * Add category. * Don't need escaping on unix/linux. * Remove some no longer used functions. * Add requireSMVersion to CUDAExtensionTracker. * half-calc.slang now works for CUDA. * bit-cast-16-bit works on CUDA. * WIP handling of CUDA vector<half> types. * Half swizzle CUDA. * Half vector test. * Fix swizzle half bug. * Fix compilation issue with narrowing to Index. * Add unary ops. * Add some vector scalar maths ops. * Add half vector conversions for CUDA. * Fix erroneous comment. * Support for half comparisons. * First pass test for half compare. * Fix bug in CUDA specialized emit control. Updated tests to have pre and post inc/dec. * Removed unneeded parts of the cuda prelude. * Half structured buffer works on CUDA. * Added name lookup for Gfx::Format * Support half texture type in test system. 
* Test for half reading on CUDA. * Add half formats to Vk and D3D utils. * Fix getAt for CUDA - where there might not be a .x member in a vector.
1 parent 85632e8 commit e510a28

12 files changed

+471
-96
lines changed

slang-gfx.h

+36-1
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,30 @@ class IShaderProgram: public ISlangUnknown
123123
0x9d32d0ad, 0x915c, 0x4ffd, { 0x91, 0xe2, 0x50, 0x85, 0x54, 0xa0, 0x4a, 0x76 } \
124124
}
125125

126+
// Don't change without keeping in sync with Format
127+
#define GFX_FORMAT(x) \
128+
x( Unknown, 0) \
129+
\
130+
x(RGBA_Float32, sizeof(float) * 4) \
131+
x(RGB_Float32, sizeof(float) * 3) \
132+
x(RG_Float32, sizeof(float) * 2) \
133+
x(R_Float32, sizeof(float)) \
134+
\
135+
x(RGBA_Float16, sizeof(uint16_t) * 4) \
136+
x(RG_Float16, sizeof(uint16_t) * 2) \
137+
x(R_Float16, sizeof(uint16_t)) \
138+
\
139+
x(RGBA_Unorm_UInt8, sizeof(uint32_t)) \
140+
x(BGRA_Unorm_UInt8, sizeof(uint32_t)) \
141+
\
142+
x(R_UInt16, sizeof(uint16_t)) \
143+
x(R_UInt32, sizeof(uint32_t)) \
144+
\
145+
x(D_Float32, sizeof(float)) \
146+
x(D_Unorm24_S8, sizeof(uint32_t))
147+
126148
/// Different formats of things like pixels or elements of vertices
127-
/// NOTE! Any change to this type (adding, removing, changing order) - must also be reflected in changes to RendererUtil
149+
/// NOTE! Any change to this type (adding, removing, changing order) - must also be reflected in changes to GFX_FORMAT
128150
enum class Format
129151
{
130152
Unknown,
@@ -134,6 +156,10 @@ enum class Format
134156
RG_Float32,
135157
R_Float32,
136158

159+
RGBA_Float16,
160+
RG_Float16,
161+
R_Float16,
162+
137163
RGBA_Unorm_UInt8,
138164
BGRA_Unorm_UInt8,
139165

@@ -146,6 +172,12 @@ enum class Format
146172
CountOf,
147173
};
148174

175+
/// Per-format channel description, as returned by gfxGetFormatInfo.
struct FormatInfo
176+
{
177+
uint8_t channelCount; ///< The number of channels in the format. Only set if channelType is set.
178+
uint8_t channelType; ///< One of SlangScalarType. None if the format isn't made up of elements of a single scalar type.
179+
};
180+
149181
struct InputElementDesc
150182
{
151183
char const* semanticName;
@@ -1342,6 +1374,9 @@ extern "C"
13421374
/// Gets the size in bytes of a Format type. Returns 0 if a size is not defined/invalid
13431375
SLANG_GFX_API size_t SLANG_MCALL gfxGetFormatSize(Format format);
13441376

1377+
/// Gets information about the format
1378+
SLANG_GFX_API FormatInfo gfxGetFormatInfo(Format format);
1379+
13451380
/// Given a type returns a function that can construct it, or nullptr if there isn't one
13461381
SLANG_GFX_API SlangResult SLANG_MCALL
13471382
gfxCreateDevice(const IDevice::Desc* desc, IDevice** outDevice);

source/slang/slang-emit-cpp.cpp

+7-2
Original file line numberDiff line numberDiff line change
@@ -1028,10 +1028,15 @@ void CPPSourceEmitter::_emitGetAtDefinition(const UnownedStringSlice& funcName,
10281028
writer->emit("SLANG_PRELUDE_ASSERT(b >= 0 && b < ");
10291029
writer->emit(vecSize);
10301030
writer->emit(");\n");
1031+
1032+
writer->emit("return ((");
1033+
emitType(specOp->returnType);
1034+
writer->emit("*)");
1035+
10311036
if (lValue)
1032-
writer->emit("return (&a->x) + b;\n");
1037+
writer->emit("a) + b;\n");
10331038
else
1034-
writer->emit("return (&a.x)[b];\n");
1039+
writer->emit("&a)[b];\n");
10351040
}
10361041
else if (auto matrixType = as<IRMatrixType>(srcType))
10371042
{
+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj
2+
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj
3+
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -shaderobj
4+
//DISABLE_TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -dx12 -profile cs_6_0 -use-dxil -shaderobj
5+
// TODO(JS): Doesn't work on vk currently, because createTextureView not implemented on vk renderer
6+
//DISABLE_TEST(compute, vulkan):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
7+
//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -shaderobj
8+
9+
// Doesn't work on CUDA, not clear why yet
10+
//DISABLE_TEST_INPUT: Texture1D(format=R_Float16, size=4, content = one, mipMaps=1):name tLoad1D
11+
//Texture1D<float> tLoad1D;
12+
13+
//TEST_INPUT: Texture1D(format=R_Float16, size=4, content = one):name t1D
14+
Texture1D<float> t1D;
15+
//TEST_INPUT: Texture2D(format=R_Float16, size=4, content = one):name t2D
16+
Texture2D<float> t2D;
17+
//TEST_INPUT: Texture3D(format=R_Float16, size=4, content = one):name t3D
18+
Texture3D<float> t3D;
19+
//TEST_INPUT: TextureCube(format=R_Float16, size=4, content = one):name tCube
20+
TextureCube<float> tCube;
21+
22+
//TEST_INPUT: Texture1D(format=R_Float16, size=4, content = one, arrayLength=2):name t1DArray
23+
Texture1DArray<float> t1DArray;
24+
//TEST_INPUT: Texture2D(format=R_Float16, size=4, content = one, arrayLength=2):name t2DArray
25+
Texture2DArray<float> t2DArray;
26+
//TEST_INPUT: TextureCube(format=R_Float16, size=4, content = one, arrayLength=2):name tCubeArray
27+
TextureCubeArray<float> tCubeArray;
28+
29+
//TEST_INPUT: Sampler:name samplerState
30+
SamplerState samplerState;
31+
32+
//TEST_INPUT: ubuffer(data=[0 0 0 0], stride=4):out,name outputBuffer
33+
RWStructuredBuffer<float> outputBuffer;
34+
35+
// Compute entry point: each thread samples every texture declared above at mip level 0
// and writes the sum of the samples to outputBuffer at its own thread index.
// NOTE(review): the textures are declared with `content = one`, so presumably each sample
// yields 1.0 and the sum over the seven textures is 7.0 (0x40E00000) - confirm against the expected output.
[numthreads(4, 1, 1)]
36+
void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
37+
{
38+
int idx = dispatchThreadID.x;
39+
// Coordinate derived from the thread index: 0, 0.25, 0.5, 0.75 for the four threads of the group.
float u = idx * (1.0f / 4);
40+
41+
float val = 0.0f;
42+
43+
// Non-array textures.
val += t1D.SampleLevel(samplerState, u, 0);
44+
val += t2D.SampleLevel(samplerState, float2(u, u), 0);
45+
val += t3D.SampleLevel(samplerState, float3(u, u, u), 0);
46+
val += tCube.SampleLevel(samplerState, normalize(float3(u, 1 - u, u)), 0);
47+
48+
// Array textures: the last coordinate component is 0 in every call.
val += t1DArray.SampleLevel(samplerState, float2(u, 0), 0);
49+
val += t2DArray.SampleLevel(samplerState, float3(u, u, 0), 0);
50+
val += tCubeArray.SampleLevel(samplerState, float4(u, u, u, 0), 0);
51+
52+
outputBuffer[idx] = val;
53+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
40E00000
2+
40E00000
3+
40E00000
4+
40E00000

tools/gfx/cpu/render-cpu.cpp

+44-29
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,18 @@ void _unpackFloatTexel(void const* texelData, void* outData, size_t outSize)
8989
memcpy(outData, temp, outSize);
9090
}
9191

92+
// Unpacks a texel made of N 16-bit float channels into 32-bit floats.
// Each of the first N channels is converted with HalfToFloat; channels beyond N keep
// the defaults (0, 0, 0, 1). The first outSize bytes of the result are copied to outData.
// NOTE(review): assumes N <= 4 and outSize <= sizeof(float) * 4 - nothing here checks either; confirm against callers.
template<int N>
93+
void _unpackFloat16Texel(void const* texelData, void* outData, size_t outSize)
94+
{
95+
auto input = (int16_t const*)texelData;
96+
97+
// Defaults used for any channel the format does not provide.
float temp[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
98+
for (int i = 0; i < N; ++i)
99+
temp[i] = HalfToFloat(input[i]);
100+
101+
memcpy(outData, temp, outSize);
102+
}
103+
92104
static inline float _unpackUnorm8Value(uint8_t value)
93105
{
94106
return value / 255.0f;
@@ -143,42 +155,45 @@ void _unpackUInt32Texel(void const* texelData, void* outData, size_t outSize)
143155
memcpy(outData, temp, outSize);
144156
}
145157

146-
#define TEXTURE_FORMAT_INFO(FORMAT) static const CPUTextureFormatInfo kCPUTextureFormatInfo_##FORMAT
158+
struct CPUFormatInfoMap
159+
{
160+
CPUFormatInfoMap()
161+
{
162+
memset(m_infos, 0, sizeof(m_infos));
147163

148-
TEXTURE_FORMAT_INFO(RGBA_Float32) = { &_unpackFloatTexel<4> };
149-
TEXTURE_FORMAT_INFO(RGB_Float32) = { &_unpackFloatTexel<3> };
150-
TEXTURE_FORMAT_INFO(RG_Float32) = { &_unpackFloatTexel<2> };
151-
TEXTURE_FORMAT_INFO(R_Float32) = { &_unpackFloatTexel<1> };
152-
TEXTURE_FORMAT_INFO(RGBA_Unorm_UInt8) = { &_unpackUnorm8Texel<4> };
153-
TEXTURE_FORMAT_INFO(BGRA_Unorm_UInt8) = { &_unpackUnormBGRA8Texel };
154-
TEXTURE_FORMAT_INFO(R_UInt16) = { &_unpackUInt16Texel<1> };
155-
TEXTURE_FORMAT_INFO(R_UInt32) = { &_unpackUInt32Texel<1> };
156-
TEXTURE_FORMAT_INFO(D_Float32) = { &_unpackFloatTexel<1> };
164+
set(Format::RGBA_Float32, &_unpackFloatTexel<4>);
165+
set(Format::RGB_Float32, &_unpackFloatTexel<3>);
157166

158-
#undef TEXTURE_FORMAT_INFO
167+
set(Format::RG_Float32, &_unpackFloatTexel<2>);
168+
set(Format::R_Float32, &_unpackFloatTexel<1>);
159169

160-
static CPUTextureFormatInfo const* _getFormatInfo(Format format)
161-
{
162-
switch(format)
170+
set(Format::RGBA_Float16, &_unpackFloat16Texel<4>);
171+
set(Format::RG_Float16, &_unpackFloat16Texel<2>);
172+
set(Format::R_Float16, &_unpackFloat16Texel<1>);
173+
174+
set(Format::RGBA_Unorm_UInt8, &_unpackUnorm8Texel<4>);
175+
set(Format::BGRA_Unorm_UInt8, &_unpackUnormBGRA8Texel);
176+
set(Format::R_UInt16, &_unpackUInt16Texel<1>);
177+
set(Format::R_UInt32, &_unpackUInt32Texel<1>);
178+
set(Format::D_Float32, &_unpackFloatTexel<1>);
179+
}
180+
181+
void set(Format format, CPUTextureUnpackFunc func)
163182
{
164-
case Format::D_Unorm24_S8:
165-
default:
166-
return nullptr;
183+
auto& info = m_infos[Index(format)];
184+
info.unpackFunc = func;
185+
}
186+
SLANG_FORCE_INLINE const CPUTextureFormatInfo& get(Format format) const { return m_infos[Index(format)]; }
167187

188+
CPUTextureFormatInfo m_infos[Index(Format::CountOf)];
189+
};
168190

169-
#define CASE(FORMAT) case Format::FORMAT: return &kCPUTextureFormatInfo_##FORMAT;
170-
CASE(RGBA_Float32)
171-
CASE(RGB_Float32)
172-
CASE(RG_Float32)
173-
CASE(R_Float32)
174-
CASE(RGBA_Unorm_UInt8)
175-
CASE(BGRA_Unorm_UInt8)
176-
CASE(R_UInt16)
177-
CASE(R_UInt32)
178-
CASE(D_Float32)
191+
static const CPUFormatInfoMap g_formatInfoMap;
179192

180-
#undef CASE
181-
}
193+
// Looks up the CPU texture format info for `format` in g_formatInfoMap.
// Returns nullptr when the entry has no unpack function.
static CPUTextureFormatInfo const* _getFormatInfo(Format format)
194+
{
195+
const CPUTextureFormatInfo& info = g_formatInfoMap.get(format);
196+
return info.unpackFunc ? &info : nullptr;
182197
}
183198

184199
class CPUTextureResource : public TextureResource

tools/gfx/cuda/render-cuda.cpp

+15-1
Original file line numberDiff line numberDiff line change
@@ -1432,14 +1432,28 @@ class CUDADevice : public RendererBase
14321432

14331433
switch (desc.format)
14341434
{
1435+
case Format::RGBA_Float32:
1436+
case Format::RGB_Float32:
1437+
case Format::RG_Float32:
14351438
case Format::R_Float32:
14361439
case Format::D_Float32:
14371440
{
1441+
const FormatInfo info = gfxGetFormatInfo(desc.format);
14381442
format = CU_AD_FORMAT_FLOAT;
1439-
numChannels = 1;
1443+
numChannels = info.channelCount;
14401444
elementSize = sizeof(float);
14411445
break;
14421446
}
1447+
case Format::RGBA_Float16:
1448+
case Format::RG_Float16:
1449+
case Format::R_Float16:
1450+
{
1451+
const FormatInfo info = gfxGetFormatInfo(desc.format);
1452+
format = CU_AD_FORMAT_HALF;
1453+
numChannels = info.channelCount;
1454+
elementSize = sizeof(uint16_t);
1455+
break;
1456+
}
14431457
case Format::RGBA_Unorm_UInt8:
14441458
{
14451459
format = CU_AD_FORMAT_UNSIGNED_INT8;

tools/gfx/d3d/d3d-util.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ D3D12_DEPTH_STENCILOP_DESC D3DUtil::translateStencilOpDesc(DepthStencilOpDesc de
115115
case Format::RGBA_Unorm_UInt8: return DXGI_FORMAT_R8G8B8A8_UNORM;
116116
case Format::BGRA_Unorm_UInt8: return DXGI_FORMAT_B8G8R8A8_UNORM;
117117

118+
case Format::RGBA_Float16: return DXGI_FORMAT_R16G16B16A16_FLOAT;
119+
case Format::RG_Float16: return DXGI_FORMAT_R16G16_FLOAT;
120+
case Format::R_Float16: return DXGI_FORMAT_R16_FLOAT;
121+
118122
case Format::R_UInt16: return DXGI_FORMAT_R16_UINT;
119123
case Format::R_UInt32: return DXGI_FORMAT_R32_UINT;
120124

tools/gfx/render.cpp

+63-12
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,70 @@ static bool debugLayerEnabled = false;
1919

2020
/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Global Renderer Functions !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */
2121

22-
static const uint8_t s_formatSize[] = {
23-
0, // Unknown,
22+
#define GFX_FORMAT_SIZE(name, size) uint8_t(size),
2423

25-
uint8_t(sizeof(float) * 4), // RGBA_Float32,
26-
uint8_t(sizeof(float) * 3), // RGB_Float32,
27-
uint8_t(sizeof(float) * 2), // RG_Float32,
28-
uint8_t(sizeof(float) * 1), // R_Float32,
24+
static const uint8_t s_formatSize[] =
25+
{
26+
GFX_FORMAT(GFX_FORMAT_SIZE)
27+
};
28+
29+
// Checks that the entries of the GFX_FORMAT list are in the same order as the enumerators of Format.
// Returns true when the number of entries whose position equals their enumerator value
// is equal to Format::CountOf; the result is also passed to SLANG_ASSERT.
static bool _checkFormat()
30+
{
31+
Index value = 0;
32+
Index count = 0;
33+
34+
// Check the values are in the same order
35+
// Each list entry adds 1 to count when its position in the list (value) matches Format::name.
// NOTE(review): GFX_FORMAT_CHECK is not #undef'd in the visible code.
#define GFX_FORMAT_CHECK(name, size) count += Index(Index(Format::name) == value++);
36+
GFX_FORMAT(GFX_FORMAT_CHECK)
37+
38+
const bool r = (count == Index(Format::CountOf));
39+
SLANG_ASSERT(r);
40+
return r;
41+
}
42+
43+
// Runs the check during static initialization. (The intent was to avoid `static` so as not to get a warning that it's unused, but the declaration below is `static`.)
44+
static const bool _checkFormatResult = _checkFormat();
45+
46+
struct FormatInfoMap
47+
{
48+
FormatInfoMap()
49+
{
50+
// Set all to nothing initially
51+
for (auto& info : m_infos)
52+
{
53+
info.channelCount = 0;
54+
info.channelType = SLANG_SCALAR_TYPE_NONE;
55+
}
56+
57+
set(Format::RGBA_Float16, SLANG_SCALAR_TYPE_FLOAT16, 4);
58+
set(Format::RG_Float16, SLANG_SCALAR_TYPE_FLOAT16, 2);
59+
set(Format::R_Float16, SLANG_SCALAR_TYPE_FLOAT16, 1);
60+
61+
set(Format::RGBA_Float32, SLANG_SCALAR_TYPE_FLOAT32, 4);
62+
set(Format::RGB_Float32, SLANG_SCALAR_TYPE_FLOAT32, 3);
63+
set(Format::RG_Float32, SLANG_SCALAR_TYPE_FLOAT32, 2);
64+
set(Format::R_Float32, SLANG_SCALAR_TYPE_FLOAT32, 1);
65+
66+
set(Format::R_UInt16, SLANG_SCALAR_TYPE_UINT16, 1);
67+
set(Format::R_UInt32, SLANG_SCALAR_TYPE_UINT32, 1);
68+
69+
set(Format::D_Float32, SLANG_SCALAR_TYPE_FLOAT32, 1);
70+
}
2971

30-
uint8_t(sizeof(uint32_t)), // RGBA_Unorm_UInt8,
31-
uint8_t(sizeof(uint32_t)), // BGRA_Unorm_UInt8,
72+
void set(Format format, SlangScalarType type, Index channelCount)
73+
{
74+
FormatInfo& info = m_infos[Index(format)];
75+
info.channelCount = uint8_t(channelCount);
76+
info.channelType = uint8_t(type);
77+
}
3278

33-
uint8_t(sizeof(uint16_t)), // R_UInt16,
34-
uint8_t(sizeof(uint32_t)), // R_UInt32,
79+
const FormatInfo& get(Format format) const { return m_infos[Index(format)]; }
3580

36-
uint8_t(sizeof(float)), // D_Float32,
37-
uint8_t(sizeof(uint32_t)), // D_Unorm24_S8,
81+
FormatInfo m_infos[Index(Format::CountOf)];
3882
};
3983

84+
static const FormatInfoMap s_formatInfoMap;
85+
4086
static void _compileTimeAsserts()
4187
{
4288
SLANG_COMPILE_TIME_ASSERT(SLANG_COUNT_OF(s_formatSize) == int(Format::CountOf));
@@ -49,6 +95,11 @@ extern "C"
4995
return s_formatSize[int(format)];
5096
}
5197

98+
// Returns a copy of the FormatInfo stored for `format` in s_formatInfoMap.
// NOTE(review): `format` indexes the table directly with no range check; presumably callers
// pass a value below Format::CountOf - confirm.
SLANG_GFX_API FormatInfo gfxGetFormatInfo(Format format)
99+
{
100+
return s_formatInfoMap.get(format);
101+
}
102+
52103
SlangResult _createDevice(const IDevice::Desc* desc, IDevice** outDevice)
53104
{
54105
switch (desc->deviceType)

tools/gfx/vulkan/vk-util.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ namespace gfx {
1414
case Format::RGB_Float32: return VK_FORMAT_R32G32B32_SFLOAT;
1515
case Format::RG_Float32: return VK_FORMAT_R32G32_SFLOAT;
1616
case Format::R_Float32: return VK_FORMAT_R32_SFLOAT;
17+
18+
case Format::RGBA_Float16: return VK_FORMAT_R16G16B16A16_SFLOAT;
19+
case Format::RG_Float16: return VK_FORMAT_R16G16_SFLOAT;
20+
case Format::R_Float16: return VK_FORMAT_R16_SFLOAT;
21+
1722
case Format::RGBA_Unorm_UInt8: return VK_FORMAT_R8G8B8A8_UNORM;
1823
case Format::BGRA_Unorm_UInt8: return VK_FORMAT_B8G8R8A8_UNORM;
1924
case Format::R_UInt32: return VK_FORMAT_R32_UINT;

0 commit comments

Comments
 (0)