Skip to content

Commit 1027225

Browse files
authored
Support for HW format conversions for RWTexture on CUDA (shader-slang#1840)
* #include an absolute path didn't work - because paths were taken to always be relative. * Fix for writing to RWTexture with half types on CUDA. * CUDA half functionality doc updates. * First pass support for sust.p RWTexture format conversion on write. * Tidy up implementation of $C. Made clamping mode #define able. * A simple test for RWTexture CUDA format conversion.
1 parent 1856b8a commit 1027225

9 files changed

+267
-62
lines changed

prelude/slang-cuda-prelude.h

+32-1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,15 @@
6363

6464
#ifndef SLANG_CUDA_BOUNDARY_MODE
6565
# define SLANG_CUDA_BOUNDARY_MODE cudaBoundaryModeZero
66+
67+
// SLANG_PTX_BOUNDARY_MODE can be one of the modes listed below. Only applies to *PTX* emitted CUDA operations
68+
// which currently is just RWTexture format-converting writes
69+
//
70+
// .trap causes an execution trap on out-of-bounds addresses
71+
// .clamp stores data at the nearest surface location (sized appropriately)
72+
// .zero drops stores to out-of-bounds addresses
73+
74+
# define SLANG_PTX_BOUNDARY_MODE "zero"
6675
#endif
6776

6877
struct TypeInfo
@@ -371,9 +380,31 @@ SLANG_SURFACE_WRITE(surf1DLayeredwrite, (int x, int layer), (x, layer))
371380
SLANG_SURFACE_WRITE(surf2DLayeredwrite, (int x, int y, int layer), (x, y, layer))
372381
SLANG_SURFACE_WRITE(surfCubemapwrite, (int x, int y, int face), (x, y, face))
373382
SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x, y, layerFace))
374-
383+
375384
#endif
376385

386+
// Support for doing format conversion when writing to a surface/RWTexture
387+
388+
template <typename T>
389+
SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode);
390+
template <typename T>
391+
SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode);
392+
393+
// https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html
394+
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#surface-instructions-sust
395+
396+
template <>
397+
SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode)
398+
{
399+
asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2};}\n\t" :: "l"(surfObj),"r"(x),"f"(v));
400+
}
401+
402+
template <>
403+
SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert<float>(float v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode)
404+
{
405+
asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(v));
406+
}
407+
377408
// ----------------------------- F32 -----------------------------------------
378409

379410
// Unary

source/slang/core.meta.slang

+1-1
Original file line numberDiff line numberDiff line change
@@ -1132,7 +1132,7 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
11321132
}
11331133

11341134
sb << (isArray ? "Layered" : "");
1135-
sb << "write<$T0>($2, $0";
1135+
sb << "write$C<$T0>($2, $0";
11361136
for (int i = 0; i < vecCount; ++i)
11371137
{
11381138
sb << ", ($1)";

source/slang/slang-ast-support-types.h

+11-1
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,20 @@ namespace Slang
122122

123123
enum class ImageFormat
124124
{
125-
#define FORMAT(NAME) NAME,
125+
#define FORMAT(NAME, OTHER) NAME,
126126
#include "slang-image-format-defs.h"
127127
};
128128

129+
struct ImageFormatInfo
130+
{
131+
SlangScalarType scalarType; ///< If image format is not made up of channels of set sizes this will be SLANG_SCALAR_TYPE_NONE
132+
uint8_t channelCount; ///< The number of channels
133+
uint8_t sizeInBytes; ///< Size in bytes
134+
UnownedStringSlice name; ///< The name associated with this type. NOTE! Currently these names *are* the GLSL format names.
135+
};
136+
137+
const ImageFormatInfo& getImageFormatInfo(ImageFormat format);
138+
129139
bool findImageFormatByName(char const* name, ImageFormat* outFormat);
130140
char const* getGLSLNameForImageFormat(ImageFormat format);
131141

source/slang/slang-image-format-defs.h

+40-40
Original file line numberDiff line numberDiff line change
@@ -3,45 +3,45 @@
33
#error Must define FORMAT macro before including image-format-defs.h
44
#endif
55

6-
FORMAT(unknown)
7-
FORMAT(rgba32f)
8-
FORMAT(rgba16f)
9-
FORMAT(rg32f)
10-
FORMAT(rg16f)
11-
FORMAT(r11f_g11f_b10f)
12-
FORMAT(r32f)
13-
FORMAT(r16f)
14-
FORMAT(rgba16)
15-
FORMAT(rgb10_a2)
16-
FORMAT(rgba8)
17-
FORMAT(rg16)
18-
FORMAT(rg8)
19-
FORMAT(r16)
20-
FORMAT(r8)
21-
FORMAT(rgba16_snorm)
22-
FORMAT(rgba8_snorm)
23-
FORMAT(rg16_snorm)
24-
FORMAT(rg8_snorm)
25-
FORMAT(r16_snorm)
26-
FORMAT(r8_snorm)
27-
FORMAT(rgba32i)
28-
FORMAT(rgba16i)
29-
FORMAT(rgba8i)
30-
FORMAT(rg32i)
31-
FORMAT(rg16i)
32-
FORMAT(rg8i)
33-
FORMAT(r32i)
34-
FORMAT(r16i)
35-
FORMAT(r8i)
36-
FORMAT(rgba32ui)
37-
FORMAT(rgba16ui)
38-
FORMAT(rgb10_a2ui)
39-
FORMAT(rgba8ui)
40-
FORMAT(rg32ui)
41-
FORMAT(rg16ui)
42-
FORMAT(rg8ui)
43-
FORMAT(r32ui)
44-
FORMAT(r16ui)
45-
FORMAT(r8ui)
6+
FORMAT(unknown, (NONE, 0, 0))
7+
FORMAT(rgba32f, (FLOAT32, 4, sizeof(float) * 4))
8+
FORMAT(rgba16f, (FLOAT16, 4, sizeof(uint16_t) * 4))
9+
FORMAT(rg32f, (FLOAT32, 2, sizeof(float) * 2))
10+
FORMAT(rg16f, (FLOAT16, 2, sizeof(uint16_t) * 2))
11+
FORMAT(r11f_g11f_b10f, (NONE, 3, sizeof(uint32_t)))
12+
FORMAT(r32f, (FLOAT32, 1, sizeof(float)))
13+
FORMAT(r16f, (FLOAT16, 1, sizeof(uint16_t)))
14+
FORMAT(rgba16, (UINT16, 4, sizeof(uint16_t) * 4))
15+
FORMAT(rgb10_a2, (NONE, 4, sizeof(uint32_t)))
16+
FORMAT(rgba8, (UINT8, 4, sizeof(uint32_t)))
17+
FORMAT(rg16, (UINT16, 2, sizeof(uint16_t) * 2 ))
18+
FORMAT(rg8, (UINT8, 2, sizeof(char) * 2))
19+
FORMAT(r16, (UINT16, 1, sizeof(uint16_t)))
20+
FORMAT(r8, (UINT8, 1, sizeof(uint8_t)))
21+
FORMAT(rgba16_snorm, (UINT16, 4, sizeof(uint16_t) * 4))
22+
FORMAT(rgba8_snorm, (UINT8, 4, sizeof(uint8_t) * 4))
23+
FORMAT(rg16_snorm, (UINT16, 2, sizeof(uint16_t) * 2))
24+
FORMAT(rg8_snorm, (UINT8, 2, sizeof(uint8_t) * 2))
25+
FORMAT(r16_snorm, (UINT16, 1, sizeof(uint16_t)))
26+
FORMAT(r8_snorm, (UINT8, 1, sizeof(uint8_t)))
27+
FORMAT(rgba32i, (INT32, 4, sizeof(int32_t) * 4))
28+
FORMAT(rgba16i, (INT16, 4, sizeof(int16_t) * 4))
29+
FORMAT(rgba8i, (INT8, 4, sizeof(int8_t) * 4))
30+
FORMAT(rg32i, (INT32, 2, sizeof(int32_t) * 2))
31+
FORMAT(rg16i, (INT16, 2, sizeof(int16_t) * 2))
32+
FORMAT(rg8i, (INT8, 2, sizeof(int8_t) * 2))
33+
FORMAT(r32i, (INT32, 1, sizeof(int32_t)))
34+
FORMAT(r16i, (INT16, 1, sizeof(int16_t)))
35+
FORMAT(r8i, (INT8, 1, sizeof(int8_t)))
36+
FORMAT(rgba32ui, (UINT32, 4, sizeof(uint32_t) * 4))
37+
FORMAT(rgba16ui, (UINT16, 4, sizeof(uint16_t) * 4))
38+
FORMAT(rgb10_a2ui, (NONE, 4, sizeof(uint32_t)))
39+
FORMAT(rgba8ui, (UINT8, 4, sizeof(uint8_t) * 4))
40+
FORMAT(rg32ui, (UINT32, 2, sizeof(uint32_t) * 2))
41+
FORMAT(rg16ui, (UINT16, 2, sizeof(uint16_t) * 2))
42+
FORMAT(rg8ui, (UINT8, 2, sizeof(uint8_t) * 2))
43+
FORMAT(r32ui, (UINT32, 1, sizeof(uint32_t)))
44+
FORMAT(r16ui, (UINT16, 1, sizeof(uint16_t)))
45+
FORMAT(r8ui, (UINT8, 1, sizeof(uint8_t)))
4646

4747
#undef FORMAT

source/slang/slang-intrinsic-expand.cpp

+114
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ void IntrinsicExpandContext::emit(IRCall* inst, IRUse* args, Int argCount, const
88
m_args = args;
99
m_argCount = argCount;
1010
m_text = intrinsicText;
11+
m_callInst = inst;
1112

1213
const auto returnType = inst->getDataType();
1314

@@ -59,6 +60,93 @@ void IntrinsicExpandContext::emit(IRCall* inst, IRUse* args, Int argCount, const
5960
}
6061
}
6162

63+
static BaseType _getBaseTypeFromScalarType(SlangScalarType type)
64+
{
65+
switch (type)
66+
{
67+
case SLANG_SCALAR_TYPE_INT32: return BaseType::Int;
68+
case SLANG_SCALAR_TYPE_UINT32: return BaseType::UInt;
69+
case SLANG_SCALAR_TYPE_INT16: return BaseType::Int16;
70+
case SLANG_SCALAR_TYPE_UINT16: return BaseType::UInt16;
71+
case SLANG_SCALAR_TYPE_INT64: return BaseType::Int64;
72+
case SLANG_SCALAR_TYPE_UINT64: return BaseType::UInt64;
73+
case SLANG_SCALAR_TYPE_INT8: return BaseType::Int8;
74+
case SLANG_SCALAR_TYPE_UINT8: return BaseType::UInt8;
75+
case SLANG_SCALAR_TYPE_FLOAT16: return BaseType::Half;
76+
case SLANG_SCALAR_TYPE_FLOAT32: return BaseType::Float;
77+
case SLANG_SCALAR_TYPE_FLOAT64: return BaseType::Double;
78+
case SLANG_SCALAR_TYPE_BOOL: return BaseType::Bool;
79+
default: return BaseType::Void;
80+
}
81+
}
82+
83+
// TODO(JS): There is an inherent problem here:
84+
//
85+
// TimF: The big gotcha you'd have with trying to look up the IRVar or whatever from an intrinsic is that it is very easy for the user to "smuggle" a resource-type value through an intermediate function:
86+
//
87+
// ```
88+
// Imagine this is user code...
89+
// void f(RWTexture2D t) { t.YourOpThatYouAdded(...); }
90+
// [attributeYouCareAbout(...)] RWTexture2D gTex;
91+
// ...
92+
// f(gTex);
93+
//
94+
// ```
95+
//
96+
// So when emitting IR code for f, there is no way to trace t back to gTex and get at [attributeYouCareAbout(...)].
97+
// Structurally, you can get back to the IRParam for t and that's it.
98+
// And even if there was some magic way to trace back through the call site, you would run into the problem that some call sites
99+
// might call f(gTex) and others might call f(gSomeOtherTex) and there is no guarantee the attributes on those two textures would match.
100+
//
101+
// The VK back-end gets away with this kind of coincidentally, since the "legalization" we have to do for resources means that there wouldn't be a single f() function any more.
102+
// But for CUDA and C++ that's not the case or generally desirable.
103+
104+
IRFormatDecoration* _findImageFormatDecoration(IRInst* inst)
105+
{
106+
// JS(TODO):
107+
// There could perhaps be other situations, that need to be covered
108+
109+
// If this is a load, we need to get the decoration from the field key
110+
if (IRLoad* load = as<IRLoad>(inst))
111+
{
112+
if (IRFieldAddress* fieldAddress = as<IRFieldAddress>(load->getOperand(0)))
113+
{
114+
IRInst* field = fieldAddress->getField();
115+
return field->findDecoration<IRFormatDecoration>();
116+
}
117+
}
118+
// Otherwise just try on the instruction
119+
return inst->findDecoration<IRFormatDecoration>();
120+
}
121+
122+
bool _isImageFormatCompatible(ImageFormat imageFormat, IRType* dataType)
123+
{
124+
int numElems = 1;
125+
126+
if (auto vecType = as<IRVectorType>(dataType))
127+
{
128+
numElems = int(getIntVal(vecType->getElementCount()));
129+
dataType = vecType->getElementType();
130+
}
131+
132+
BaseType baseType = BaseType::Void;
133+
if (auto basicType = as<IRBasicType>(dataType))
134+
{
135+
baseType = basicType->getBaseType();
136+
}
137+
138+
const auto& imageFormatInfo = getImageFormatInfo(imageFormat);
139+
const BaseType formatBaseType = _getBaseTypeFromScalarType(imageFormatInfo.scalarType);
140+
141+
if (numElems != imageFormatInfo.channelCount)
142+
{
143+
SLANG_ASSERT(!"Format doesn't match channel count");
144+
return false;
145+
}
146+
147+
return formatBaseType == baseType;
148+
}
149+
62150
const char* IntrinsicExpandContext::_emitSpecial(const char* cursor)
63151
{
64152
const char*const end = m_text.end();
@@ -168,6 +256,32 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor)
168256
}
169257
break;
170258

259+
case 'C':
260+
{
261+
// The $C intrinsic is a mechanism to change the name of an invocation depending on if there is a format
262+
// conversion required between the type associated with the resource and the backing ImageFormat.
263+
// Currently this is only implemented on CUDA, where there are specialized versions of the RWTexture
264+
// writes that will do a format conversion.
265+
if (m_emitter->getTarget() == CodeGenTarget::CUDASource)
266+
{
267+
IRInst* arg0 = m_callInst->getArg(0);
268+
269+
if (IRFormatDecoration* formatDecoration = _findImageFormatDecoration(arg0))
270+
{
271+
const ImageFormat imageFormat = formatDecoration->getFormat();
272+
auto textureType = as<IRTextureTypeBase>(arg0->getDataType());
273+
IRType* elementType = textureType ? textureType->getElementType() : nullptr;
274+
275+
if (elementType && ! _isImageFormatCompatible(imageFormat, elementType))
276+
{
277+
// Append _convert on the name to signify we need to use a code path, that will automatically
278+
// do the format conversion.
279+
m_writer->emit("_convert");
280+
}
281+
}
282+
}
283+
break;
284+
}
171285
case 'c':
172286
{
173287
// When doing texture access in glsl the result may need to be cast.

source/slang/slang-intrinsic-expand.h

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ struct IntrinsicExpandContext
2424

2525
SourceWriter* m_writer;
2626
UnownedStringSlice m_text;
27+
IRCall* m_callInst;
2728
IRUse* m_args = nullptr;
2829
Int m_argCount = 0;
2930
Index m_openParenCount = 0;

source/slang/slang-syntax.cpp

+21-19
Original file line numberDiff line numberDiff line change
@@ -1186,38 +1186,40 @@ Module* getModule(Decl* decl)
11861186
return moduleDecl->module;
11871187
}
11881188

1189-
bool findImageFormatByName(char const* name, ImageFormat* outFormat)
1189+
static const ImageFormatInfo kImageFormatInfos[] =
11901190
{
1191-
static const struct
1192-
{
1193-
char const* name;
1194-
ImageFormat format;
1195-
} kFormats[] =
1196-
{
1197-
#define FORMAT(NAME) { #NAME, ImageFormat::NAME },
1191+
#define SLANG_IMAGE_FORMAT_INFO(TYPE, COUNT, SIZE) SLANG_SCALAR_TYPE_##TYPE, uint8_t(COUNT), uint8_t(SIZE)
1192+
#define FORMAT(NAME, OTHER) \
1193+
{ SLANG_IMAGE_FORMAT_INFO OTHER, UnownedStringSlice::fromLiteral(#NAME) },
11981194
#include "slang-image-format-defs.h"
1199-
};
1195+
#undef FORMAT
1196+
#undef SLANG_IMAGE_FORMAT_INFO
1197+
};
12001198

1201-
for( auto item : kFormats )
1199+
bool findImageFormatByName(char const* inName, ImageFormat* outFormat)
1200+
{
1201+
const UnownedStringSlice name(inName);
1202+
1203+
for (Index i = 0; i < SLANG_COUNT_OF(kImageFormatInfos); ++i)
12021204
{
1203-
if( strcmp(item.name, name) == 0 )
1205+
const auto& info = kImageFormatInfos[i];
1206+
if (info.name == name)
12041207
{
1205-
*outFormat = item.format;
1208+
*outFormat = ImageFormat(i);
12061209
return true;
12071210
}
12081211
}
1209-
12101212
return false;
12111213
}
12121214

12131215
char const* getGLSLNameForImageFormat(ImageFormat format)
12141216
{
1215-
switch( format )
1216-
{
1217-
default: return "unhandled";
1218-
#define FORMAT(NAME) case ImageFormat::NAME: return #NAME;
1219-
#include "slang-image-format-defs.h"
1220-
}
1217+
return kImageFormatInfos[Index(format)].name.begin();
12211218
}
12221219

1220+
const ImageFormatInfo& getImageFormatInfo(ImageFormat format)
1221+
{
1222+
return kImageFormatInfos[Index(format)];
1223+
}
1224+
12231225
} // namespace Slang

0 commit comments

Comments
 (0)