Skip to content

Commit bfe7561

Browse files
authored
Surface access on CUDA is byte addressed in X (shader-slang#1841)
* #include of an absolute path didn't work, because paths were always taken to be relative. * Fix for writing to RWTexture with half types on CUDA. * CUDA half functionality doc updates. * First pass support for sust.p RWTexture format conversion on write. * Tidy up implementation of $C. Made clamping mode #define-able. * A simple test for RWTexture CUDA format conversion. * Use $E to fix byte addressing in X in CUDA. * Do not scale when accessing via _convert versions of surface functions.
1 parent 1027225 commit bfe7561

File tree

3 files changed

+88
-6
lines changed

3 files changed

+88
-6
lines changed

prelude/slang-cuda-prelude.h

+3
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,9 @@ SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x,
385385

386386
// Support for doing format conversion when writing to a surface/RWTexture
387387

388+
// NOTE! For normal surface access x values are *byte* addressed.
389+
// For the _convert versions they are *not*. They don't need to be because sust.p does not require it.
390+
388391
template <typename T>
389392
SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode);
390393
template <typename T>

source/slang/core.meta.slang

+11
Original file line numberDiff line numberDiff line change
@@ -1091,6 +1091,11 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
10911091
if (vecCount > 1)
10921092
{
10931093
sb << '.' << char(i + 'x');
1094+
// Surface access is *byte* addressed in x in CUDA
1095+
if (i == 0)
1096+
{
1097+
sb << " * $E";
1098+
}
10941099
}
10951100
}
10961101

@@ -1140,6 +1145,12 @@ for (int tt = 0; tt < kBaseTextureTypeCount; ++tt)
11401145
{
11411146
sb << '.' << char(i + 'x');
11421147
}
1148+
1149+
// Surface access is *byte* addressed in x in CUDA
1150+
if (i == 0)
1151+
{
1152+
sb << " * $E";
1153+
}
11431154
}
11441155

11451156
sb << ", SLANG_CUDA_BOUNDARY_MODE)\")\n";

source/slang/slang-intrinsic-expand.cpp

+74-6
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ static BaseType _getBaseTypeFromScalarType(SlangScalarType type)
101101
// The VK back-end gets away with this kind of coincidentally, since the "legalization" we have to do for resources means that there wouldn't be a single f() function any more.
102102
// But for CUDA and C++ that's not the case or generally desirable.
103103

104-
IRFormatDecoration* _findImageFormatDecoration(IRInst* inst)
104+
static IRFormatDecoration* _findImageFormatDecoration(IRInst* inst)
105105
{
106106
// JS(TODO):
107107
// There could perhaps be other situations, that need to be covered
@@ -119,7 +119,9 @@ IRFormatDecoration* _findImageFormatDecoration(IRInst* inst)
119119
return inst->findDecoration<IRFormatDecoration>();
120120
}
121121

122-
bool _isImageFormatCompatible(ImageFormat imageFormat, IRType* dataType)
122+
// Returns true if dataType and imageFormat are compatible - that they have the same representation,
123+
// and no conversion is required.
124+
static bool _isImageFormatCompatible(ImageFormat imageFormat, IRType* dataType)
123125
{
124126
int numElems = 1;
125127

@@ -147,6 +149,63 @@ bool _isImageFormatCompatible(ImageFormat imageFormat, IRType* dataType)
147149
return formatBaseType == baseType;
148150
}
149151

152+
/// Determines whether accessing `resourceVar` with the backing `imageFormat` needs a
/// format conversion.
///
/// Returns true only when the data type of `resourceVar` is a texture type that has an
/// element type, and that element type is not compatible with `imageFormat`.
/// Returns false if the resource is not a texture type or has no element type.
static bool _isConvertRequired(ImageFormat imageFormat, IRInst* resourceVar)
{
    auto texType = as<IRTextureTypeBase>(resourceVar->getDataType());
    if (!texType)
    {
        // Not a texture - there is nothing to convert
        return false;
    }

    IRType* accessType = texType->getElementType();
    if (!accessType)
    {
        return false;
    }

    // A conversion is needed exactly when the representations differ
    return !_isImageFormatCompatible(imageFormat, accessType);
}
158+
159+
/// Calculates the size in bytes of the *backing element* of `resourceVar`.
///
/// The result is used to scale the x coordinate of surface accesses (the `$E` intrinsic
/// expansion), because normal surface access on CUDA is *byte* addressed in x.
///
/// The size is determined as follows:
///  * If the resource has a format decoration and the access requires conversion, returns 1,
///    as the converting access functions are not byte addressed.
///  * If the resource has a format decoration with a known (non zero) size, returns that size.
///  * Otherwise the backing format is *assumed* to be the same as the element type used for
///    access. Ie in RWTexture<T>, this would return sizeof(T).
///  * If none of the above produces a usable size, returns 4.
///
/// The returned value is always greater than 0, such that it is always valid as a scale.
static size_t _calcBackingElementSizeInBytes(IRInst* resourceVar)
{
    // First see if there is a format associated with the resource
    if (IRFormatDecoration* formatDecoration = _findImageFormatDecoration(resourceVar))
    {
        const ImageFormat imageFormat = formatDecoration->getFormat();

        if (_isConvertRequired(imageFormat, resourceVar))
        {
            // If the access is a converting access then the x coordinate is *NOT* scaled
            // This is a CUDA specific issue(!).
            return 1;
        }

        const auto& imageFormatInfo = getImageFormatInfo(imageFormat);
        const size_t formatSizeInBytes = size_t(imageFormatInfo.sizeInBytes);

        // NOTE(review): a format without a defined size (presumably ImageFormat::unknown)
        // would report 0 here. Returning 0 would scale x by zero, so instead fall through
        // and derive the size from the element type.
        if (formatSizeInBytes > 0)
        {
            return formatSizeInBytes;
        }
    }

    // If not we *assume* the backing format is the same as the element type used for access.
    // Ie in RWTexture<T>, this would return sizeof(T)
    {
        auto textureType = as<IRTextureTypeBase>(resourceVar->getDataType());
        IRType* elementType = textureType ? textureType->getElementType() : nullptr;

        if (elementType)
        {
            int numElems = 1;

            if (auto vecType = as<IRVectorType>(elementType))
            {
                numElems = int(getIntVal(vecType->getElementCount()));
                elementType = vecType->getElementType();
            }

            // Only a basic type has a size we can look up. For anything else we use
            // the fallback, rather than looking up the size of BaseType::Void
            // (which is presumably 0).
            if (auto basicType = as<IRBasicType>(elementType))
            {
                const auto& info = BaseTypeInfo::getInfo(basicType->getBaseType());
                const int sizeInBytes = int(info.sizeInBytes) * numElems;

                if (sizeInBytes > 0)
                {
                    return size_t(sizeInBytes);
                }
            }
        }
    }

    // When in doubt 4 is not a terrible guess based on limitations around DX11 etc
    return 4;
}
208+
150209
const char* IntrinsicExpandContext::_emitSpecial(const char* cursor)
151210
{
152211
const char*const end = m_text.end();
@@ -269,10 +328,7 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor)
269328
if (IRFormatDecoration* formatDecoration = _findImageFormatDecoration(arg0))
270329
{
271330
const ImageFormat imageFormat = formatDecoration->getFormat();
272-
auto textureType = as<IRTextureTypeBase>(arg0->getDataType());
273-
IRType* elementType = textureType ? textureType->getElementType() : nullptr;
274-
275-
if (elementType && ! _isImageFormatCompatible(imageFormat, elementType))
331+
if (_isConvertRequired(imageFormat, arg0))
276332
{
277333
// Append _convert on the name to signify we need to use a code path, that will automatically
278334
// do the format conversion.
@@ -282,6 +338,18 @@ const char* IntrinsicExpandContext::_emitSpecial(const char* cursor)
282338
}
283339
break;
284340
}
341+
342+
case 'E':
343+
{
344+
/// Sometimes accesses need to be scaled. For example in CUDA the x coordinate for surface
345+
/// access is byte addressed.
346+
/// $E will return the byte size of the *backing element*.
347+
size_t elemSizeInBytes = _calcBackingElementSizeInBytes(m_callInst->getArg(0));
348+
SLANG_ASSERT(elemSizeInBytes > 0);
349+
m_writer->emitUInt64(UInt64(elemSizeInBytes));
350+
break;
351+
}
352+
285353
case 'c':
286354
{
287355
// When doing texture access in glsl the result may need to be cast.

0 commit comments

Comments
 (0)