Skip to content

Commit b3e0b0d

Browse files
authoredJan 28, 2020
Synthesizing CUDA tests (shader-slang#1183)
* When using setUniform clamp the amount of data written to the buffer size. * CUDA implement StructuredBuffer/ByteAddressBuffer as pointer/count as is on CPU. Allow bounds check to zero index. Update docs. * Synthesize tests. * Fix bug in CUDA output. * Fixing more tests to run on CUDA. * Added BaseType for layout of Vector and Matrix - as they are held as int32_t vector array types. * Enable unbound array support on CUDA. * Added unsized array support for CUDA documentation.
1 parent 5c6ab6d commit b3e0b0d

11 files changed

+261
-37
lines changed
 

‎docs/cuda-target.md

+20-1
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,26 @@ The UniformState and UniformEntryPointParams struct typically vary by shader. Un
9797

9898
## Unsized arrays
9999

100-
WIP: Not implemented yet.
100+
Unsized arrays can be used; they are declared as an array with no size, as in `[]`. For example:
101+
102+
```
103+
RWStructuredBuffer<int> arrayOfArrays[];
104+
```
105+
106+
With normal 'sized' arrays, the elements are stored contiguously wherever the array is defined. An unsized array instead maps to `Array<T>`, which is:
107+
108+
```
109+
T* data;
110+
size_t count;
111+
```
112+
113+
Note that there is no method in the shader source to get the `count`, even though on the CUDA target it is stored and easily available. This is because of how unsized arrays behave on other GPU targets:
114+
115+
* The count has to be stored elsewhere (unlike with CUDA)
116+
* On some GPU targets there is no bounds checking - accessing outside the bound values can cause *undefined behavior*
117+
* The elements may be laid out *contiguously* on GPU
118+
119+
In practice this means that if you want to access the `count` in shader code, it will need to be passed by another mechanism - such as within a constant buffer. It is possible that support will be added in the future to allow direct access to `count` that works transparently across targets.
101120

102121
## Prelude
103122

‎prelude/slang-cuda-prelude.h

+17
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,18 @@ struct FixedArray
3838
T m_data[SIZE];
3939
};
4040

41+
// An array that has no specified size becomes an 'Array'. This stores the size so it can potentially
42+
// do bounds checking.
43+
template <typename T>
44+
struct Array
45+
{
46+
SLANG_CUDA_CALL const T& operator[](size_t index) const { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; }
47+
SLANG_CUDA_CALL T& operator[](size_t index) { SLANG_CUDA_BOUND_CHECK(index, count); return data[index]; }
48+
49+
T* data;
50+
size_t count;
51+
};
52+
4153
// Typically defined in cuda.h, but we can't ship/rely on that, so just define here
4254
typedef unsigned long long CUtexObject;
4355
typedef unsigned long long CUsurfObject;
@@ -49,6 +61,11 @@ typedef unsigned long long CUsurfObject;
4961
struct SamplerStateUnused;
5062
typedef SamplerStateUnused* SamplerState;
5163

64+
65+
// TODO(JS): Not clear yet if this can be handled on CUDA, by just ignoring.
66+
// For now, just map to the index type.
67+
typedef size_t NonUniformResourceIndex;
68+
5269
// Code generator will generate the specific type
5370
template <typename T, int ROWS, int COLS>
5471
struct Matrix;

‎source/slang/slang-emit.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,7 @@ String emitEntryPointSourceFromIR(
518518

519519
case SourceStyle::CPP:
520520
case SourceStyle::C:
521+
case SourceStyle::CUDA:
521522
linkingAndOptimizationOptions.shouldLegalizeExistentialAndResourceTypes = false;
522523
break;
523524
}

‎source/slang/slang-type-layout.cpp

+135-14
Original file line numberDiff line numberDiff line change
@@ -112,16 +112,17 @@ struct DefaultLayoutRulesImpl : SimpleLayoutRulesImpl
112112
return arrayInfo;
113113
}
114114

115-
SimpleLayoutInfo GetVectorLayout(SimpleLayoutInfo elementInfo, size_t elementCount) override
115+
SimpleLayoutInfo GetVectorLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t elementCount) override
116116
{
117+
SLANG_UNUSED(elementType);
117118
SimpleLayoutInfo vectorInfo;
118119
vectorInfo.kind = elementInfo.kind;
119120
vectorInfo.size = elementInfo.size * elementCount;
120121
vectorInfo.alignment = elementInfo.alignment;
121122
return vectorInfo;
122123
}
123124

124-
SimpleArrayLayoutInfo GetMatrixLayout(SimpleLayoutInfo elementInfo, size_t rowCount, size_t columnCount) override
125+
SimpleArrayLayoutInfo GetMatrixLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t rowCount, size_t columnCount) override
125126
{
126127
// The default behavior here is to lay out a matrix
127128
// as an array of row vectors (that is row-major).
@@ -131,7 +132,7 @@ struct DefaultLayoutRulesImpl : SimpleLayoutRulesImpl
131132
// to get layouts with a different convention.
132133
//
133134
return GetArrayLayout(
134-
GetVectorLayout(elementInfo, columnCount),
135+
GetVectorLayout(elementType, elementInfo, columnCount),
135136
rowCount);
136137
}
137138

@@ -204,8 +205,9 @@ struct GLSLBaseLayoutRulesImpl : DefaultLayoutRulesImpl
204205
{
205206
typedef DefaultLayoutRulesImpl Super;
206207

207-
SimpleLayoutInfo GetVectorLayout(SimpleLayoutInfo elementInfo, size_t elementCount) override
208+
SimpleLayoutInfo GetVectorLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t elementCount) override
208209
{
210+
SLANG_UNUSED(elementType);
209211
// The `std140` and `std430` rules require vectors to be aligned to the next power of
210212
// two up from their size (so a `float2` is 8-byte aligned, and a `float3` is
211213
// 16-byte aligned).
@@ -224,7 +226,7 @@ struct GLSLBaseLayoutRulesImpl : DefaultLayoutRulesImpl
224226
return vectorInfo;
225227
}
226228

227-
SimpleArrayLayoutInfo GetArrayLayout( SimpleLayoutInfo elementInfo, LayoutSize elementCount) override
229+
SimpleArrayLayoutInfo GetArrayLayout(SimpleLayoutInfo elementInfo, LayoutSize elementCount) override
228230
{
229231
// The size of an array must be rounded up to be a multiple of its alignment.
230232
//
@@ -376,7 +378,7 @@ struct CPULayoutRulesImpl : DefaultLayoutRulesImpl
376378

377379
// So it is actually a Array<T> on CPU which is a pointer and a size
378380
info.size = sizeof(void*) * 2;
379-
info.alignment = sizeof(void*);
381+
info.alignment = SLANG_ALIGN_OF(void*);
380382

381383
return info;
382384
}
@@ -398,12 +400,115 @@ struct CPULayoutRulesImpl : DefaultLayoutRulesImpl
398400
}
399401
};
400402

401-
// TODO(JS): Most likely wrong. For layout for CUDA, we'll just do the default to get things up and running
402403
struct CUDALayoutRulesImpl : DefaultLayoutRulesImpl
403404
{
404405
typedef DefaultLayoutRulesImpl Super;
405-
};
406406

407+
SimpleLayoutInfo GetScalarLayout(BaseType baseType) override
408+
{
409+
switch (baseType)
410+
{
411+
case BaseType::Bool:
412+
{
413+
// In memory a bool is a byte, BUT when in a vector or matrix it will actually be an int32_t
414+
return SimpleLayoutInfo(LayoutResourceKind::Uniform, sizeof(uint8_t), SLANG_ALIGN_OF(uint8_t));
415+
}
416+
417+
default: return Super::GetScalarLayout(baseType);
418+
}
419+
}
420+
421+
SimpleArrayLayoutInfo GetArrayLayout(SimpleLayoutInfo elementInfo, LayoutSize elementCount) override
422+
{
423+
SLANG_RELEASE_ASSERT(elementInfo.size.isFinite());
424+
auto elementSize = elementInfo.size.getFiniteValue();
425+
auto elementAlignment = elementInfo.alignment;
426+
auto elementStride = RoundToAlignment(elementSize, elementAlignment);
427+
428+
if (elementCount.isInfinite())
429+
{
430+
// This is an unsized array, get information for element
431+
auto info = Super::GetArrayLayout(elementInfo, LayoutSize(1));
432+
433+
// So it is actually a Array<T> on CUDA which is a pointer and a size
434+
info.size = sizeof(void*) * 2;
435+
info.alignment = SLANG_ALIGN_OF(void*);
436+
return info;
437+
}
438+
439+
// An array with no elements will have zero size.
440+
//
441+
LayoutSize arraySize = 0;
442+
//
443+
// Any array with a non-zero number of elements will need
444+
// to have space for N elements of size `elementSize`, with
445+
// the constraints that there must be `elementStride` bytes
446+
// between consecutive elements.
447+
//
448+
if (elementCount > 0)
449+
{
450+
// We can think of this as either allocating (N-1)
451+
// chunks of size `elementStride` (for most of the elements)
452+
// and then one final chunk of size `elementSize` for
453+
// the last element, or equivalently as allocating
454+
// N chunks of size `elementStride` and then "giving back"
455+
// the final `elementStride - elementSize` bytes.
456+
//
457+
arraySize = (elementStride * (elementCount - 1)) + elementSize;
458+
}
459+
460+
SimpleArrayLayoutInfo arrayInfo;
461+
arrayInfo.kind = elementInfo.kind;
462+
arrayInfo.size = arraySize;
463+
arrayInfo.alignment = elementAlignment;
464+
arrayInfo.elementStride = elementStride;
465+
return arrayInfo;
466+
}
467+
468+
SimpleLayoutInfo GetVectorLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t elementCount) override
469+
{
470+
// Special case bool
471+
if (elementType == BaseType::Bool)
472+
{
473+
SimpleLayoutInfo fixInfo(elementInfo);
474+
fixInfo.size = sizeof(int32_t);
475+
fixInfo.alignment = SLANG_ALIGN_OF(int32_t);
476+
return GetVectorLayout(BaseType::Int, fixInfo, elementCount);
477+
}
478+
479+
SimpleLayoutInfo vectorInfo;
480+
vectorInfo.kind = elementInfo.kind;
481+
vectorInfo.size = elementInfo.size * elementCount;
482+
vectorInfo.alignment = elementInfo.alignment;
483+
484+
return vectorInfo;
485+
}
486+
487+
SimpleArrayLayoutInfo GetMatrixLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t rowCount, size_t columnCount) override
488+
{
489+
// Special case bool
490+
if (elementType == BaseType::Bool)
491+
{
492+
SimpleLayoutInfo fixInfo(elementInfo);
493+
fixInfo.size = sizeof(int32_t);
494+
fixInfo.alignment = SLANG_ALIGN_OF(int32_t);
495+
return GetMatrixLayout(BaseType::Int, fixInfo, rowCount, columnCount);
496+
}
497+
498+
return Super::GetMatrixLayout(elementType, elementInfo, rowCount, columnCount);
499+
}
500+
501+
UniformLayoutInfo BeginStructLayout() override
502+
{
503+
return Super::BeginStructLayout();
504+
}
505+
506+
void EndStructLayout(UniformLayoutInfo* ioStructInfo) override
507+
{
508+
// To conform to CUDA/C/C++, the struct size is rounded up to a multiple of its alignment
509+
ioStructInfo->size = RoundToAlignment(ioStructInfo->size, ioStructInfo->alignment);
510+
}
511+
};
407512

408513
struct HLSLStructuredBufferLayoutRulesImpl : DefaultLayoutRulesImpl
409514
{
@@ -436,8 +541,9 @@ struct DefaultVaryingLayoutRulesImpl : DefaultLayoutRulesImpl
436541
1);
437542
}
438543

439-
SimpleLayoutInfo GetVectorLayout(SimpleLayoutInfo, size_t) override
544+
SimpleLayoutInfo GetVectorLayout(BaseType elementType, SimpleLayoutInfo, size_t) override
440545
{
546+
SLANG_UNUSED(elementType);
441547
// Vectors take up one slot by default
442548
//
443549
// TODO: some platforms may decide that vectors of `double` need
@@ -479,8 +585,9 @@ struct GLSLSpecializationConstantLayoutRulesImpl : DefaultLayoutRulesImpl
479585
1);
480586
}
481587

482-
SimpleLayoutInfo GetVectorLayout(SimpleLayoutInfo, size_t elementCount) override
588+
SimpleLayoutInfo GetVectorLayout(BaseType elementType, SimpleLayoutInfo, size_t elementCount) override
483589
{
590+
SLANG_UNUSED(elementType);
484591
// GLSL doesn't support vectors of specialization constants,
485592
// but we will assume that, if supported, they would use one slot per element.
486593
return SimpleLayoutInfo(
@@ -3052,7 +3159,13 @@ static TypeLayoutResult _createTypeLayout(
30523159
context,
30533160
elementType);
30543161

3055-
auto info = rules->GetVectorLayout(element.info, elementCount);
3162+
BaseType elementBaseType = BaseType::Void;
3163+
if (auto elementBasicType = as<BasicExpressionType>(elementType))
3164+
{
3165+
elementBaseType = elementBasicType->baseType;
3166+
}
3167+
3168+
auto info = rules->GetVectorLayout(elementBaseType, element.info, elementCount);
30563169

30573170
RefPtr<VectorTypeLayout> typeLayout = new VectorTypeLayout();
30583171
typeLayout->type = type;
@@ -3078,6 +3191,12 @@ static TypeLayoutResult _createTypeLayout(
30783191
auto elementTypeLayout = elementResult.layout;
30793192
auto elementInfo = elementResult.info;
30803193

3194+
BaseType elementBaseType = BaseType::Void;
3195+
if (auto elementBasicType = as<BasicExpressionType>(elementType))
3196+
{
3197+
elementBaseType = elementBasicType->baseType;
3198+
}
3199+
30813200
// The `GetMatrixLayout` implementation in the layout rules
30823201
// currently defaults to assuming row-major layout,
30833202
// so if we want column-major layout we achieve it here by
@@ -3092,6 +3211,7 @@ static TypeLayoutResult _createTypeLayout(
30923211
layoutMinorCount = tmp;
30933212
}
30943213
auto info = rules->GetMatrixLayout(
3214+
elementBaseType,
30953215
elementInfo,
30963216
layoutMajorCount,
30973217
layoutMinorCount);
@@ -3100,6 +3220,7 @@ static TypeLayoutResult _createTypeLayout(
31003220
RefPtr<VectorTypeLayout> rowTypeLayout = new VectorTypeLayout();
31013221

31023222
auto rowInfo = rules->GetVectorLayout(
3223+
elementBaseType,
31033224
elementInfo,
31043225
colCount);
31053226

@@ -3680,7 +3801,7 @@ RefPtr<TypeLayout> getSimpleVaryingParameterTypeLayout(
36803801
{
36813802
auto varyingRuleSet = varyingRules[rr];
36823803
auto elementInfo = varyingRuleSet->GetScalarLayout(elementBaseType);
3683-
auto info = varyingRuleSet->GetVectorLayout(elementInfo, elementCount);
3804+
auto info = varyingRuleSet->GetVectorLayout(elementBaseType, elementInfo, elementCount);
36843805
typeLayout->addResourceUsage(info.kind, info.size);
36853806
}
36863807

@@ -3735,14 +3856,14 @@ RefPtr<TypeLayout> getSimpleVaryingParameterTypeLayout(
37353856
auto varyingRuleSet = varyingRules[rr];
37363857
auto elementInfo = varyingRuleSet->GetScalarLayout(elementBaseType);
37373858

3738-
auto info = varyingRuleSet->GetMatrixLayout(elementInfo, layoutMajorCount, layoutMinorCount);
3859+
auto info = varyingRuleSet->GetMatrixLayout(elementBaseType, elementInfo, layoutMajorCount, layoutMinorCount);
37393860
typeLayout->addResourceUsage(info.kind, info.size);
37403861

37413862
if(context.matrixLayoutMode == kMatrixLayoutMode_RowMajor)
37423863
{
37433864
// For row-major matrices only, we can compute an effective
37443865
// resource usage for the row type.
3745-
auto rowInfo = varyingRuleSet->GetVectorLayout(elementInfo, colCount);
3866+
auto rowInfo = varyingRuleSet->GetVectorLayout(elementBaseType, elementInfo, colCount);
37463867
rowTypeLayout->addResourceUsage(rowInfo.kind, rowInfo.size);
37473868
}
37483869
}

‎source/slang/slang-type-layout.h

+6-6
Original file line numberDiff line numberDiff line change
@@ -814,8 +814,8 @@ struct SimpleLayoutRulesImpl
814814
virtual SimpleArrayLayoutInfo GetArrayLayout(SimpleLayoutInfo elementInfo, LayoutSize elementCount) = 0;
815815

816816
// Get layout for a vector or matrix type
817-
virtual SimpleLayoutInfo GetVectorLayout(SimpleLayoutInfo elementInfo, size_t elementCount) = 0;
818-
virtual SimpleArrayLayoutInfo GetMatrixLayout(SimpleLayoutInfo elementInfo, size_t rowCount, size_t columnCount) = 0;
817+
virtual SimpleLayoutInfo GetVectorLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t elementCount) = 0;
818+
virtual SimpleArrayLayoutInfo GetMatrixLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t rowCount, size_t columnCount) = 0;
819819

820820
// Begin doing layout on a `struct` type
821821
virtual UniformLayoutInfo BeginStructLayout() = 0;
@@ -851,14 +851,14 @@ struct LayoutRulesImpl
851851
return simpleRules->GetArrayLayout(elementInfo, elementCount);
852852
}
853853

854-
SimpleLayoutInfo GetVectorLayout(SimpleLayoutInfo elementInfo, size_t elementCount)
854+
SimpleLayoutInfo GetVectorLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t elementCount)
855855
{
856-
return simpleRules->GetVectorLayout(elementInfo, elementCount);
856+
return simpleRules->GetVectorLayout(elementType, elementInfo, elementCount);
857857
}
858858

859-
SimpleArrayLayoutInfo GetMatrixLayout(SimpleLayoutInfo elementInfo, size_t rowCount, size_t columnCount)
859+
SimpleArrayLayoutInfo GetMatrixLayout(BaseType elementType, SimpleLayoutInfo elementInfo, size_t rowCount, size_t columnCount)
860860
{
861-
return simpleRules->GetMatrixLayout(elementInfo, rowCount, columnCount);
861+
return simpleRules->GetMatrixLayout(elementType, elementInfo, rowCount, columnCount);
862862
}
863863

864864
UniformLayoutInfo BeginStructLayout()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
13080308
2+
23080308
3+
33080308
4+
43080308

‎tools/render-test/cuda/cuda-compute-util.cpp

+7-5
Original file line numberDiff line numberDiff line change
@@ -572,13 +572,15 @@ static SlangResult _compute(CUcontext context, CUmodule module, const ShaderComp
572572
auto elementCount = int(typeLayout->getElementCount());
573573
if (elementCount == 0)
574574
{
575-
void** array = location.getUniform<void*>();
576-
// If set, we setup the data needed for array on CPU side
577-
if (value && array)
575+
CUDAComputeUtil::Array array = { nullptr, 0 };
576+
auto resource = CUDAResource::getCUDAResource(value);
577+
if (resource)
578578
{
579-
// TODO(JS): For now we'll just assume a pointer...
580-
*array = CUDAResource::getCUDAData(value);
579+
array.data = resource->m_cudaMemory;
580+
array.count = value->m_elementCount;
581581
}
582+
583+
location.setUniform(&array, sizeof(array));
582584
}
583585
break;
584586
}

0 commit comments

Comments
 (0)