Skip to content

Commit 6437c38

Browse files
authored
Lower all ByteAddressBuffer uses for SPIRV. (shader-slang#3143)
Co-authored-by: Yong He <yhe@nvidia.com>
1 parent cdd5e66 commit 6437c38

19 files changed

+286
-67
lines changed

prelude/slang-cuda-prelude.h

+8-4
Original file line number | Diff line number | Diff line change
@@ -1251,7 +1251,9 @@ struct ByteAddressBuffer
12511251
// Reads a value of type T that starts `index` bytes past the buffer's `data`
// pointer and returns it by value.
// NOTE(review): SLANG_BOUND_CHECK_BYTE_ADDRESS is defined outside this hunk;
// presumably it validates `index` and `sizeof(T)` against `sizeInBytes` — confirm.
SLANG_CUDA_CALL T Load(size_t index) const
12521252
{
12531253
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
1254-
// Removed by this commit: the value was read through a pointer cast to `const T*`.
return *(const T*)(((const char*)data) + index);
1254+
// Added by this commit: sizeof(T) bytes are copied into a local instead.
// The local is named `data` and shadows the member of the same name, which
// is why the copy source below is spelled `this->data`.
// NOTE(review): presumably the memcpy form tolerates offsets that are not
// suitably aligned for T — confirm against the commit's intent.
T data;
1255+
memcpy(&data, ((const char*)this->data) + index, sizeof(T));
1256+
return data;
12551257
}
12561258

12571259
const uint32_t* data;
@@ -1292,7 +1294,9 @@ struct RWByteAddressBuffer
12921294
// Reads a value of type T that starts `index` bytes past the buffer's `data`
// pointer and returns it by value.
// NOTE(review): SLANG_BOUND_CHECK_BYTE_ADDRESS is defined outside this hunk;
// presumably it validates `index` and `sizeof(T)` against `sizeInBytes` — confirm.
SLANG_CUDA_CALL T Load(size_t index) const
12931295
{
12941296
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
1295-
// Removed by this commit: the value was read through a pointer cast to `const T*`.
return *(const T*)((const char*)data + index);
1297+
// Added by this commit: sizeof(T) bytes are copied into a local instead.
// The local `data` shadows the member of the same name, so the copy source
// below has to be written as `this->data`.
T data;
1298+
memcpy(&data, ((const char*)this->data) + index, sizeof(T));
1299+
return data;
12961300
}
12971301

12981302
SLANG_CUDA_CALL void Store(size_t index, uint32_t v) const
@@ -1328,14 +1332,14 @@ struct RWByteAddressBuffer
13281332
// Writes `value` into the buffer starting `index` bytes past the `data` pointer.
// The method is declared `const` even though it stores: the write goes through
// the `data` pointer rather than to a member of this object.
// NOTE(review): SLANG_BOUND_CHECK_BYTE_ADDRESS is defined outside this hunk;
// presumably it validates `index` and `sizeof(T)` against `sizeInBytes` — confirm.
SLANG_CUDA_CALL void Store(size_t index, T const& value) const
13291333
{
13301334
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
1331-
// Removed by this commit: assignment through a pointer cast to `T*`.
*(T*)(((char*)data) + index) = value;
1335+
// Added by this commit: copy sizeof(T) bytes from `value` to the destination.
memcpy((char*)data + index, &value, sizeof(T));
13321336
}
13331337

13341338
/// Can be used in stdlib to gain access
13351339
// Returns a `T*` that points `index` bytes past the buffer's `data` pointer.
// No data is read or written here; the caller dereferences the result.
template <typename T>
13361340
SLANG_CUDA_CALL T* _getPtrAt(size_t index)
13371341
{
1338-
// Removed by this commit: the check was sized by sizeof(T).
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes);
1342+
// Added by this commit: the size argument is the constant 4, so the check no
// longer scales with T.
// NOTE(review): presumably only the first 4 bytes at `index` are validated,
// leaving the rest of a larger T unchecked — confirm this is intended.
SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes);
13391343
return (T*)(((char*)data) + index);
13401344
}
13411345

source/slang/hlsl.meta.slang

+125-36
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,15 @@ __generic<T>
1414
__intrinsic_op($(kIROp_StructuredBufferGetDimensions))
1515
uint2 __structuredBufferGetDimensions(ConsumeStructuredBuffer<T> buffer);
1616

17+
// Overloads of `__structuredBufferGetDimensions` for the three structured-buffer
// types below. Each is bound to the IR op `kIROp_StructuredBufferGetDimensions`
// via `__intrinsic_op` and has no Slang body.
// The `GetDimensions` methods in this file read the returned `uint2` as
// `.x` = number of structs and `.y` = stride.

// Read-only structured buffer.
__intrinsic_op($(kIROp_StructuredBufferGetDimensions))
18+
uint2 __structuredBufferGetDimensions<T>(StructuredBuffer<T> buffer);
19+
20+
// Read-write structured buffer.
__intrinsic_op($(kIROp_StructuredBufferGetDimensions))
21+
uint2 __structuredBufferGetDimensions<T>(RWStructuredBuffer<T> buffer);
22+
23+
// Rasterizer-ordered structured buffer.
__intrinsic_op($(kIROp_StructuredBufferGetDimensions))
24+
uint2 __structuredBufferGetDimensions<T>(RasterizerOrderedStructuredBuffer<T> buffer);
25+
1726
__generic<T>
1827
__magic_type(HLSLAppendStructuredBufferType)
1928
__intrinsic_type($(kIROp_HLSLAppendStructuredBufferType))
@@ -37,35 +46,61 @@ __magic_type(HLSLByteAddressBufferType)
3746
__intrinsic_type($(kIROp_HLSLByteAddressBufferType))
3847
struct ByteAddressBuffer
3948
{
40-
__target_intrinsic(glsl, "$1 = $0._data.length() * 4")
4149
[__readNone]
42-
void GetDimensions(
43-
out uint dim);
50+
__target_intrinsic(hlsl)
51+
__target_intrinsic(cpp)
52+
__target_intrinsic(cuda)
53+
[__unsafeForceInlineEarly]
54+
void GetDimensions(out uint dim);
55+
56+
[__unsafeForceInlineEarly]
57+
__specialized_for_target(spirv)
58+
__specialized_for_target(glsl)
59+
void GetDimensions(out uint dim)
60+
{
61+
dim = __structuredBufferGetDimensions(__getEquivalentStructuredBuffer<uint>(this)).x*4;
62+
}
4463

45-
__target_intrinsic(glsl, "$0._data[$1/4]")
4664
[__readNone]
47-
uint Load(int location);
65+
[ForceInline]
66+
__target_intrinsic(hlsl)
67+
uint Load(int location)
68+
{
69+
return __byteAddressBufferLoad<uint>(this, location);
70+
}
4871

4972
[__readNone]
5073
uint Load(int location, out uint status);
5174

52-
__target_intrinsic(glsl, "uvec2($0._data[$1/4], $0._data[$1/4+1])")
5375
[__readNone]
54-
uint2 Load2(int location);
76+
[ForceInline]
77+
__target_intrinsic(hlsl)
78+
uint2 Load2(int location)
79+
{
80+
return __byteAddressBufferLoad<uint2>(this, location);
81+
}
5582

5683
[__readNone]
5784
uint2 Load2(int location, out uint status);
5885

59-
__target_intrinsic(glsl, "uvec3($0._data[$1/4], $0._data[$1/4+1], $0._data[$1/4+2])")
6086
[__readNone]
61-
uint3 Load3(int location);
87+
[ForceInline]
88+
__target_intrinsic(hlsl)
89+
uint3 Load3(int location)
90+
{
91+
return __byteAddressBufferLoad<uint3>(this, location);
92+
}
6293

6394
[__readNone]
6495
uint3 Load3(int location, out uint status);
6596

66-
__target_intrinsic(glsl, "uvec4($0._data[$1/4], $0._data[$1/4+1], $0._data[$1/4+2], $0._data[$1/4+3])")
6797
[__readNone]
68-
uint4 Load4(int location);
98+
[ForceInline]
99+
__target_intrinsic(hlsl)
100+
uint4 Load4(int location)
101+
{
102+
return __byteAddressBufferLoad<uint4>(this, location);
103+
}
69104

70105
[__readNone]
71106
uint4 Load4(int location, out uint status);
@@ -244,11 +279,16 @@ __magic_type(HLSLStructuredBufferType)
244279
__intrinsic_type($(kIROp_HLSLStructuredBufferType))
245280
struct StructuredBuffer
246281
{
247-
__target_intrinsic(glsl, "$1 = $0._data.length(); $2 = 0")
248282
[__readNone]
283+
[__unsafeForceInlineEarly]
249284
void GetDimensions(
250285
out uint numStructs,
251-
out uint stride);
286+
out uint stride)
287+
{
288+
let rs = __structuredBufferGetDimensions(this);
289+
numStructs = rs.x;
290+
stride = rs.y;
291+
}
252292

253293
__intrinsic_op($(kIROp_StructuredBufferLoad))
254294
__target_intrinsic(glsl, "$0._data[$1]")
@@ -321,34 +361,56 @@ struct $(item.name)
321361
// Note(tfoley): supports all operations from `ByteAddressBuffer`
322362
// TODO(tfoley): can this be made a sub-type?
323363

324-
__target_intrinsic(glsl, "$1 = $0._data.length() * 4")
325-
void GetDimensions(
326-
out uint dim);
364+
__target_intrinsic(hlsl)
365+
__target_intrinsic(cpp)
366+
__target_intrinsic(cuda)
367+
[__unsafeForceInlineEarly]
368+
void GetDimensions(out uint dim);
327369

328-
__target_intrinsic(glsl, "$0._data[$1/4]")
370+
[__unsafeForceInlineEarly]
371+
__specialized_for_target(spirv)
372+
__specialized_for_target(glsl)
373+
void GetDimensions(out uint dim)
374+
{
375+
dim = __structuredBufferGetDimensions(__getEquivalentStructuredBuffer<uint>(this)).x*4;
376+
}
377+
378+
__target_intrinsic(hlsl)
329379
[__NoSideEffect]
330-
uint Load(int location);
380+
uint Load(int location)
381+
{
382+
return __byteAddressBufferLoad<uint>(this, location);
383+
}
331384

332385
[__NoSideEffect]
333386
uint Load(int location, out uint status);
334387

335-
__target_intrinsic(glsl, "uvec2($0._data[$1/4], $0._data[$1/4+1])")
388+
__target_intrinsic(hlsl)
336389
[__NoSideEffect]
337-
uint2 Load2(int location);
390+
uint2 Load2(int location)
391+
{
392+
return __byteAddressBufferLoad<uint2>(this, location);
393+
}
338394

339395
[__NoSideEffect]
340396
uint2 Load2(int location, out uint status);
341397

342-
__target_intrinsic(glsl, "uvec3($0._data[$1/4], $0._data[$1/4+1], $0._data[$1/4+2])")
398+
__target_intrinsic(hlsl)
343399
[__NoSideEffect]
344-
uint3 Load3(int location);
400+
uint3 Load3(int location)
401+
{
402+
return __byteAddressBufferLoad<uint3>(this, location);
403+
}
345404

346405
[__NoSideEffect]
347406
uint3 Load3(int location, out uint status);
348407

349-
__target_intrinsic(glsl, "uvec4($0._data[$1/4], $0._data[$1/4+1], $0._data[$1/4+2], $0._data[$1/4+3])")
408+
__target_intrinsic(hlsl)
350409
[__NoSideEffect]
351-
uint4 Load4(int location);
410+
uint4 Load4(int location)
411+
{
412+
return __byteAddressBufferLoad<uint4>(this, location);
413+
}
352414

353415
[__NoSideEffect]
354416
uint4 Load4(int location, out uint status);
@@ -689,25 +751,39 @@ ${{{{
689751
UINT dest,
690752
UINT value);
691753

692-
__target_intrinsic(glsl, "$0._data[$1/4] = $2")
754+
__target_intrinsic(hlsl)
755+
[ForceInline]
693756
void Store(
694757
uint address,
695-
uint value);
758+
uint value)
759+
{
760+
__byteAddressBufferStore(this, address, value);
761+
}
696762

697-
__target_intrinsic(glsl, "$0._data[$1/4] = $2.x, $0._data[$1/4+1] = $2.y")
698-
void Store2(
699-
uint address,
700-
uint2 value);
763+
__target_intrinsic(hlsl)
764+
[ForceInline]
765+
void Store2(uint address, uint2 value)
766+
{
767+
__byteAddressBufferStore(this, address, value);
768+
}
701769

702-
__target_intrinsic(glsl, "$0._data[$1/4] = $2.x, $0._data[$1/4+1] = $2.y, $0._data[$1/4+2] = $2.z")
770+
__target_intrinsic(hlsl)
771+
[ForceInline]
703772
void Store3(
704773
uint address,
705-
uint3 value);
774+
uint3 value)
775+
{
776+
__byteAddressBufferStore(this, address, value);
777+
}
706778

707-
__target_intrinsic(glsl, "$0._data[$1/4] = $2.x, $0._data[$1/4+1] = $2.y, $0._data[$1/4+2] = $2.z, $0._data[$1/4+3] = $2.w")
779+
__target_intrinsic(hlsl)
780+
[ForceInline]
708781
void Store4(
709782
uint address,
710-
uint4 value);
783+
uint4 value)
784+
{
785+
__byteAddressBufferStore(this, address, value);
786+
}
711787

712788
void Store<T>(int offset, T value)
713789
{
@@ -738,10 +814,17 @@ struct $(item.name)
738814
{
739815
uint DecrementCounter();
740816

741-
__target_intrinsic(glsl, "$1 = $0._data.length(); $2 = 0")
817+
[__readNone]
818+
[__unsafeForceInlineEarly]
819+
__target_intrinsic(hlsl)
742820
void GetDimensions(
743821
out uint numStructs,
744-
out uint stride);
822+
out uint stride)
823+
{
824+
let rs = __structuredBufferGetDimensions(this);
825+
numStructs = rs.x;
826+
stride = rs.y;
827+
}
745828

746829
uint IncrementCounter();
747830

@@ -1850,6 +1933,12 @@ __generic<T : __BuiltinFloatingPointType> vector<T,4> dst(vector<T,4> x, vector<
18501933
__intrinsic_op($(kIROp_GetEquivalentStructuredBuffer))
18511934
RWStructuredBuffer<T> __getEquivalentStructuredBuffer<T>(RWByteAddressBuffer b);
18521935

1936+
// Overloads of `__getEquivalentStructuredBuffer` added for the read-only and
// rasterizer-ordered byte-address buffers. Each is bound to the IR op
// `kIROp_GetEquivalentStructuredBuffer` and returns the structured-buffer type
// of the matching flavor, with the element type `T` chosen by the caller.

// ByteAddressBuffer -> StructuredBuffer<T>.
__intrinsic_op($(kIROp_GetEquivalentStructuredBuffer))
1937+
StructuredBuffer<T> __getEquivalentStructuredBuffer<T>(ByteAddressBuffer b);
1938+
1939+
// RasterizerOrderedByteAddressBuffer -> RasterizerOrderedStructuredBuffer<T>.
__intrinsic_op($(kIROp_GetEquivalentStructuredBuffer))
1940+
RasterizerOrderedStructuredBuffer<T> __getEquivalentStructuredBuffer<T>(RasterizerOrderedByteAddressBuffer b);
1941+
18531942
// Error message
18541943

18551944
// void errorf( string format, ... );

source/slang/slang-emit-cpp.cpp

+34
Original file line number | Diff line number | Diff line change
@@ -1192,6 +1192,40 @@ const UnownedStringSlice* CPPSourceEmitter::getVectorElementNames(IRVectorType*
11921192
return getVectorElementNames(basicType->getBaseType(), elemCount);
11931193
}
11941194

1195+
/// Statement-level emission hook for the C++ source emitter.
///
/// Only `kIROp_StructuredBufferGetDimensions` is handled. For any other opcode
/// nothing is written and `false` is returned.
///
/// For the handled opcode the following is written to `m_writer`, in order:
///
///     uint <count>;
///     uint <stride>;
///     <operand 0>.GetDimensions(&<count>, &<stride>);
///     <output of emitInstResultDecl(inst)>uint2(<count>, <stride>);
///
/// where `<count>` and `<stride>` are names obtained from `_generateUniqueName`.
///
/// @param inst  Instruction to emit.
/// @return      `true` if the instruction was emitted here, `false` otherwise.
bool CPPSourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
    if (inst->getOp() != kIROp_StructuredBufferGetDimensions)
        return false;

    // The names are requested in this order: element count first, then stride.
    auto elementCountName = _generateUniqueName(UnownedStringSlice("_elementCount"));
    auto strideName = _generateUniqueName(UnownedStringSlice("_stride"));

    // Writes `uint <name>;` followed by a newline.
    auto declareUint = [this](const auto& name)
    {
        m_writer->emit("uint ");
        m_writer->emit(name);
        m_writer->emit(";\n");
    };
    declareUint(elementCountName);
    declareUint(strideName);

    // Call GetDimensions on the buffer operand, passing the two temporaries by address.
    emitOperand(
        inst->getOperand(0),
        leftSide(getInfo(EmitOp::General), getInfo(EmitOp::Postfix)));
    m_writer->emit(".GetDimensions(&");
    m_writer->emit(elementCountName);
    m_writer->emit(", &");
    m_writer->emit(strideName);
    m_writer->emit(");\n");

    // Declare the instruction's result and initialise it from the two temporaries.
    emitInstResultDecl(inst);
    m_writer->emit("uint2(");
    m_writer->emit(elementCountName);
    m_writer->emit(", ");
    m_writer->emit(strideName);
    m_writer->emit(");\n");

    return true;
}
1228+
11951229
bool CPPSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec)
11961230
{
11971231
switch (inst->getOp())

source/slang/slang-emit-cpp.h

+2
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,8 @@ class CPPSourceEmitter: public CLikeSourceEmitter
5353
virtual void _emitType(IRType* type, DeclaratorInfo* declarator) SLANG_OVERRIDE;
5454
virtual void emitVectorTypeNameImpl(IRType* elementType, IRIntegerValue elementCount) SLANG_OVERRIDE;
5555
virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE;
56+
// Statement-level emission hook; returns true when `inst` was emitted by this override.
virtual bool tryEmitInstStmtImpl(IRInst* inst) SLANG_OVERRIDE;
57+
5658
virtual void emitPreModuleImpl() SLANG_OVERRIDE;
5759
virtual void emitSimpleValueImpl(IRInst* value) SLANG_OVERRIDE;
5860
virtual void emitSimpleFuncParamImpl(IRParam* param) SLANG_OVERRIDE;

source/slang/slang-emit-cuda.cpp

+34
Original file line number | Diff line number | Diff line change
@@ -449,6 +449,40 @@ void CUDASourceEmitter::emitIntrinsicCallExprImpl(IRCall* inst, IRTargetIntrinsi
449449
Super::emitIntrinsicCallExprImpl(inst, targetIntrinsic, inOuterPrec);
450450
}
451451

452+
/// Statement-level emission hook for the CUDA source emitter.
///
/// Only `kIROp_StructuredBufferGetDimensions` is handled. For any other opcode
/// nothing is written and `false` is returned.
///
/// For the handled opcode the following is written to `m_writer`, in order:
///
///     uint <count>;
///     uint <stride>;
///     <operand 0>.GetDimensions(&<count>, &<stride>);
///     <output of emitInstResultDecl(inst)>make_uint2(<count>, <stride>);
///
/// where `<count>` and `<stride>` are names obtained from `_generateUniqueName`.
///
/// @param inst  Instruction to emit.
/// @return      `true` if the instruction was emitted here, `false` otherwise.
bool CUDASourceEmitter::tryEmitInstStmtImpl(IRInst* inst)
{
    if (inst->getOp() != kIROp_StructuredBufferGetDimensions)
        return false;

    // The names are requested in this order: element count first, then stride.
    auto elementCountName = _generateUniqueName(UnownedStringSlice("_elementCount"));
    auto strideName = _generateUniqueName(UnownedStringSlice("_stride"));

    // Writes `uint <name>;` followed by a newline.
    auto declareUint = [this](const auto& name)
    {
        m_writer->emit("uint ");
        m_writer->emit(name);
        m_writer->emit(";\n");
    };
    declareUint(elementCountName);
    declareUint(strideName);

    // Call GetDimensions on the buffer operand, passing the two temporaries by address.
    emitOperand(
        inst->getOperand(0),
        leftSide(getInfo(EmitOp::General), getInfo(EmitOp::Postfix)));
    m_writer->emit(".GetDimensions(&");
    m_writer->emit(elementCountName);
    m_writer->emit(", &");
    m_writer->emit(strideName);
    m_writer->emit(");\n");

    // Declare the instruction's result and initialise it from the two temporaries.
    emitInstResultDecl(inst);
    m_writer->emit("make_uint2(");
    m_writer->emit(elementCountName);
    m_writer->emit(", ");
    m_writer->emit(strideName);
    m_writer->emit(");\n");

    return true;
}
485+
452486
bool CUDASourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec)
453487
{
454488
switch(inst->getOp())

source/slang/slang-emit-cuda.h

+1
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,7 @@ class CUDASourceEmitter : public CPPSourceEmitter
9292

9393
virtual bool tryEmitGlobalParamImpl(IRGlobalParam* varDecl, IRType* varType) SLANG_OVERRIDE;
9494
virtual bool tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOuterPrec) SLANG_OVERRIDE;
95+
// Statement-level emission hook; returns true when `inst` was emitted by this override.
virtual bool tryEmitInstStmtImpl(IRInst* inst) SLANG_OVERRIDE;
9596
virtual void emitIntrinsicCallExprImpl(IRCall* inst, IRTargetIntrinsicDecoration* targetIntrinsic, EmitOpInfo const& inOuterPrec) SLANG_OVERRIDE;
9697

9798
virtual void emitModuleImpl(IRModule* module, DiagnosticSink* sink) SLANG_OVERRIDE;

0 commit comments

Comments
 (0)