Skip to content

Commit ffcb103

Browse files
authored
Add intrinsics for aligned load/store. (#5736)
* Add intrinsics for aligned load/store. * Fix. * Update comment. * Implement aligned load/store as intrinsic_op. * Fix. * Add proposal doc. * fix typo.
1 parent a49461b commit ffcb103

File tree

6 files changed

+142
-8
lines changed

6 files changed

+142
-8
lines changed
+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
SP #013: Aligned load store
2+
=========================================
3+
4+
Status: Experimental
5+
6+
Implementation: [PR 5736](https://github.com/shader-slang/slang/pull/5736)
7+
8+
Author: Yong He (yhe@nvidia.com)
9+
10+
Reviewer:
11+
12+
Introduction
13+
----------
14+
15+
On many architectures, aligned vector loads (e.g. loading a float4 with 16-byte alignment) are often more efficient than ordinary unaligned loads. Slang's pointer type does not encode any additional alignment info, and by default all pointer reads/writes assume the alignment of the underlying pointee type, which is 4 bytes for float4 vectors. This means that loading from a `float4*` will result in unaligned load instructions.
16+
17+
This proposal attempts to provide a way for performance-sensitive code to specify an aligned load/store through Slang pointers.
18+
19+
20+
Proposed Approach
21+
------------
22+
23+
We propose to add intrinsic functions to perform aligned load/store through a pointer:
24+
25+
```
26+
T loadAligned<int alignment, T>(T* ptr);
27+
void storeAligned<int alignment, T>(T* ptr, T value);
28+
```
29+
30+
Example:
31+
32+
```
33+
uniform float4* data;
34+
35+
[numthreads(1,1,1)]
36+
void computeMain()
37+
{
38+
var v = loadAligned<8>(data);
39+
storeAligned<16>(data+1, v);
40+
}
41+
```
42+
43+
Related Work
44+
------------
45+
46+
### GLSL ###
47+
48+
GLSL supports the `buffer_reference_align` layout qualifier on a `buffer_reference` block to specify the alignment of the buffer pointer.
49+
50+
### SPIRV ###
51+
52+
In SPIRV, the alignment can either be encoded as a decoration on the pointer type, or as a memory operand on the OpLoad and OpStore operations.
53+
54+
### Other Languages ###
55+
56+
Most C-like languages allow users to put additional attributes on types to specify the alignment of the type. All loads/stores through pointers of the type will use the alignment.
57+
58+
Instead of introducing type modifiers on data or pointer types, Slang should explicitly provide `loadAligned` and `storeAligned` intrinsic functions that lead to `OpLoad` and `OpStore` with the `Aligned` memory operand when generating SPIRV. This way we don't have to deal with the complexity around the rules for handling type coercion between modified/unmodified types, or recalculate alignment for pointers representing an access chain. Developers writing performance-sensitive code can always be assured that the alignment specified on each critical load or store will be assumed, without having to work backwards through type modifications and think about the typing rules associated with such modifiers.

source/slang/core.meta.slang

+41
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,47 @@ struct Ptr
10381038
}
10391039
};
10401040

1041+
//@hidden:
1042+
__intrinsic_op($(kIROp_AlignedAttr))
1043+
void __align_attr(int alignment);
1044+
1045+
__intrinsic_op($(kIROp_Load))
1046+
T __load_aligned<T, U>(T* ptr, U alignmentAttr);
1047+
1048+
__intrinsic_op($(kIROp_Store))
1049+
void __store_aligned<T, U>(T* ptr, T value, U alignmentAttr);
1050+
1051+
//@public:
1052+
1053+
/// Load a value from a pointer with a known alignment.
1054+
/// Aligned loads are more efficient than unaligned loads on some platforms.
1055+
/// @param alignment The alignment of the load operation.
1056+
/// @param ptr The pointer to load from.
1057+
/// @return The value loaded from the pointer.
1058+
/// @remarks When targeting SPIRV, this function maps to an `OpLoad` instruction with the `Aligned` memory operand.
1059+
/// This function maps to a normal load operation on other targets.
1060+
///
1061+
[__NoSideEffect]
1062+
[ForceInline]
1063+
T loadAligned<int alignment, T>(T* ptr)
1064+
{
1065+
return __load_aligned(ptr, __align_attr(alignment));
1066+
}
1067+
1068+
/// Store a value to a pointer with a known alignment.
1069+
/// Aligned stores are more efficient than unaligned stores on some platforms.
1070+
/// @param alignment The alignment of the store operation.
1071+
/// @param ptr The pointer to store value to.
1072+
/// @param value The value to store.
1073+
/// @remarks When targeting SPIRV, this function maps to an `OpStore` instruction with the `Aligned` memory operand.
1074+
/// This function maps to a normal store operation on other targets.
1075+
///
1076+
[ForceInline]
1077+
void storeAligned<int alignment, T>(T* ptr, T value)
1078+
{
1079+
__store_aligned(ptr, value, __align_attr(alignment));
1080+
}
1081+
10411082
//@hidden:
10421083
__intrinsic_op($(kIROp_Load))
10431084
T __load<T, let addrSpace : uint64_t>(Ptr<T, addrSpace> ptr);

source/slang/slang-emit-spirv.cpp

+22-8
Original file line numberDiff line numberDiff line change
@@ -5975,10 +5975,17 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
59755975
SpvStorageClassPhysicalStorageBuffer)
59765976
{
59775977
IRSizeAndAlignment sizeAndAlignment;
5978-
getNaturalSizeAndAlignment(
5979-
m_targetProgram->getOptionSet(),
5980-
ptrType->getValueType(),
5981-
&sizeAndAlignment);
5978+
if (auto alignedAttr = inst->findAttr<IRAlignedAttr>())
5979+
{
5980+
sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment());
5981+
}
5982+
else
5983+
{
5984+
getNaturalSizeAndAlignment(
5985+
m_targetProgram->getOptionSet(),
5986+
ptrType->getValueType(),
5987+
&sizeAndAlignment);
5988+
}
59825989
return emitOpLoadAligned(
59835990
parent,
59845991
inst,
@@ -5999,10 +6006,17 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
59996006
SpvStorageClassPhysicalStorageBuffer)
60006007
{
60016008
IRSizeAndAlignment sizeAndAlignment;
6002-
getNaturalSizeAndAlignment(
6003-
m_targetProgram->getOptionSet(),
6004-
ptrType->getValueType(),
6005-
&sizeAndAlignment);
6009+
if (auto alignedAttr = inst->findAttr<IRAlignedAttr>())
6010+
{
6011+
sizeAndAlignment.alignment = (int)getIntVal(alignedAttr->getAlignment());
6012+
}
6013+
else
6014+
{
6015+
getNaturalSizeAndAlignment(
6016+
m_targetProgram->getOptionSet(),
6017+
ptrType->getValueType(),
6018+
&sizeAndAlignment);
6019+
}
60066020
return emitOpStoreAligned(
60076021
parent,
60086022
inst,

source/slang/slang-ir-inst-defs.h

+2
Original file line numberDiff line numberDiff line change
@@ -1250,6 +1250,7 @@ INST_RANGE(Layout, VarLayout, EntryPointLayout)
12501250
INST(SNormAttr, snorm, 0, HOISTABLE)
12511251
INST(NoDiffAttr, no_diff, 0, HOISTABLE)
12521252
INST(NonUniformAttr, nonuniform, 0, HOISTABLE)
1253+
INST(AlignedAttr, Aligned, 1, HOISTABLE)
12531254

12541255
/* SemanticAttr */
12551256
INST(UserSemanticAttr, userSemantic, 2, HOISTABLE)
@@ -1260,6 +1261,7 @@ INST_RANGE(Layout, VarLayout, EntryPointLayout)
12601261
INST(VarOffsetAttr, offset, 2, HOISTABLE)
12611262
INST_RANGE(LayoutResourceInfoAttr, TypeSizeAttr, VarOffsetAttr)
12621263
INST(FuncThrowTypeAttr, FuncThrowType, 1, HOISTABLE)
1264+
12631265
INST_RANGE(Attr, PendingLayoutAttr, FuncThrowTypeAttr)
12641266

12651267
/* Liveness */

source/slang/slang-ir-insts.h

+6
Original file line numberDiff line numberDiff line change
@@ -2389,6 +2389,12 @@ struct IRCall : IRInst
23892389
void setArg(UInt index, IRInst* arg) { setOperand(index + 1, arg); }
23902390
};
23912391

2392+
struct IRAlignedAttr : IRAttr
2393+
{
2394+
IR_LEAF_ISA(AlignedAttr)
2395+
IRInst* getAlignment() { return getOperand(0); }
2396+
};
2397+
23922398
struct IRLoad : IRInst
23932399
{
23942400
IRUse ptr;

tests/spirv/aligned-load-store.slang

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
//TEST:SIMPLE(filecheck=CHECK): -target spirv
2+
3+
// CHECK: OpLoad {{.*}} Aligned 8
4+
// CHECK: OpStore {{.*}} Aligned 16
5+
6+
uniform float4* data;
7+
8+
[numthreads(1,1,1)]
9+
void computeMain()
10+
{
11+
var v = loadAligned<8>((float2x4*)data);
12+
storeAligned<16>((float2x4*)data+1, v);
13+
}

0 commit comments

Comments
 (0)