Skip to content

Commit 484c1e6

Browse files
authored
ForceInline ByteAddressBuffer operations in stdlib (shader-slang#4003)
* ForceInline ByteAddressBuffer operations in stdlib
* fixup
1 parent: 22fbca5 · commit: 484c1e6

File tree

1 file changed

+38
-13
lines changed

1 file changed

+38
-13
lines changed

source/slang/hlsl.meta.slang

+38-13
Original file line number | Diff line number | Diff line change
@@ -145,6 +145,7 @@ struct ByteAddressBuffer
145145
uint4 Load4(int location, out uint status);
146146

147147
[__readNone]
148+
[ForceInline]
148149
T Load<T>(int location)
149150
{
150151
return __byteAddressBufferLoad<T>(this, location);
@@ -325,8 +326,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
325326
__intrinsic_op($(kIROp_CombinedTextureSamplerGetSampler))
326327
SamplerComparisonState __getComparisonSampler();
327328

328-
[ForceInline]
329329
[__readNone]
330+
[ForceInline]
330331
[require(glsl_hlsl_spirv, texture_querylod)]
331332
float CalculateLevelOfDetail(TextureCoord location)
332333
{
@@ -346,8 +347,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
346347
}
347348
}
348349

349-
[ForceInline]
350350
[__readNone]
351+
[ForceInline]
351352
[require(glsl_hlsl_spirv, texture_querylod)]
352353
float CalculateLevelOfDetailUnclamped(TextureCoord location)
353354
{
@@ -368,6 +369,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
368369
}
369370

370371
[__readNone]
372+
[ForceInline]
371373
[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
372374
T Sample(vector<float, Shape.dimensions+isArray> location)
373375
{
@@ -417,6 +419,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
417419
}
418420

419421
[__readNone]
422+
[ForceInline]
420423
__glsl_extension(GL_ARB_sparse_texture_clamp)
421424
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
422425
T Sample(vector<float, Shape.dimensions+isArray> location, vector<int, Shape.planeDimensions> offset, float clamp)
@@ -439,6 +442,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
439442
}
440443

441444
[__readNone]
445+
[ForceInline]
442446
__target_intrinsic(hlsl)
443447
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
444448
T Sample(vector<float, Shape.dimensions+isArray> location, vector<int, Shape.planeDimensions> offset, float clamp, out uint status)
@@ -448,6 +452,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
448452
}
449453

450454
[__readNone]
455+
[ForceInline]
451456
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
452457
T SampleBias(vector<float, Shape.dimensions+isArray> location, float bias)
453458
{
@@ -469,6 +474,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
469474
}
470475

471476
[__readNone]
477+
[ForceInline]
472478
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
473479
T SampleBias(vector<float, Shape.dimensions+isArray> location, float bias, constexpr vector<int, Shape.planeDimensions> offset)
474480
{
@@ -599,6 +605,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
599605
}
600606

601607
[__readNone]
608+
[ForceInline]
602609
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
603610
T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY)
604611
{
@@ -620,6 +627,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
620627
}
621628

622629
[__readNone]
630+
[ForceInline]
623631
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
624632
T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset)
625633
{
@@ -639,8 +647,9 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,1,format>
639647
}
640648
}
641649

642-
__glsl_extension(GL_ARB_sparse_texture_clamp)
643650
[__readNone]
651+
[ForceInline]
652+
__glsl_extension(GL_ARB_sparse_texture_clamp)
644653
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
645654
T SampleGrad(vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset, float lodClamp)
646655
{
@@ -785,6 +794,7 @@ __generic<T:IFloat, Shape: __ITextureShape, let isArray:int, let isMS:int, let s
785794
extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
786795
{
787796
[__readNone]
797+
[ForceInline]
788798
[require(cpp_cuda_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
789799
T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location)
790800
{
@@ -837,6 +847,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
837847
}
838848

839849
[__readNone]
850+
[ForceInline]
840851
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
841852
T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset)
842853
{
@@ -858,6 +869,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
858869
}
859870

860871
[__readNone]
872+
[ForceInline]
861873
__glsl_extension(GL_ARB_sparse_texture_clamp)
862874
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
863875
T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset, float clamp)
@@ -880,15 +892,17 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
880892
}
881893
}
882894

883-
[__readNone]
884895
__target_intrinsic(hlsl)
896+
[__readNone]
897+
[ForceInline]
885898
T Sample(SamplerState s, vector<float, Shape.dimensions+isArray> location, constexpr vector<int, Shape.planeDimensions> offset, float clamp, out uint status)
886899
{
887900
status = 0;
888901
return Sample(s, location, offset, clamp);
889902
}
890903

891904
[__readNone]
905+
[ForceInline]
892906
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
893907
T SampleBias(SamplerState s, vector<float, Shape.dimensions+isArray> location, float bias)
894908
{
@@ -910,6 +924,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
910924
}
911925

912926
[__readNone]
927+
[ForceInline]
913928
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1_fragment)]
914929
T SampleBias(SamplerState s, vector<float, Shape.dimensions+isArray> location, float bias, constexpr vector<int, Shape.planeDimensions> offset)
915930
{
@@ -930,7 +945,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
930945
}
931946
}
932947

933-
[__readNone] [ForceInline]
948+
[__readNone]
949+
[ForceInline]
934950
[require(glsl_hlsl_spirv, texture_shadowlod)]
935951
float SampleCmp(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue)
936952
{
@@ -960,7 +976,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
960976
}
961977
}
962978

963-
[__readNone] [ForceInline]
979+
[__readNone]
980+
[ForceInline]
964981
[require(glsl_hlsl_spirv, texture_shadowlod)]
965982
float SampleCmpLevelZero(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue)
966983
{
@@ -987,7 +1004,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
9871004
}
9881005
}
9891006

990-
[__readNone] [ForceInline]
1007+
[__readNone]
1008+
[ForceInline]
9911009
[require(glsl_hlsl_spirv, texture_shadowlod)]
9921010
float SampleCmp(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue, constexpr vector<int, Shape.planeDimensions> offset)
9931011
{
@@ -1013,7 +1031,8 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
10131031
}
10141032
}
10151033

1016-
[__readNone] [ForceInline]
1034+
[__readNone]
1035+
[ForceInline]
10171036
[require(glsl_hlsl_spirv, texture_shadowlod)]
10181037
float SampleCmpLevelZero(SamplerComparisonState s, vector<float, Shape.dimensions+isArray> location, float compareValue, constexpr vector<int, Shape.planeDimensions> offset)
10191038
{
@@ -1041,6 +1060,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
10411060
}
10421061

10431062
[__readNone]
1063+
[ForceInline]
10441064
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
10451065
T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY)
10461066
{
@@ -1062,6 +1082,7 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
10621082
}
10631083

10641084
[__readNone]
1085+
[ForceInline]
10651086
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
10661087
T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset)
10671088
{
@@ -1083,8 +1104,9 @@ extension __TextureImpl<T,Shape,isArray,isMS,sampleCount,0,isShadow,0,format>
10831104
}
10841105
}
10851106

1086-
__glsl_extension(GL_ARB_sparse_texture_clamp)
10871107
[__readNone]
1108+
[ForceInline]
1109+
__glsl_extension(GL_ARB_sparse_texture_clamp)
10881110
[require(cpp_glsl_hlsl_spirv, texture_sm_4_1)]
10891111
T SampleGrad(SamplerState s, vector<float, Shape.dimensions+isArray> location, vector<float, Shape.dimensions> gradX, vector<float, Shape.dimensions> gradY, constexpr vector<int, Shape.dimensions> offset, float lodClamp)
10901112
{
@@ -2813,7 +2835,6 @@ ${{{{
28132835
[__requiresNVAPI]
28142836
[ForceInline]
28152837
__cuda_sm_version(2.0)
2816-
[ForceInline]
28172838
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda_float1)]
28182839
void InterlockedAddF32(uint byteAddress, float valueToAdd)
28192840
{
@@ -2834,7 +2855,6 @@ ${{{{
28342855
// Int64 Add
28352856
[ForceInline]
28362857
__cuda_sm_version(6.0)
2837-
[ForceInline]
28382858
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)]
28392859
void InterlockedAddI64(uint byteAddress, int64_t valueToAdd, out int64_t originalValue)
28402860
{
@@ -2858,15 +2878,13 @@ ${{{{
28582878
[require(cuda_glsl_hlsl_spirv, atomic_glsl_hlsl_cuda6_int64)]
28592879
void InterlockedAddI64(uint byteAddress, int64_t valueToAdd);
28602880

2861-
[ForceInline]
28622881
__specialized_for_target(hlsl)
28632882
[ForceInline]
28642883
void InterlockedAddI64(uint byteAddress, int64_t valueToAdd)
28652884
{
28662885
__atomicAdd(this, byteAddress, __asuint2(valueToAdd));
28672886
}
28682887

2869-
[ForceInline]
28702888
__specialized_for_target(glsl)
28712889
__specialized_for_target(spirv)
28722890
[ForceInline]
@@ -2906,6 +2924,7 @@ ${{{{
29062924
uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value);
29072925

29082926
__specialized_for_target(hlsl)
2927+
[ForceInline]
29092928
uint64_t InterlockedMaxU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicMax(this, byteAddress, __asuint2(value))); }
29102929

29112930
__specialized_for_target(glsl)
@@ -2965,6 +2984,7 @@ ${{{{
29652984
uint64_t InterlockedMinU64(uint byteAddress, uint64_t value);
29662985

29672986
__specialized_for_target(hlsl)
2987+
[ForceInline]
29682988
uint64_t InterlockedMinU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicMin(this, byteAddress, __asuint2(value))); }
29692989

29702990
__specialized_for_target(glsl)
@@ -3024,6 +3044,7 @@ ${{{{
30243044
uint64_t InterlockedAndU64(uint byteAddress, uint64_t value);
30253045

30263046
__specialized_for_target(hlsl)
3047+
[ForceInline]
30273048
uint64_t InterlockedAndU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicAnd(this, byteAddress, __asuint2(value))); }
30283049

30293050
__specialized_for_target(glsl)
@@ -3063,6 +3084,7 @@ ${{{{
30633084
uint64_t InterlockedOrU64(uint byteAddress, uint64_t value);
30643085

30653086
__specialized_for_target(hlsl)
3087+
[ForceInline]
30663088
uint64_t InterlockedOrU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicOr(this, byteAddress, __asuint2(value))); }
30673089

30683090
__specialized_for_target(glsl)
@@ -3102,6 +3124,7 @@ ${{{{
31023124
uint64_t InterlockedXorU64(uint byteAddress, uint64_t value);
31033125

31043126
__specialized_for_target(hlsl)
3127+
[ForceInline]
31053128
uint64_t InterlockedXorU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicXor(this, byteAddress, __asuint2(value))); }
31063129

31073130
__specialized_for_target(glsl)
@@ -3140,6 +3163,7 @@ ${{{{
31403163
uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value);
31413164

31423165
__specialized_for_target(hlsl)
3166+
[ForceInline]
31433167
uint64_t InterlockedExchangeU64(uint byteAddress, uint64_t value) { return __asuint64(__atomicExchange(this, byteAddress, __asuint2(value))); }
31443168

31453169
__specialized_for_target(glsl)
@@ -3255,6 +3279,7 @@ ${{{{
32553279
return;
32563280
}
32573281
}
3282+
32583283
[ForceInline]
32593284
void InterlockedCompareExchange64(uint byteAddress, uint64_t compareValue, uint64_t value, out uint64_t outOriginalValue)
32603285
{

0 commit comments

Comments
 (0)