Skip to content

Commit 2eb2c15

Browse files
committed
Add glsl implementation of Texture.InterlockedAddF32
1 parent 71439f7 commit 2eb2c15

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

source/slang/hlsl.meta.slang

+4-3
Original file line number | Diff line number | Diff line change
@@ -2330,7 +2330,6 @@ ${{{{
23302330
}
23312331

23322332
// FP16x2
2333-
__cuda_sm_version(2.0)
23342333
[__requiresNVAPI]
23352334
uint _NvInterlockedAddFp16x2(uint byteAddress, uint fp16x2Value)
23362335
{
@@ -2341,7 +2340,6 @@ ${{{{
23412340
}
23422341
}
23432342

2344-
__cuda_sm_version(2.0)
23452343
[__requiresNVAPI]
23462344
[ForceInline]
23472345
void InterlockedAddF16(uint byteAddress, half value, out half originalValue)
@@ -2357,7 +2355,7 @@ ${{{{
23572355
else
23582356
{
23592357
byteAddress = byteAddress & ~3;
2360-
uint packedInput = asuint16(value) << 16;
2358+
uint packedInput = ((uint)asuint16(value)) << 16;
23612359
originalValue = asfloat16((uint16_t)(_NvInterlockedAddFp16x2(byteAddress, packedInput) >> 16));
23622360
}
23632361
return;
@@ -12816,13 +12814,16 @@ __generic<Shape:__ITextureShape1D2D3D, let format : int>
1281612814
extension __TextureImpl<float, Shape, 0, 0, 0, $(kStdlibResourceAccessReadWrite), 0, 0, format>
1281712815
{
1281812816
[__requiresNVAPI]
12817+
__glsl_extension(GL_EXT_shader_atomic_float)
1281912818
void InterlockedAddF32(vector<uint, Shape.dimensions> coord, float value, out float originalValue)
1282012819
{
1282112820
__target_switch
1282212821
{
1282312822
case spirv:
1282412823
originalValue = __atomicAdd(this[coord], value);
1282512824
return;
12825+
case glsl:
12826+
__intrinsic_asm "$3 = imageAtomicAdd($0, $1, $2)";
1282612827
case hlsl:
1282712828
__intrinsic_asm "$3 = NvInterlockedAddFp32($0, $1, $2)";
1282812829
}

tests/hlsl-intrinsic/byte-address-buffer/byte-address-half-atomics.slang

+2-2
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
// Disabled because validation layer doesn't like vector atomics, although nv driver does allow it.
55
//DISABLED_TEST(compute, vulkan):COMPARE_COMPUTE(filecheck-buffer=CHECK): -vk -compute -profile cs_6_2 -render-features half -shaderobj -emit-spirv-directly -output-using-type
66
//TEST:SIMPLE(filecheck=SPIRV):-target spirv -entry computeMain -stage compute -emit-spirv-directly -skip-spirv-validation
7-
7+
//TEST:SIMPLE(filecheck=HLSL):-target hlsl -entry computeMain -profile cs_6_3
88
//TEST_INPUT:set tmpBuffer = ubuffer(data=[0 0 0 0], stride=4)
99
RWByteAddressBuffer tmpBuffer;
1010

@@ -17,7 +17,7 @@ void computeMain(int3 dispatchThreadID : SV_DispatchThreadID)
1717
half originalValue;
1818

1919
// SPIRV: OpAtomicFAddEXT
20-
20+
// HLSL: NvInterlockedAddFp16x2
2121
tmpBuffer.InterlockedAddF16(0, 1.0h, originalValue);
2222
tmpBuffer.InterlockedAddF16(2, 2.0h, originalValue);
2323

0 commit comments

Comments
 (0)