@@ -2498,6 +2498,7 @@ __generic<T : __BuiltinArithmeticType>
2498
2498
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
2499
2499
__spirv_version(1 . 3 )
2500
2500
__target_intrinsic (glsl, " subgroupExclusiveMul($0)" )
2501
+ __target_intrinsic (cuda, " _wavePrefixProduct($0)" )
2501
2502
T WavePrefixProduct(T expr);
2502
2503
__generic < T : __BuiltinArithmeticType, let N : int >
2503
2504
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
@@ -2521,10 +2522,54 @@ vector<T,N> WavePrefixSum(vector<T,N> expr);
2521
2522
// Matrix overload of WavePrefixSum. No target-intrinsic attribute is attached
// to this overload in the lines visible here.
__generic < T : __BuiltinArithmeticType, let N : int , let M : int >
2522
2523
matrix< T,N,M> WavePrefixSum(matrix< T,N,M> expr);
2523
2524
2525
// Scalar WaveReadLaneFirst for any builtin type.
// GLSL: emitted as subgroupBroadcastFirst; the extension and SPIR-V version
// attributes are presumably the requirements of that mapping.
// CUDA: forwarded to the _waveReadFirst helper, which is defined outside this
// excerpt.
+ __generic < T : __BuiltinType>
2526
+ __glsl_extension(GL_KHR_shader_subgroup_ballot)
2527
+ __spirv_version(1 . 3 )
2528
+ __target_intrinsic (glsl, " subgroupBroadcastFirst($0)" )
2529
+ __target_intrinsic (cuda, " _waveReadFirst($0)" )
2530
+ T WaveReadLaneFirst(T expr);
2531
// Vector overload of WaveReadLaneFirst.
// GLSL: same subgroupBroadcastFirst mapping and attributes as the scalar form.
// CUDA: uses the _waveReadFirstMultiple helper instead of _waveReadFirst
// (helper not visible here; presumably it handles multi-element values).
+ __generic < T : __BuiltinType, let N : int >
2532
+ __glsl_extension(GL_KHR_shader_subgroup_ballot)
2533
+ __spirv_version(1 . 3 )
2534
+ __target_intrinsic (glsl, " subgroupBroadcastFirst($0)" )
2535
+ __target_intrinsic (cuda, " _waveReadFirstMultiple($0)" )
2536
+ vector< T,N> WaveReadLaneFirst(vector< T,N> expr);
2537
// Matrix overload of WaveReadLaneFirst.
// Only a CUDA mapping (_waveReadFirstMultiple) is declared; there is no GLSL
// intrinsic attribute on this overload.
+ __generic < T : __BuiltinType, let N : int , let M : int >
2538
+ __target_intrinsic (cuda, " _waveReadFirstMultiple($0)" )
2539
+ matrix< T,N,M> WaveReadLaneFirst(matrix< T,N,M> expr);
2540
+
2541
+ // NOTE! On GLSL-based targets the lane index *must* be a compile-time constant expression!
2542
+ // See https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt
2543
// Scalar WaveReadLaneAt: reads `value` from the lane selected by `lane`.
// GLSL: emitted as subgroupBroadcast(value, lane).
// CUDA: emitted as __shfl_sync with __activemask() as the participant mask.
// NOTE(review): __activemask() reports the lanes converged at the call site,
// which may differ from the intended participant set -- confirm this is the
// desired wave semantics.
+ __generic < T : __BuiltinType>
2544
+ __glsl_extension(GL_KHR_shader_subgroup_ballot)
2545
+ __spirv_version(1 . 3 )
2546
+ __target_intrinsic (glsl, " subgroupBroadcast($0, $1)" )
2547
+ __target_intrinsic (cuda, " __shfl_sync(__activemask(), $0, $1)" )
2548
+ T WaveReadLaneAt(T value, int lane);
2549
// Vector overload of WaveReadLaneAt: reads `value` from the lane selected by
// `lane`.
// GLSL: emitted as subgroupBroadcast(value, lane). That builtin is provided by
// GL_KHR_shader_subgroup_ballot, so the extension is declared here just as it
// is on the scalar overload.
// CUDA: forwarded to the _waveReadLaneAtMultiple helper, which is defined
// outside this excerpt.
+ __generic < T : __BuiltinType, let N : int >
2550
+ __glsl_extension(GL_KHR_shader_subgroup_ballot)
+ __spirv_version(1 . 3 )
2551
+ __target_intrinsic (glsl, " subgroupBroadcast($0, $1)" )
2552
+ __target_intrinsic (cuda, " _waveReadLaneAtMultiple($0, $1)" )
2553
+ vector< T,N> WaveReadLaneAt(vector< T,N> value, int lane);
2554
// Matrix overload of WaveReadLaneAt.
// Only a CUDA mapping (_waveReadLaneAtMultiple) is declared; there is no GLSL
// intrinsic attribute on this overload.
+ __generic < T : __BuiltinType, let N : int , let M : int >
2555
+ __target_intrinsic (cuda, " _waveReadLaneAtMultiple($0, $1)" )
2556
+ matrix< T,N,M> WaveReadLaneAt(matrix< T,N,M> value, int lane);
2557
+
2558
// WavePrefixCountBits: takes a per-lane bool and returns a uint count.
// GLSL: subgroupBallotExclusiveBitCount applied to subgroupBallot(value).
// CUDA: population count of the ballot of `value` across __activemask(),
// ANDed with _getLaneLtMask() (helper not visible here; presumably the mask of
// lanes with a lower index than the caller, making the count exclusive).
+ __glsl_extension(GL_KHR_shader_subgroup_ballot)
2559
+ __spirv_version(1 . 3 )
2560
+ __target_intrinsic (glsl, " subgroupBallotExclusiveBitCount(subgroupBallot($0))" )
2561
+ __target_intrinsic (cuda, " __popc(__ballot_sync(__activemask(), $0) & _getLaneLtMask())" )
2562
+ uint WavePrefixCountBits(bool value);
2563
+
2564
+ // Shader Model 6.5 intrinsics
2565
+ // https://github.com/microsoft/DirectX-Specs/blob/master/d3d/HLSL_ShaderModel6_5.md
2566
+ // TODO(JS): It looks like these intrinsics need a mask parameter
2567
+
2524
2568
// Scalar WaveMultiPrefixBitAnd, declared here with a single `expr` argument
// (no mask parameter).
// GLSL: emitted as subgroupExclusiveAnd.
// CUDA: forwarded to the _wavePrefixAnd helper, which is defined outside this
// excerpt.
__generic < T : __BuiltinArithmeticType>
2525
2569
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
2526
2570
__spirv_version(1 . 3 )
2527
2571
__target_intrinsic (glsl, " subgroupExclusiveAnd($0)" )
2572
+ __target_intrinsic (cuda, " _wavePrefixAnd($0)" )
2528
2573
T WaveMultiPrefixBitAnd(T expr);
2529
2574
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
2530
2575
__spirv_version(1 . 3 )
@@ -2538,6 +2583,7 @@ __generic<T : __BuiltinArithmeticType>
2538
2583
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
2539
2584
__spirv_version(1 . 3 )
2540
2585
__target_intrinsic (glsl, " subgroupExclusiveOr($0)" )
2586
+ __target_intrinsic (cuda, " _wavePrefixOr($0)" )
2541
2587
T WaveMultiPrefixBitOr(T expr);
2542
2588
__generic < T : __BuiltinArithmeticType, let N : int >
2543
2589
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
@@ -2551,6 +2597,7 @@ __generic<T : __BuiltinArithmeticType>
2551
2597
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
2552
2598
__spirv_version(1 . 3 )
2553
2599
__target_intrinsic (glsl, " subgroupExclusiveXor($0)" )
2600
+ __target_intrinsic (cuda, " _wavePrefixXor($0)" )
2554
2601
T WaveMultiPrefixBitXor(T expr);
2555
2602
__generic < T : __BuiltinArithmeticType, let N : int >
2556
2603
__glsl_extension(GL_KHR_shader_subgroup_arithmetic)
@@ -2560,11 +2607,6 @@ vector<T,N> WaveMultiPrefixBitXor(vector<T,N> expr);
2560
2607
// Matrix overload of WaveMultiPrefixBitXor. No target-intrinsic attribute is
// attached to this overload in the lines visible here.
__generic < T : __BuiltinArithmeticType, let N : int , let M : int >
2561
2608
matrix< T,N,M> WaveMultiPrefixBitXor(matrix< T,N,M> expr);
2562
2609
2563
- __glsl_extension(GL_KHR_shader_subgroup_ballot)
2564
- __spirv_version(1 . 3 )
2565
- __target_intrinsic (glsl, " subgroupBallotExclusiveBitCount(subgroupBallot($0))" )
2566
- __target_intrinsic (cuda, " __popc(__ballot_sync(__activemask(), $0) & _getLaneLtMask())" )
2567
- uint WavePrefixCountBits(bool value);
2568
2610
2569
2611
// WaveMultiPrefixCountBits: takes a bool plus a uint4 `mask`. Unlike
// WavePrefixCountBits, no target-intrinsic mapping is attached to this
// declaration in the lines visible here.
uint WaveMultiPrefixCountBits(bool value, uint4 mask);
2570
2612
@@ -2576,40 +2618,6 @@ __generic<T : __BuiltinArithmeticType> T WaveMultiPrefixSum(T value, uint4 mask)
2576
2618
// Vector and matrix overloads of WaveMultiPrefixSum, each taking a uint4
// `mask`. Neither carries a target-intrinsic attribute in the lines visible
// here.
__generic < T : __BuiltinArithmeticType, let N : int > vector< T,N> WaveMultiPrefixSum(vector< T,N> value, uint4 mask);
2577
2619
__generic < T : __BuiltinArithmeticType, let N : int , let M : int > matrix< T,N,M> WaveMultiPrefixSum(matrix< T,N,M> value, uint4 mask);
2578
2620
2579
- __generic < T : __BuiltinType>
2580
- __glsl_extension(GL_KHR_shader_subgroup_ballot)
2581
- __spirv_version(1 . 3 )
2582
- __target_intrinsic (glsl, " subgroupBroadcastFirst($0)" )
2583
- __target_intrinsic (cuda, " _waveReadFirst($0)" )
2584
- T WaveReadLaneFirst(T expr);
2585
- __generic < T : __BuiltinType, let N : int >
2586
- __glsl_extension(GL_KHR_shader_subgroup_ballot)
2587
- __spirv_version(1 . 3 )
2588
- __target_intrinsic (glsl, " subgroupBroadcastFirst($0)" )
2589
- __target_intrinsic (cuda, " _waveReadFirstMultiple($0)" )
2590
- vector< T,N> WaveReadLaneFirst(vector< T,N> expr);
2591
- __generic < T : __BuiltinType, let N : int , let M : int >
2592
- __target_intrinsic (cuda, " _waveReadFirstMultiple($0)" )
2593
- matrix< T,N,M> WaveReadLaneFirst(matrix< T,N,M> expr);
2594
-
2595
- // NOTE! On GLSL based targets the lane index *must* be a compile time expression!
2596
- // See https://github.com/KhronosGroup/GLSL/blob/master/extensions/khr/GL_KHR_shader_subgroup.txt
2597
- __generic < T : __BuiltinType>
2598
- __glsl_extension(GL_KHR_shader_subgroup_ballot)
2599
- __spirv_version(1 . 3 )
2600
- __target_intrinsic (glsl, " subgroupBroadcast($0, $1)" )
2601
- __target_intrinsic (cuda, " __shfl_sync(__activemask(), $0, $1)" )
2602
- T WaveReadLaneAt(T value, int lane);
2603
- __generic < T : __BuiltinType, let N : int >
2604
- __spirv_version(1 . 3 )
2605
- __target_intrinsic (glsl, " subgroupBroadcast($0, $1)" )
2606
- __target_intrinsic (cuda, " _waveReadLaneAtMultiple($0, $1)" )
2607
- vector< T,N> WaveReadLaneAt(vector< T,N> value, int lane);
2608
- __generic < T : __BuiltinType, let N : int , let M : int >
2609
- __target_intrinsic (cuda, " _waveReadLaneAtMultiple($0, $1)" )
2610
- matrix< T,N,M> WaveReadLaneAt(matrix< T,N,M> value, int lane);
2611
-
2612
-
2613
2621
// `typedef`s that paper over HLSL having been partially case-insensitive at various points in its history
2614
2622
typedef Texture2D texture2D;
2615
2623
0 commit comments