diff --git a/source/slang-core-module/slang-embedded-core-module-source.cpp b/source/slang-core-module/slang-embedded-core-module-source.cpp index 557b50eb2d..9dc324d2a6 100644 --- a/source/slang-core-module/slang-embedded-core-module-source.cpp +++ b/source/slang-core-module/slang-embedded-core-module-source.cpp @@ -56,12 +56,6 @@ enum BaseTypeConversionRank : uint8_t kBaseTypeConversionRank_Int32, kBaseTypeConversionRank_IntPtr, kBaseTypeConversionRank_Int64, - - // Packed type conversion ranks where the overall rank order does not apply. - // They must be explicitly casted to another type. - kBaseTypeConversionRank_Int8x4Packed, - kBaseTypeConversionRank_UInt8x4Packed, - kBaseTypeConversionRank_Error, }; @@ -155,17 +149,6 @@ static const BaseTypeConversionInfo kBaseTypes[] = { UINT_MASK, kBaseTypeConversionKind_Unsigned, kBaseTypeConversionRank_IntPtr}, - - {"int8_t4_packed", - BaseType::Int8x4Packed, - 0, - kBaseTypeConversionKind_Unsigned, - kBaseTypeConversionRank_Int8x4Packed}, - {"uint8_t4_packed", - BaseType::UInt8x4Packed, - 0, - kBaseTypeConversionKind_Unsigned, - kBaseTypeConversionRank_UInt8x4Packed}, }; void Session::finalizeSharedASTBuilder() @@ -192,12 +175,6 @@ void Session::finalizeSharedASTBuilder() globalAstBuilder->getBuiltinType(baseType.tag); } -static bool isConversionRankPackedType(BaseTypeConversionRank rank) -{ - return (rank == BaseTypeConversionRank::kBaseTypeConversionRank_Int8x4Packed) || - (rank == BaseTypeConversionRank::kBaseTypeConversionRank_UInt8x4Packed); -} - // Given two base types, we need to be able to compute the cost of converting between them. ConversionCost getBaseTypeConversionCost( BaseTypeConversionInfo const& toInfo, @@ -210,14 +187,6 @@ ConversionCost getBaseTypeConversionCost( return kConversionCost_None; } - // Handle special case for packed types, where they must be explicitly casted to another type. - bool isToPackedType = isConversionRankPackedType(toInfo.conversionRank); - bool isFromPackedType = isConversionRankPackedType(fromInfo.conversionRank); - if (isToPackedType || isFromPackedType) - { - return kConversionCost_GeneralConversion; - } - // Conversions within the same kind are easist to handle if (toInfo.conversionKind == fromInfo.conversionKind) { diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index c9f3fb5337..a671a3dc44 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -17309,8 +17309,8 @@ uint dot4add_u8packed(uint x, uint y, uint acc) result:$$uint = OpIAdd %dotResult $acc; }; default: - uint4 vecX = unpack_u8u32(uint8_t4_packed(x)); - uint4 vecY = unpack_u8u32(uint8_t4_packed(y)); + uint4 vecX = unpackUint4x8ToUint32(x); + uint4 vecY = unpackUint4x8ToUint32(y); return dot(vecX, vecY) + acc; } } @@ -17337,8 +17337,8 @@ int dot4add_i8packed(uint x, uint y, int acc) result:$$int = OpIAdd %dotResult $acc; }; default: - int4 vecX = unpack_s8s32(int8_t4_packed(x)); - int4 vecY = unpack_s8s32(int8_t4_packed(y)); + int4 vecX = unpackInt4x8ToInt32(x); + int4 vecY = unpackInt4x8ToInt32(y); return dot(vecX, vecY) + acc; } } @@ -24035,383 +24035,426 @@ T workgroupUniformLoad(__ref T v) } // -// Pack/Unpack Math Intrinsics +// HLSL Pack/Unpack Math Intrinsics // // These were introduced in SM 6.6 but requirements are dropped to SM 5.0 here // to expose these intrinsics on targets that do not have SM 6.6 features. // -//@hidden: +//@public: + +typealias uint8_t4_packed = uint; +typealias int8_t4_packed = uint; + +/// Unpack 4 signed 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -uint16_t __lsb_as_u16(uint32_t val) +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +int16_t4 unpack_s8s16(int8_t4_packed packed) { - return uint16_t(val & 0xFFU); + return unpackInt4x8ToInt16(packed); } -//@hidden: +/// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] -uint32_t __lsb_as_u32(uint32_t val) +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +uint16_t4 unpack_u8u16(uint8_t4_packed packed) { - return (val & 0xFFU); + return unpackUint4x8ToUint16(packed); } -//@hidden: +/// Unpack 4 signed 8-bit values into a vector of 32 bit integers. [__readNone] [ForceInline] -int8_t __lsb_as_s8(uint32_t val) +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +int32_t4 unpack_s8s32(int8_t4_packed packed) { - return int8_t(val & 0xFFU); + return unpackInt4x8ToInt32(packed); } -//@hidden: +/// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers. [__readNone] [ForceInline] -int16_t __lsb_as_s16(uint32_t val) +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +uint32_t4 unpack_u8u32(uint8_t4_packed packed) { - return int16_t(__lsb_as_s8(val)); + return unpackUint4x8ToUint32(packed); } -//@hidden: +/// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] -int32_t __lsb_as_s32(uint32_t val) +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +uint8_t4_packed pack_u8(uint32_t4 unpackedValue) { - return int32_t(__lsb_as_s8(val)); + return packUint4x8(unpackedValue); } -//@hidden: +/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +int8_t4_packed pack_s8(int32_t4 unpackedValue) +{ + return packInt4x8(unpackedValue); +} + +/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +uint8_t4_packed pack_u8(uint16_t4 unpackedValue) +{ + return packUint4x8(unpackedValue); +} + +/// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +int8_t4_packed pack_s8(int16_t4 unpackedValue) +{ + return packInt4x8(unpackedValue); +} + +/// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, +/// clamping each value to the range [0, 255] to ensure it fits within 8 bits. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue) +{ + return packUint4x8Clamp(unpackedValue); +} + +/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, +/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue) +{ + return packInt4x8Clamp(unpackedValue); +} + +/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, +/// clamping each value to the range [0, 255] to ensure it fits within 8 bits. [__readNone] [ForceInline] -uint32_t __lsb_clamp_u8_as_u32(int32_t val) +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue) { - return clamp(val, 0, 255); + return packUint4x8Clamp(unpackedValue); } +/// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, +/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. +[__readNone] +[ForceInline] +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue) +{ + return packInt4x8Clamp(unpackedValue); +} + +// Work-graphs + +//@public: +/// read-only input to Broadcasting launch node. +__generic +//TODO: DispatchNodeInputRecord should be available only for broadcasting node shader. +//[require(broadcasting_node)] +[require(spirv)] +struct DispatchNodeInputRecord +{ + /// Provide an access to a record object that only holds a single record. + NodePayloadPtr Get() + { + int index = 0; + __target_switch + { + case spirv: + return spirv_asm + { + %in_payload_t = OpTypeNodePayloadArrayAMDX $$T; + %in_payload_ptr_t = OpTypePointer NodePayloadAMDX %in_payload_t; + %var = OpVariable %in_payload_ptr_t NodePayloadAMDX; + result : $$NodePayloadPtr = OpAccessChain %var $index; + }; + } + } +}; + +// +// Pack/Unpack Intrinsics +// + //@hidden: + +[__readNone] +[ForceInline] +uint16_t __lsbAsUint16(uint32_t val) +{ + return uint16_t(val & 0xFFU); +} + +[__readNone] +[ForceInline] +uint32_t __lsbAsUint32(uint32_t val) +{ + return (val & 0xFFU); +} + +[__readNone] +[ForceInline] +int8_t __lsbAsInt8(uint32_t val) +{ + return int8_t(val); +} + [__readNone] [ForceInline] -uint32_t __lsb_clamp_s8_as_u32(int32_t val) +int16_t __lsbAsInt16(uint32_t val) { - return (uint32_t(clamp(val, -128, 127)) & 0xFFU); + return int16_t(__lsbAsInt8(val)); +} + +[__readNone] +[ForceInline] +int32_t __lsbAsInt32(uint32_t val) +{ + return int32_t(__lsbAsInt8(val)); } //@public: -/// Unpack 4 signed 8-bit values into a vector of 16 bit integers. + +/// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers. [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -int16_t4 unpack_s8s16(int8_t4_packed packed) +[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] +uint32_t4 unpackUint4x8ToUint32(uint packedValue) { __target_switch { - case hlsl: __intrinsic_asm "unpack_s8s16"; - case spirv: + case hlsl: __intrinsic_asm "unpack_u8u32"; + case wgsl: __intrinsic_asm "unpack4xU8"; + case spirv: return spirv_asm { - %s8Vec = OpBitcast $$vector $packed; - result:$$vector = OpSConvert %s8Vec + %u8Vec = OpBitcast $$vector $packedValue; + result:$$vector = OpUConvert %u8Vec }; default: - uint32_t packedValue = uint32_t(packed); - return int16_t4 + return uint32_t4 ( - __lsb_as_s16(packedValue), - __lsb_as_s16(packedValue >> 8U), - __lsb_as_s16(packedValue >> 16U), - __lsb_as_s16(packedValue >> 24U), + __lsbAsUint32(packedValue), + __lsbAsUint32(packedValue >> 8U), + __lsbAsUint32(packedValue >> 16U), + uint32_t(packedValue >> 24U), ); } } -//@public: /// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -uint16_t4 unpack_u8u16(uint8_t4_packed packed) +uint16_t4 unpackUint4x8ToUint16(uint packedValue) { __target_switch { case hlsl: __intrinsic_asm "unpack_u8u16"; - case spirv: + case spirv: return spirv_asm { - %u8Vec = OpBitcast $$vector $packed; + %u8Vec = OpBitcast $$vector $packedValue; result:$$vector = OpUConvert %u8Vec }; default: - uint32_t packedValue = uint32_t(packed); return uint16_t4 ( - __lsb_as_u16(packedValue), - __lsb_as_u16(packedValue >> 8U), - __lsb_as_u16(packedValue >> 16U), - __lsb_as_u16(packedValue >> 24U), + __lsbAsUint16(packedValue), + __lsbAsUint16(packedValue >> 8U), + __lsbAsUint16(packedValue >> 16U), + uint16_t(packedValue >> 24U), ); } } -//@public: /// Unpack 4 signed 8-bit values into a vector of 32 bit integers. [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -int32_t4 unpack_s8s32(int8_t4_packed packed) +int32_t4 unpackInt4x8ToInt32(uint packedValue) { __target_switch { case hlsl: __intrinsic_asm "unpack_s8s32"; case wgsl: __intrinsic_asm "unpack4xI8"; - case spirv: + case spirv: return spirv_asm { - %s8Vec = OpBitcast $$vector $packed; + %s8Vec = OpBitcast $$vector $packedValue; result:$$vector = OpSConvert %s8Vec }; default: - uint32_t packedValue = uint32_t(packed); return int32_t4 ( - __lsb_as_s32(packedValue), - __lsb_as_s32(packedValue >> 8U), - __lsb_as_s32(packedValue >> 16U), - __lsb_as_s32(packedValue >> 24U), + __lsbAsInt32(packedValue), + __lsbAsInt32(packedValue >> 8U), + __lsbAsInt32(packedValue >> 16U), + int32_t(int8_t(packedValue >> 24U)), ); } } -//@public: -/// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers. +/// Unpack 4 signed 8-bit values into a vector of 16 bit integers. [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -uint32_t4 unpack_u8u32(uint8_t4_packed packed) +int16_t4 unpackInt4x8ToInt16(uint packedValue) { __target_switch { - case hlsl: __intrinsic_asm "unpack_u8u32"; - case wgsl: __intrinsic_asm "unpack4xU8"; - case spirv: + case hlsl: __intrinsic_asm "unpack_s8s16"; + case spirv: return spirv_asm { - %u8Vec = OpBitcast $$vector $packed; - result:$$vector = OpUConvert %u8Vec + %s8Vec = OpBitcast $$vector $packedValue; + result:$$vector = OpSConvert %s8Vec }; default: - uint32_t packedValue = uint32_t(packed); - return uint32_t4 + return int16_t4 ( - __lsb_as_u32(packedValue), - __lsb_as_u32(packedValue >> 8U), - __lsb_as_u32(packedValue >> 16U), - __lsb_as_u32(packedValue >> 24U), + __lsbAsInt16(packedValue), + __lsbAsInt16(packedValue >> 8U), + __lsbAsInt16(packedValue >> 16U), + int16_t(int8_t(packedValue >> 24U)), ); } } -//@public: /// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -uint8_t4_packed pack_u8(uint32_t4 unpackedValue) +uint packUint4x8(uint32_t4 unpackedValue) { __target_switch { case hlsl: __intrinsic_asm "pack_u8"; case wgsl: __intrinsic_asm "pack4xU8"; default: - return uint8_t4_packed - ( - __lsb_as_u32(unpackedValue.x) - | (__lsb_as_u32(unpackedValue.y) << 8U) - | (__lsb_as_u32(unpackedValue.z) << 16U) - | (__lsb_as_u32(unpackedValue.w) << 24U) - ); + return __lsbAsUint32(unpackedValue.x) + | (__lsbAsUint32(unpackedValue.y) << 8U) + | (__lsbAsUint32(unpackedValue.z) << 16U) + | (unpackedValue.w << 24U); } } -//@public: -/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. +/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -int8_t4_packed pack_s8(int32_t4 unpackedValue) +uint packUint4x8(uint16_t4 unpackedValue) { __target_switch { - case hlsl: __intrinsic_asm "pack_s8"; - case wgsl: __intrinsic_asm "pack4xI8"; + case hlsl: __intrinsic_asm "pack_u8"; default: - return int8_t4_packed - ( - __lsb_as_u32(unpackedValue.x) - | (__lsb_as_u32(unpackedValue.y) << 8U) - | (__lsb_as_u32(unpackedValue.z) << 16U) - | (__lsb_as_u32(unpackedValue.w) << 24U) - ); + return packUint4x8(uint32_t4(unpackedValue)); } } -//@public: -/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. +/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -uint8_t4_packed pack_u8(uint16_t4 unpackedValue) +uint packInt4x8(int32_t4 unpackedValue) { __target_switch { - case hlsl: __intrinsic_asm "pack_u8"; + case hlsl: __intrinsic_asm "pack_s8"; + case wgsl: __intrinsic_asm "pack4xI8"; default: - return uint8_t4_packed - ( - __lsb_as_u32(unpackedValue.x) - | (__lsb_as_u32(unpackedValue.y) << 8U) - | (__lsb_as_u32(unpackedValue.z) << 16U) - | (__lsb_as_u32(unpackedValue.w) << 24U) - ); + return packUint4x8(uint32_t4(unpackedValue)); } } -//@public: /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits. [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -int8_t4_packed pack_s8(int16_t4 unpackedValue) +uint packInt4x8(int16_t4 unpackedValue) { __target_switch { case hlsl: __intrinsic_asm "pack_s8"; default: - return int8_t4_packed - ( - __lsb_as_u32(unpackedValue.x) - | (__lsb_as_u32(unpackedValue.y) << 8U) - | (__lsb_as_u32(unpackedValue.z) << 16U) - | (__lsb_as_u32(unpackedValue.w) << 24U) - ); + return packUint4x8(uint32_t4(unpackedValue)); } } -//@public: -/// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, -/// clamping each value to the range [0, 255] to ensure it fits within 8 bits. +/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, +/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue) +uint packUint4x8Clamp(int32_t4 unpackedValue) { __target_switch { case hlsl: __intrinsic_asm "pack_clamp_u8"; case wgsl: __intrinsic_asm "pack4xU8Clamp(vec4($0))"; default: - return uint8_t4_packed - ( - __lsb_clamp_u8_as_u32(unpackedValue.x) - | (__lsb_clamp_u8_as_u32(unpackedValue.y) << 8U) - | (__lsb_clamp_u8_as_u32(unpackedValue.z) << 16U) - | (__lsb_clamp_u8_as_u32(unpackedValue.w) << 24U) - ); + return packInt4x8(clamp(unpackedValue, 0, 255)); } } -//@public: -/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, -/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. +/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, +/// clamping each value to the range [0, 255] to ensure it fits within 8 bits. [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue) +uint packUint4x8Clamp(int16_t4 unpackedValue) { __target_switch { - case hlsl: __intrinsic_asm "pack_clamp_s8"; - case wgsl: __intrinsic_asm "pack4xI8Clamp"; + case hlsl: __intrinsic_asm "pack_clamp_u8"; default: - return int8_t4_packed - ( - __lsb_clamp_s8_as_u32(unpackedValue.x) - | (__lsb_clamp_s8_as_u32(unpackedValue.y) << 8U) - | (__lsb_clamp_s8_as_u32(unpackedValue.z) << 16U) - | (__lsb_clamp_s8_as_u32(unpackedValue.w) << 24U) - ); + return packInt4x8(clamp(unpackedValue, 0, 255)); } } -//@public: -/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, -/// clamping each value to the range [0, 255] to ensure it fits within 8 bits. +/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, +/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue) +uint packInt4x8Clamp(int32_t4 unpackedValue) { __target_switch { - case hlsl: __intrinsic_asm "pack_clamp_u8"; + case hlsl: __intrinsic_asm "pack_clamp_s8"; + case wgsl: __intrinsic_asm "pack4xI8Clamp"; default: - return uint8_t4_packed - ( - __lsb_clamp_u8_as_u32(unpackedValue.x) - | (__lsb_clamp_u8_as_u32(unpackedValue.y) << 8U) - | (__lsb_clamp_u8_as_u32(unpackedValue.z) << 16U) - | (__lsb_clamp_u8_as_u32(unpackedValue.w) << 24U) - ); + return packInt4x8(clamp(unpackedValue, -128, 127)); } } -//@public: /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits. [__readNone] [ForceInline] [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)] -int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue) +uint packInt4x8Clamp(int16_t4 unpackedValue) { __target_switch { case hlsl: __intrinsic_asm "pack_clamp_s8"; default: - return int8_t4_packed - ( - __lsb_clamp_s8_as_u32(unpackedValue.x) - | (__lsb_clamp_s8_as_u32(unpackedValue.y) << 8U) - | (__lsb_clamp_s8_as_u32(unpackedValue.z) << 16U) - | (__lsb_clamp_s8_as_u32(unpackedValue.w) << 24U) - ); + return packInt4x8(clamp(unpackedValue, -128, 127)); } } - -// Work-graphs - -//@public: -/// read-only input to Broadcasting launch node. -__generic -//TODO: DispatchNodeInputRecord should be available only for broadcasting node shader. -//[require(broadcasting_node)] -[require(spirv)] -struct DispatchNodeInputRecord -{ - /// Provide an access to a record object that only holds a single record. - NodePayloadPtr Get() - { - int index = 0; - __target_switch - { - case spirv: - return spirv_asm - { - %in_payload_t = OpTypeNodePayloadArrayAMDX $$T; - %in_payload_ptr_t = OpTypePointer NodePayloadAMDX %in_payload_t; - %var = OpVariable %in_payload_ptr_t NodePayloadAMDX; - result : $$NodePayloadPtr = OpAccessChain %var $index; - }; - } - } -}; - diff --git a/source/slang/slang-check-conversion.cpp b/source/slang/slang-check-conversion.cpp index a9785a585e..c5cf192ee3 100644 --- a/source/slang/slang-check-conversion.cpp +++ b/source/slang/slang-check-conversion.cpp @@ -1026,8 +1026,6 @@ int getTypeBitSize(Type* t) return 16; case BaseType::Int: case BaseType::UInt: - case BaseType::Int8x4Packed: - case BaseType::UInt8x4Packed: return 32; case BaseType::Int64: case BaseType::UInt64: diff --git a/source/slang/slang-check-decl.cpp b/source/slang/slang-check-decl.cpp index 486ac6e9c9..5ee8ba3efe 100644 --- a/source/slang/slang-check-decl.cpp +++ b/source/slang/slang-check-decl.cpp @@ -1981,8 +1981,6 @@ void SemanticsDeclHeaderVisitor::checkVarDeclCommon(VarDeclBase* varDecl) case BaseType::UInt: case BaseType::UInt64: case BaseType::UIntPtr: - case BaseType::Int8x4Packed: - case BaseType::UInt8x4Packed: break; default: getSink()->diagnose(varDecl, Diagnostics::staticConstRequirementMustBeIntOrBool); diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp index 1c48d98efd..ff40d5b286 100644 --- a/source/slang/slang-emit-c-like.cpp +++ b/source/slang/slang-emit-c-like.cpp @@ -266,11 +266,6 @@ void CLikeSourceEmitter::emitSimpleType(IRType* type) case kIROp_UIntPtrType: return UnownedStringSlice("uintptr_t"); - case kIROp_Int8x4PackedType: - return UnownedStringSlice("int8_t4_packed"); - case kIROp_UInt8x4PackedType: - return UnownedStringSlice("uint8_t4_packed"); - case kIROp_HalfType: return UnownedStringSlice("half"); @@ -1334,8 +1329,6 @@ void CLikeSourceEmitter::emitSimpleValueImpl(IRInst* inst) return; } case BaseType::UInt: - case BaseType::Int8x4Packed: - case BaseType::UInt8x4Packed: { m_writer->emit(UInt(uint32_t(litInst->value.intVal))); m_writer->emit("U"); @@ -4045,8 +4038,6 @@ void CLikeSourceEmitter::emitVecNOrScalar( m_writer->emit("ushort"); break; case kIROp_UIntType: - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: m_writer->emit("uint"); break; case kIROp_UInt64Type: diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp index 13a85e8abf..bfc021677d 100644 --- a/source/slang/slang-emit-cpp.cpp +++ b/source/slang/slang-emit-cpp.cpp @@ -101,13 +101,8 @@ static const char s_xyzwNames[] = "xyzw"; case kIROp_UIntPtrType: return UnownedStringSlice("uintptr_t"); - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: - return UnownedStringSlice("uint32_t"); - // Not clear just yet how we should handle half... we want all processing as float // probly, but when reading/writing to memory converting - case kIROp_HalfType: return UnownedStringSlice("half"); diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp index 58ac377bfb..8657b3707e 100644 --- a/source/slang/slang-emit-cuda.cpp +++ b/source/slang/slang-emit-cuda.cpp @@ -84,9 +84,6 @@ UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op) case kIROp_UIntPtrType: return UnownedStringSlice("uint"); #endif - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: - return UnownedStringSlice("uint"); case kIROp_HalfType: return UnownedStringSlice("__half"); diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp index 696830bf24..fca5a8933b 100644 --- a/source/slang/slang-emit-glsl.cpp +++ b/source/slang/slang-emit-glsl.cpp @@ -1330,8 +1330,6 @@ void GLSLSourceEmitter::emitSimpleValueImpl(IRInst* inst) return; } case BaseType::UInt: - case BaseType::Int8x4Packed: - case BaseType::UInt8x4Packed: { m_writer->emit(UInt(uint32_t(litInst->value.intVal))); m_writer->emit("U"); @@ -2175,8 +2173,6 @@ bool GLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu break; case BaseType::UInt: - case BaseType::Int8x4Packed: - case BaseType::UInt8x4Packed: if (fromType == BaseType::Float) { m_writer->emit("floatBitsToUint"); @@ -3186,18 +3182,6 @@ void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type) #endif return; } - case kIROp_Int8x4PackedType: - { - _requireBaseType(BaseType::Int8x4Packed); - m_writer->emit("uint"); - return; - } - case kIROp_UInt8x4PackedType: - { - _requireBaseType(BaseType::UInt8x4Packed); - m_writer->emit("uint"); - return; - } case kIROp_VoidType: case kIROp_BoolType: case kIROp_Int8Type: diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp index 59d40d3a14..89300e13e3 100644 --- a/source/slang/slang-emit-hlsl.cpp +++ b/source/slang/slang-emit-hlsl.cpp @@ -902,8 +902,6 @@ bool HLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu case BaseType::UInt64: case BaseType::UIntPtr: case BaseType::Bool: - case BaseType::Int8x4Packed: - case BaseType::UInt8x4Packed: // Because the intermediate type will always // be an integer type, we can convert to // another integer type of the same size @@ -943,8 +941,6 @@ bool HLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu case BaseType::UInt: case BaseType::Int: case BaseType::Bool: - case BaseType::Int8x4Packed: - case BaseType::UInt8x4Packed: break; case BaseType::UInt16: case BaseType::Int16: @@ -1330,8 +1326,6 @@ void HLSLSourceEmitter::emitSimpleTypeImpl(IRType* type) case kIROp_Int16Type: case kIROp_UInt16Type: case kIROp_HalfType: - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: { m_writer->emit(getDefaultBuiltinTypeName(type->getOp())); return; diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp index 0a7db8b288..1bb7383469 100644 --- a/source/slang/slang-emit-metal.cpp +++ b/source/slang/slang-emit-metal.cpp @@ -1099,10 +1099,6 @@ void MetalSourceEmitter::emitSimpleTypeImpl(IRType* type) case kIROp_UIntPtrType: m_writer->emit("ulong"); return; - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: - m_writer->emit("uint"); - return; case kIROp_StructType: m_writer->emit(getName(type)); return; diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp index 802df915e6..c7e222247c 100644 --- a/source/slang/slang-emit-spirv.cpp +++ b/source/slang/slang-emit-spirv.cpp @@ -1466,8 +1466,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex case kIROp_Int8Type: case kIROp_IntType: case kIROp_Int64Type: - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: { const IntInfo i = getIntTypeInfo(as(inst)); if (i.width == 16) @@ -7642,8 +7640,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex case kIROp_UInt64Type: case kIROp_UInt8Type: case kIROp_UIntPtrType: - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: spvEncoding = 6; // Unsigned break; case kIROp_FloatType: diff --git a/source/slang/slang-emit-wgsl.cpp b/source/slang/slang-emit-wgsl.cpp index 7c83b194d5..d87cd06deb 100644 --- a/source/slang/slang-emit-wgsl.cpp +++ b/source/slang/slang-emit-wgsl.cpp @@ -511,10 +511,6 @@ void WGSLSourceEmitter::emitSimpleTypeImpl(IRType* type) case kIROp_UIntPtrType: m_writer->emit("u64"); return; - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: - m_writer->emit("u32"); - return; case kIROp_StructType: m_writer->emit(getName(type)); return; @@ -967,8 +963,6 @@ void WGSLSourceEmitter::emitSimpleValueImpl(IRInst* inst) return; } case BaseType::UInt: - case BaseType::Int8x4Packed: - case BaseType::UInt8x4Packed: { m_writer->emit("u32("); m_writer->emit(UInt(uint32_t(litInst->value.intVal))); diff --git a/source/slang/slang-ir-any-value-marshalling.cpp b/source/slang/slang-ir-any-value-marshalling.cpp index 2dc91d299a..d124b293d4 100644 --- a/source/slang/slang-ir-any-value-marshalling.cpp +++ b/source/slang/slang-ir-any-value-marshalling.cpp @@ -153,8 +153,6 @@ struct AnyValueMarshallingContext case kIROp_IntPtrType: case kIROp_UIntPtrType: case kIROp_PtrType: - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: context->marshalBasicType(builder, dataType, concreteTypedVar); break; case kIROp_VectorType: @@ -311,8 +309,6 @@ struct AnyValueMarshallingContext break; } case kIROp_UIntType: - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: #if SLANG_PTR_IS_32 case kIROp_UIntPtrType: #endif @@ -560,8 +556,6 @@ struct AnyValueMarshallingContext break; } case kIROp_UIntType: - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: { ensureOffsetAt4ByteBoundary(); if (fieldOffset < static_cast(anyValInfo->fieldKeys.getCount())) @@ -861,8 +855,6 @@ SlangInt _getAnyValueSizeRaw(IRType* type, SlangInt offset) case kIROp_FloatType: case kIROp_UIntType: case kIROp_BoolType: - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: return alignUp(offset, 4) + 4; case kIROp_UInt64Type: case kIROp_Int64Type: diff --git a/source/slang/slang-ir-byte-address-legalize.cpp b/source/slang/slang-ir-byte-address-legalize.cpp index 9207e6a2fc..617c8c7c4f 100644 --- a/source/slang/slang-ir-byte-address-legalize.cpp +++ b/source/slang/slang-ir-byte-address-legalize.cpp @@ -840,8 +840,6 @@ struct ByteAddressBufferLegalizationContext case kIROp_IntType: case kIROp_FloatType: case kIROp_BoolType: - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: // The basic 32-bit types (and `bool`) can be handled by // loading `uint` values and then bit-casting. // diff --git a/source/slang/slang-ir-layout.cpp b/source/slang/slang-ir-layout.cpp index 1332c8a25a..7ce19bf670 100644 --- a/source/slang/slang-ir-layout.cpp +++ b/source/slang/slang-ir-layout.cpp @@ -128,9 +128,6 @@ static Result _calcSizeAndAlignment( BASE(UIntPtr, kPointerSize); BASE(Double, 8); - BASE(Int8x4Packed, 4); - BASE(UInt8x4Packed, 4); - // We are currently handling `bool` following the HLSL // precednet of storing it in 4 bytes. // diff --git a/source/slang/slang-ir-lower-bit-cast.cpp b/source/slang/slang-ir-lower-bit-cast.cpp index 0a77836397..450ad8ac94 100644 --- a/source/slang/slang-ir-lower-bit-cast.cpp +++ b/source/slang/slang-ir-lower-bit-cast.cpp @@ -178,8 +178,6 @@ struct BitCastLoweringContext case kIROp_UIntType: case kIROp_FloatType: case kIROp_BoolType: - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: #if SLANG_PTR_IS_32 case kIROp_IntPtrType: case kIROp_UIntPtrType: diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp index 39c1c5bb18..f75a24ac6a 100644 --- a/source/slang/slang-ir-util.cpp +++ b/source/slang/slang-ir-util.cpp @@ -107,8 +107,6 @@ IROp getTypeStyle(IROp op) case kIROp_UInt64Type: case kIROp_IntPtrType: case kIROp_UIntPtrType: - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: { // All int like return kIROp_IntType; @@ -144,8 +142,6 @@ IROp getTypeStyle(BaseType op) case BaseType::UInt: case BaseType::UInt64: case BaseType::UIntPtr: - case BaseType::Int8x4Packed: - case BaseType::UInt8x4Packed: return kIROp_IntType; case BaseType::Half: case BaseType::Float: @@ -476,12 +472,6 @@ void getTypeNameHint(StringBuilder& sb, IRInst* type) case kIROp_UIntPtrType: sb << "uintptr"; break; - case kIROp_Int8x4PackedType: - sb << "int8_t4_packed"; - break; - case kIROp_UInt8x4PackedType: - sb << "uint8_t4_packed"; - break; case kIROp_CharType: sb << "char"; break; @@ -1862,10 +1852,6 @@ UnownedStringSlice getBasicTypeNameHint(IRType* basicType) return UnownedStringSlice::fromLiteral("uint64"); case kIROp_UIntPtrType: return UnownedStringSlice::fromLiteral("uintptr"); - case kIROp_Int8x4PackedType: - return UnownedStringSlice::fromLiteral("int8_t4_packed"); - case kIROp_UInt8x4PackedType: - return UnownedStringSlice::fromLiteral("uint8_t4_packed"); case kIROp_FloatType: return UnownedStringSlice::fromLiteral("float"); case kIROp_HalfType: diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp index 1b0f99cd39..b982385fa2 100644 --- a/source/slang/slang-ir.cpp +++ b/source/slang/slang-ir.cpp @@ -3814,8 +3814,6 @@ IRInst* IRBuilder::emitDefaultConstruct(IRType* type, bool fallback) case kIROp_UIntType: case kIROp_UIntPtrType: case kIROp_UInt64Type: - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: case kIROp_CharType: return getIntValue(type, 0); case kIROp_BoolType: @@ -7543,8 +7541,6 @@ bool isIntegralType(IRType* t) case BaseType::UInt64: case BaseType::IntPtr: case BaseType::UIntPtr: - case BaseType::Int8x4Packed: - case BaseType::UInt8x4Packed: return true; default: return false; @@ -7591,10 +7587,6 @@ IntInfo getIntTypeInfo(const IRType* intType) case kIROp_Int64Type: return {64, true}; - case kIROp_Int8x4PackedType: - case kIROp_UInt8x4PackedType: - return {32, false}; - case kIROp_IntPtrType: // target platform dependent case kIROp_UIntPtrType: // target platform dependent default: diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp index e5037bf04a..e5ca77634c 100644 --- a/source/slang/slang-lower-to-ir.cpp +++ b/source/slang/slang-lower-to-ir.cpp @@ -4633,8 +4633,6 @@ struct ExprLoweringVisitorBase : public ExprVisitor case BaseType::UInt64: case BaseType::UIntPtr: case BaseType::IntPtr: - case BaseType::Int8x4Packed: - case BaseType::UInt8x4Packed: return LoweredValInfo::simple(getBuilder()->getIntValue(type, 0)); case BaseType::Half: diff --git a/source/slang/slang-mangle.cpp b/source/slang/slang-mangle.cpp index dedbb2d486..d51fafb6bd 100644 --- a/source/slang/slang-mangle.cpp +++ b/source/slang/slang-mangle.cpp @@ -186,13 +186,6 @@ void emitBaseType(ManglingContext* context, BaseType baseType) case BaseType::IntPtr: emitRaw(context, "ip"); break; - case BaseType::Int8x4Packed: - emitRaw(context, "c4p"); - break; - case BaseType::UInt8x4Packed: - emitRaw(context, "C4p"); - break; - default: SLANG_UNEXPECTED("unimplemented case in base type mangling"); break; diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp index 7968450b49..e6729ca850 100644 --- a/source/slang/slang-type-layout.cpp +++ b/source/slang/slang-type-layout.cpp @@ -111,10 +111,6 @@ struct DefaultLayoutRulesImpl : SimpleLayoutRulesImpl sizeof(intptr_t), sizeof(intptr_t)); - case BaseType::Int8x4Packed: - case BaseType::UInt8x4Packed: - return SimpleLayoutInfo(LayoutResourceKind::Uniform, 4, 4); - case BaseType::Half: return SimpleLayoutInfo(LayoutResourceKind::Uniform, 2, 2); case BaseType::Float: diff --git a/source/slang/slang-type-system-shared.h b/source/slang/slang-type-system-shared.h index 583eb22166..d7bd43122e 100644 --- a/source/slang/slang-type-system-shared.h +++ b/source/slang/slang-type-system-shared.h @@ -22,8 +22,6 @@ namespace Slang X(Char) \ X(IntPtr) \ X(UIntPtr) \ - X(Int8x4Packed) \ - X(UInt8x4Packed) \ /* end */ enum class BaseType diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp index fe7ecc4e94..924a4371ac 100644 --- a/source/slang/slang.cpp +++ b/source/slang/slang.cpp @@ -82,8 +82,6 @@ namespace Slang BaseTypeInfo::Flag::Signed | BaseTypeInfo::Flag::Integer, uint8_t(BaseType::IntPtr)}, {uint8_t(sizeof(uintptr_t)), BaseTypeInfo::Flag::Integer, uint8_t(BaseType::UIntPtr)}, - {uint8_t(sizeof(uint32_t)), BaseTypeInfo::Flag::Integer, uint8_t(BaseType::Int8x4Packed)}, - {uint8_t(sizeof(uint32_t)), BaseTypeInfo::Flag::Integer, uint8_t(BaseType::UInt8x4Packed)}, }; /* static */ bool BaseTypeInfo::check() @@ -135,10 +133,6 @@ namespace Slang return UnownedStringSlice::fromLiteral("intptr_t"); case BaseType::UIntPtr: return UnownedStringSlice::fromLiteral("uintptr_t"); - case BaseType::Int8x4Packed: - return UnownedStringSlice::fromLiteral("int8_t4_packed"); - case BaseType::UInt8x4Packed: - return UnownedStringSlice::fromLiteral("uint8_t4_packed"); default: { SLANG_ASSERT(!"Unknown basic type"); diff --git a/tests/hlsl-intrinsic/packed/pack-unpack.slang b/tests/hlsl-intrinsic/packed/pack-unpack.slang index b20e69fa84..0b9490ee4a 100644 --- a/tests/hlsl-intrinsic/packed/pack-unpack.slang +++ b/tests/hlsl-intrinsic/packed/pack-unpack.slang @@ -9,6 +9,18 @@ // Debug info for inlining errors can be given out, so disable them for this test. //TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -g0 +//TEST(compute):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -xslang -DUSE_SLANG_SYNTAX +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -render-feature hardware-device -xslang -DUSE_SLANG_SYNTAX +//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -profile cs_6_6 -dx12 -use-dxil -shaderobj -render-feature hardware-device -xslang -DUSE_SLANG_SYNTAX +//TEST(compute):COMPARE_COMPUTE_EX:-metal -compute -shaderobj -xslang -DUSE_SLANG_SYNTAX +//TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj -xslang -DUSE_SLANG_SYNTAX + +// 16 bit variants are not supported by WGSL. +//TEST(compute):COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj -xslang -DWGSL -xslang -DUSE_SLANG_SYNTAX +// Debug info for inlining errors can be given out, so disable them for this test. +//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -g0 -xslang -DUSE_SLANG_SYNTAX + + //TEST_INPUT:ubuffer(data=[0xD37A83FF], stride=4):name unpackTestBuffer StructuredBuffer unpackTestBuffer; @@ -27,17 +39,125 @@ StructuredBuffer packClampSTestBuffer; //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer RWStructuredBuffer outputBuffer; +uint32_t4 __unpack_u8u32(uint value) +{ +#if defined(USE_SLANG_SYNTAX) + return unpackUint4x8ToUint32(value); +#else + return unpack_u8u32(uint8_t4_packed(value)); +#endif +} + +uint16_t4 __unpack_u8u16(uint value) +{ +#if defined(USE_SLANG_SYNTAX) + return unpackUint4x8ToUint16(value); +#else + return unpack_u8u16(uint8_t4_packed(value)); +#endif +} + +int32_t4 __unpack_s8s32(uint value) +{ +#if defined(USE_SLANG_SYNTAX) + return unpackInt4x8ToInt32(value); +#else + return unpack_s8s32(int8_t4_packed(value)); +#endif +} + +int16_t4 __unpack_s8s16(uint value) +{ +#if defined(USE_SLANG_SYNTAX) + return unpackInt4x8ToInt16(value); +#else + return unpack_s8s16(int8_t4_packed(value)); +#endif +} + +uint __pack_u8(uint32_t4 value) +{ +#if defined(USE_SLANG_SYNTAX) + return packUint4x8(value); +#else + return uint(pack_u8(value)); +#endif +} + +uint __pack_u8(uint16_t4 value) +{ +#if defined(USE_SLANG_SYNTAX) + return packUint4x8(value); +#else + return uint(pack_u8(value)); +#endif +} + +uint __pack_s8(int32_t4 value) +{ +#if defined(USE_SLANG_SYNTAX) + return packInt4x8(value); +#else + return uint(pack_s8(value)); +#endif +} + +uint __pack_s8(int16_t4 value) +{ +#if defined(USE_SLANG_SYNTAX) + return packInt4x8(value); +#else + return uint(pack_s8(value)); +#endif +} + +uint __pack_clamp_u8(int32_t4 value) +{ +#if defined(USE_SLANG_SYNTAX) + return packUint4x8Clamp(value); +#else + return uint(pack_clamp_u8(value)); +#endif +} + +uint __pack_clamp_u8(int16_t4 value) +{ +#if defined(USE_SLANG_SYNTAX) + return packUint4x8Clamp(value); +#else + return uint(pack_clamp_u8(value)); +#endif +} + +uint __pack_clamp_s8(int32_t4 value) +{ +#if defined(USE_SLANG_SYNTAX) + return packInt4x8Clamp(value); +#else + return uint(pack_clamp_s8(value)); +#endif +} + +uint __pack_clamp_s8(int16_t4 value) +{ +#if defined(USE_SLANG_SYNTAX) + return packInt4x8Clamp(value); +#else + return uint(pack_clamp_s8(value)); +#endif +} + [numthreads(1, 1, 1)] void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { - const uint8_t4_packed unpackTestValue = uint8_t4_packed(unpackTestBuffer[0]); + const uint unpackTestValue = unpackTestBuffer[0]; uint index = 0; /* * Unpack without sign extension. */ - uint32_t4 unpackedU32 = unpack_u8u32(unpackTestValue); + uint32_t4 unpackedU32 = __unpack_u8u32(unpackTestValue); // 0xFF outputBuffer[index++] = uint(unpackedU32.x); // 0x83 @@ -48,7 +168,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) outputBuffer[index++] = uint(unpackedU32.w); #if !defined(WGSL) - uint16_t4 unpackedU16 = unpack_u8u16(unpackTestValue); + uint16_t4 unpackedU16 = __unpack_u8u16(unpackTestValue); // 0xFF outputBuffer[index++] = uint(unpackedU16.x); // 0x83 @@ -67,7 +187,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) /* * Unpack with sign extension. */ - int32_t4 unpackedS32 = unpack_s8s32(int8_t4_packed(unpackTestValue)); + int32_t4 unpackedS32 = __unpack_s8s32(unpackTestValue); // 0xFFFFFFFF outputBuffer[index++] = uint(unpackedS32.x); // 0xFFFFFF83 @@ -78,7 +198,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) outputBuffer[index++] = uint(unpackedS32.w); #if !defined(WGSL) - int16_t4 unpackedS16 = unpack_s8s16(int8_t4_packed(unpackTestValue)); + int16_t4 unpackedS16 = __unpack_s8s16(unpackTestValue); // 0xFFFFFFFF outputBuffer[index++] = uint(unpackedS16.x); // 0xFFFFFF83 @@ -100,22 +220,17 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) */ uint32_t4 packU32TestValues = packTestBuffer[0]; int32_t4 packS32TestValues = packU32TestValues; - uint8_t4_packed packU32Result = pack_u8(packU32TestValues); - int8_t4_packed packS32Result = pack_s8(packS32TestValues); // 0xD4236A3F - outputBuffer[index++] = uint(packU32Result); - outputBuffer[index++] = uint(packS32Result); + outputBuffer[index++] = __pack_u8(packU32TestValues); + outputBuffer[index++] = __pack_s8(packS32TestValues); #if !defined(WGSL) uint16_t4 packU16TestValues = int16_t4(int16_t(packU32TestValues.x), int16_t(packU32TestValues.y), int16_t(packU32TestValues.z), int16_t(packU32TestValues.w)); int16_t4 packS16TestValues = packU16TestValues; - uint8_t4_packed packU16Result = pack_u8(packU16TestValues); - int8_t4_packed packS16Result = pack_s8(packS16TestValues); - - outputBuffer[index++] = uint(packU16Result); - outputBuffer[index++] = uint(packS16Result); + outputBuffer[index++] = __pack_u8(packU16TestValues); + outputBuffer[index++] = __pack_s8(packS16TestValues); #else outputBuffer[index++] = 0xD4236A3F; outputBuffer[index++] = 0xD4236A3F; @@ -123,17 +238,15 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) /* * Pack with unsigned clamping. - */ + */ int32_t4 packClampU32TestValues = packClampUTestBuffer[0]; - uint8_t4_packed packClampU32Result = pack_clamp_u8(packClampU32TestValues); // 0xFEFFFF05 - outputBuffer[index++] = uint(packClampU32Result); + outputBuffer[index++] = __pack_clamp_u8(packClampU32TestValues); #if !defined(WGSL) int16_t4 packClampU16TestValues = int16_t4(int16_t(packClampU32TestValues.x), int16_t(packClampU32TestValues.y), int16_t(packClampU32TestValues.z), int16_t(packClampU32TestValues.w)); - uint8_t4_packed packClampU16Result = pack_clamp_u8(packClampU16TestValues); - outputBuffer[index++] = uint(packClampU16Result); + outputBuffer[index++] = __pack_clamp_u8(packClampU16TestValues); #else outputBuffer[index++] = 0xFEFFFF05; #endif @@ -142,15 +255,13 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) * Pack with signed clamping */ int32_t4 packClampS32TestValues = packClampSTestBuffer[0]; - int8_t4_packed packClampS32Result = pack_clamp_s8(packClampS32TestValues); // 0x81807FFF - outputBuffer[index++] = uint(packClampS32Result); + outputBuffer[index++] = __pack_clamp_s8(packClampS32TestValues); #if !defined(WGSL) int16_t4 packClampS16TestValues = int16_t4(int16_t(packClampS32TestValues.x), int16_t(packClampS32TestValues.y), int16_t(packClampS32TestValues.z), int16_t(packClampS32TestValues.w)); - int8_t4_packed packClampS16Result = pack_clamp_s8(packClampS16TestValues); - outputBuffer[index++] = uint(packClampS16Result); + outputBuffer[index++] = __pack_clamp_s8(packClampS16TestValues); #else outputBuffer[index++] = 0x81807FFF; #endif diff --git a/tests/hlsl-intrinsic/packed/packed-types-error.slang b/tests/hlsl-intrinsic/packed/packed-types-error.slang deleted file mode 100644 index 7034f50a90..0000000000 --- a/tests/hlsl-intrinsic/packed/packed-types-error.slang +++ /dev/null @@ -1,21 +0,0 @@ -//TEST(compute):SIMPLE(filecheck=CHECK): -target spirv - -[numthreads(1, 1, 1)] -void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { - uint8_t4_packed packedU1 = 0x0U; - uint8_t4_packed packedU2 = 0xFU; - int8_t4_packed packedS1 = 0xFU; - int8_t4_packed packedS2 = 0xFU; - - // Arithmetic and logical (bitwise) operations are not supported on packed types. - // An attempt to overload these operators will fail during compilation due to ambiguity caused by multiple possible overloads. - - // CHECK: error 39999: ambiguous call to '-' with arguments of type - uint8_t4_packed val1 = packedU1 - packedU2; - // CHECK: error 39999: ambiguous call to '*' with arguments of type - int8_t4_packed val2 = packedS1 * packedS2; - // CHECK: error 39999: ambiguous call to '&' with arguments of type - uint8_t4_packed val3 = packedU1 & packedS1; - // CHECK: error 39999: ambiguous call to '|' with arguments of type - int8_t4_packed val4 = packedU1 | packedS1; -} diff --git a/tests/hlsl-intrinsic/packed/packed-types-warning.slang b/tests/hlsl-intrinsic/packed/packed-types-warning.slang deleted file mode 100644 index 0b46925a40..0000000000 --- a/tests/hlsl-intrinsic/packed/packed-types-warning.slang +++ /dev/null @@ -1,72 +0,0 @@ -//TEST(compute):SIMPLE(filecheck=CHECK): -target spirv - -[numthreads(1, 1, 1)] -void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { - uint8_t4_packed packedU = 0U; - int8_t4_packed packedS = 0xFU; - uint val1 = 0xFU; - - // - // Implicit conversions between the packed types are not OK, they must be explicit. - // - - // CHECK: warning 30081: implicit conversion from 'int8_t4_packed' to 'uint8_t4_packed' is not recommended - packedU = packedS; - // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int8_t4_packed' is not recommended - packedS = packedU; - - - // Implicit casting from 32 bit literals are OK. - packedU = 32U; - packedS = 32; - - // Implicit casting from 64 bit literals are not OK. - // CHECK: warning 30081: implicit conversion from 'uint64_t' to 'uint8_t4_packed' is not recommended - packedU = 0xFFFFFFFFFFULL; - // CHECK: warning 30081: implicit conversion from 'int64_t' to 'uint8_t4_packed' is not recommended - packedU = 0xFFFFFFFFFFLL; - - // - // Explicit casting from other builtin integer types are OK. - // - packedU = uint8_t4_packed(val1); - packedU = uint8_t4_packed(uint16_t(123)); - val1 = uint(packedS); - - // - // Implicit casting from other builtin integer types are not OK. - // - - // CHECK: warning 30081: implicit conversion from 'uint' to 'uint8_t4_packed' is not recommended - packedU = val1; - // CHECK: warning 30081: implicit conversion from 'uint' to 'int8_t4_packed' is not recommended - packedS = val1; - // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'uint' is not recommended - val1 = packedU; - // CHECK: warning 30081: implicit conversion from 'int8_t4_packed' to 'uint' is not recommended - val1 = packedS; - - // CHECK: warning 30081: implicit conversion from 'uint8_t' to 'uint8_t4_packed' is not recommended - packedU = uint8_t(1); - // CHECK: warning 30081: implicit conversion from 'int64_t' to 'int8_t4_packed' is not recommended - packedS = int64_t(1); - // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'uint64_t' is not recommended - uint64_t val2 = packedU; - // CHECK: warning 30081: implicit conversion from 'int8_t4_packed' to 'int16_t' is not recommended - int16_t val3 = packedS; - - // - // Arithmetic and logical (bitwise) operations are not supported on packed types, - // but overload to integer types will be made and unrecommended conversion warnings - // should be thrown out. - // - - // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int' is not recommended - packedU = uint8_t4_packed(packedU + 32); - // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int' is not recommended - packedU = uint8_t4_packed(packedU / 2); - // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int' is not recommended - packedU = uint8_t4_packed(packedU | 0xF); - // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int' is not recommended - packedU = uint8_t4_packed(packedU & 0x3); -} diff --git a/tests/hlsl-intrinsic/packed/packed-types.slang b/tests/hlsl-intrinsic/packed/packed-types.slang deleted file mode 100644 index 0bbc6f4049..0000000000 --- a/tests/hlsl-intrinsic/packed/packed-types.slang +++ /dev/null @@ -1,36 +0,0 @@ -//TEST(compute):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -render-feature hardware-device -//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -profile cs_6_6 -dx12 -use-dxil -shaderobj -render-feature hardware-device -//TEST(compute):COMPARE_COMPUTE_EX:-metal -compute -shaderobj -//TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj -//TEST(compute):COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj -//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -g0 - -//TEST_INPUT:ubuffer(data=[0xD37A83FF], stride=4):name packedUArray -StructuredBuffer packedUArray; - -//TEST_INPUT:ubuffer(data=[0xDEADBEEF], stride=4):name packedSArray -StructuredBuffer packedSArray; - -//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name outputBuffer -RWStructuredBuffer outputBuffer; - -// Test type layout works. -struct Custom { - uint8_t4_packed packedU; - uint3 other1; - int8_t4_packed packedS; - float other2; -} - -[numthreads(1, 1, 1)] -void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) { - uint id = dispatchThreadID.x; - - Custom val; - val.packedU = packedUArray[id]; - val.packedS = packedSArray[id]; - - outputBuffer[id] = val.packedU; - outputBuffer[id + 1] = uint8_t4_packed(val.packedS); -} diff --git a/tests/hlsl-intrinsic/packed/packed-types.slang.expected.txt b/tests/hlsl-intrinsic/packed/packed-types.slang.expected.txt deleted file mode 100644 index a348b3dfd1..0000000000 --- a/tests/hlsl-intrinsic/packed/packed-types.slang.expected.txt +++ /dev/null @@ -1,2 +0,0 @@ -D37A83FF -DEADBEEF