From 4eee73b7f82be7ddbed46b5d2fa7f0fb98f0ef84 Mon Sep 17 00:00:00 2001
From: fairywreath <nerradfour@gmail.com>
Date: Wed, 26 Feb 2025 01:21:31 -0500
Subject: [PATCH 1/8] update hlsl meta

---
 source/slang/hlsl.meta.slang | 384 +++++++++++++++++++----------------
 1 file changed, 212 insertions(+), 172 deletions(-)
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index a2b685b692..d32fe4d2c5 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -24023,377 +24023,417 @@ T workgroupUniformLoad<T>(__ref T v)
 // to expose these intrinsics on targets that do not have SM 6.6 features.
 //
 
-//@hidden:
+//@public:
+
+/// Unpack 4 signed 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
-uint16_t __lsb_as_u16(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int16_t4 unpack_s8s16(int8_t4_packed packed)
 {
-    return uint16_t(val & 0xFFU);
+    return unpackInt4x8ToInt16(uint(packed));
 }
 
-//@hidden:
+/// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
-uint32_t __lsb_as_u32(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint16_t4 unpack_u8u16(uint8_t4_packed packed)
 {
-    return (val & 0xFFU);
+    return unpackUint4x8ToUint16(uint(packed));
 }
 
-//@hidden:
+/// Unpack 4 signed 8-bit values into a vector of 32 bit integers.
 [__readNone]
 [ForceInline]
-int8_t __lsb_as_s8(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int32_t4 unpack_s8s32(int8_t4_packed packed)
 {
-    return int8_t(val & 0xFFU);
+    return unpackInt4x8ToInt32(uint(packed));
 }
 
-//@hidden:
+/// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers.
 [__readNone]
 [ForceInline]
-int16_t __lsb_as_s16(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint32_t4 unpack_u8u32(uint8_t4_packed packed)
 {
-    return int16_t(__lsb_as_s8(val));
+    return unpackUint4x8ToUint32(uint(packed));
 }
 
-//@hidden:
+/// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
-int32_t __lsb_as_s32(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_u8(uint32_t4 unpackedValue)
 {
-    return int32_t(__lsb_as_s8(val));
+    return uint8_t4_packed(packUint4x8(unpackedValue));
 }
 
-//@hidden:
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
-uint32_t __lsb_clamp_u8_as_u32(int32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_s8(int32_t4 unpackedValue)
 {
-    return clamp(val, 0, 255);
+    return int8_t4_packed(packInt4x8(unpackedValue));
 }
 
+/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_u8(uint16_t4 unpackedValue)
+{
+    return uint8_t4_packed(packUint4x8(unpackedValue));
+}
+
+/// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_s8(int16_t4 unpackedValue)
+{
+    return int8_t4_packed(packInt4x8(unpackedValue));
+}
+
+/// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue)
+{
+    return uint8_t4_packed(packUint4x8Clamp(unpackedValue));
+}
+
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue)
+{
+    return int8_t4_packed(packInt4x8Clamp(unpackedValue));
+}
+
+/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue)
+{
+    return uint8_t4_packed(packUint4x8Clamp(unpackedValue));
+}
+
+/// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue)
+{
+    return int8_t4_packed(packInt4x8Clamp(unpackedValue));
+}
+
+// Work-graphs
+
+//@public:
+/// read-only input to Broadcasting launch node.
+__generic<T>
+//TODO: DispatchNodeInputRecord should be available only for broadcasting node shader.
+//[require(broadcasting_node)]
+[require(spirv)]
+struct DispatchNodeInputRecord
+{
+    /// Provide an access to a record object that only holds a single record.
+    NodePayloadPtr<T> Get()
+    {
+        int index = 0;
+        __target_switch
+        {
+        case spirv:
+            return spirv_asm
+            {
+                %in_payload_t = OpTypeNodePayloadArrayAMDX $$T;
+                %in_payload_ptr_t = OpTypePointer NodePayloadAMDX %in_payload_t;
+                %var = OpVariable %in_payload_ptr_t NodePayloadAMDX;
+                result : $$NodePayloadPtr<T> = OpAccessChain %var $index;
+            };
+        }
+    }
+};
+
+//
+// Pack/Unpack Intrinsics
+//
+
 //@hidden:
+
+[__readNone]
+[ForceInline]
+uint16_t __lsb_as_u16(uint32_t val)
+{
+    return uint16_t(val & 0xFFU);
+}
+
+[__readNone]
+[ForceInline]
+uint32_t __lsb_as_u32(uint32_t val)
+{
+    return (val & 0xFFU);
+}
+
+[__readNone]
+[ForceInline]
+int8_t __lsb_as_s8(uint32_t val)
+{
+    return int8_t(val);
+}
+
 [__readNone]
 [ForceInline]
-uint32_t __lsb_clamp_s8_as_u32(int32_t val)
+int16_t __lsb_as_s16(uint32_t val)
 {
-    return (uint32_t(clamp(val, -128, 127)) & 0xFFU);
+    return int16_t(__lsb_as_s8(val));
+}
+
+[__readNone]
+[ForceInline]
+int32_t __lsb_as_s32(uint32_t val)
+{
+    return int32_t(__lsb_as_s8(val));
 }
 
 //@public:
-/// Unpack 4 signed 8-bit values into a vector of 16 bit integers.
+
+/// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int16_t4 unpack_s8s16(int8_t4_packed packed)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint32_t4 unpackUint4x8ToUint32(uint packedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "unpack_s8s16";
-    case spirv: 
+    case hlsl: __intrinsic_asm "unpack_u8u32";
+    case wgsl: __intrinsic_asm "unpack4xU8";
+    case spirv:
         return spirv_asm
         {
-            %s8Vec = OpBitcast $$vector<int8_t, 4> $packed;
-            result:$$vector<int16_t, 4> = OpSConvert %s8Vec
+            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packedValue;
+            result:$$vector<uint32_t, 4> = OpUConvert %u8Vec
         };
     default:
-        uint32_t packedValue = uint32_t(packed);
-        return int16_t4
+        return uint32_t4
         (
-            __lsb_as_s16(packedValue),
-            __lsb_as_s16(packedValue >> 8U),
-            __lsb_as_s16(packedValue >> 16U),
-            __lsb_as_s16(packedValue >> 24U),
+            __lsb_as_u32(packedValue),
+            __lsb_as_u32(packedValue >> 8U),
+            __lsb_as_u32(packedValue >> 16U),
+            uint32_t(packedValue >> 24U),
         );
     }
 }
 
-//@public:
 /// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint16_t4 unpack_u8u16(uint8_t4_packed packed)
+uint16_t4 unpackUint4x8ToUint16(uint packedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "unpack_u8u16";
-    case spirv: 
+    case spirv:
         return spirv_asm
         {
-            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packed;
+            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packedValue;
             result:$$vector<uint16_t, 4> = OpUConvert %u8Vec
         };
     default:
-        uint32_t packedValue = uint32_t(packed);
         return uint16_t4
         (
             __lsb_as_u16(packedValue),
             __lsb_as_u16(packedValue >> 8U),
             __lsb_as_u16(packedValue >> 16U),
-            __lsb_as_u16(packedValue >> 24U),
+            uint16_t(packedValue >> 24U),
         );
     }
 }
 
-//@public:
 /// Unpack 4 signed 8-bit values into a vector of 32 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int32_t4 unpack_s8s32(int8_t4_packed packed)
+int32_t4 unpackInt4x8ToInt32(uint packedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "unpack_s8s32";
     case wgsl: __intrinsic_asm "unpack4xI8";
-    case spirv: 
+    case spirv:
         return spirv_asm
         {
-            %s8Vec = OpBitcast $$vector<int8_t, 4> $packed;
+            %s8Vec = OpBitcast $$vector<int8_t, 4> $packedValue;
             result:$$vector<int32_t, 4> = OpSConvert %s8Vec
         };
     default:
-        uint32_t packedValue = uint32_t(packed);
         return int32_t4
         (
             __lsb_as_s32(packedValue),
             __lsb_as_s32(packedValue >> 8U),
             __lsb_as_s32(packedValue >> 16U),
-            __lsb_as_s32(packedValue >> 24U),
+            int32_t(int8_t(packedValue >> 24U)),
         );
     }
 }
 
-//@public:
-/// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers.
+/// Unpack 4 signed 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint32_t4 unpack_u8u32(uint8_t4_packed packed)
+int16_t4 unpackInt4x8ToInt16(uint packedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "unpack_u8u32";
-    case wgsl: __intrinsic_asm "unpack4xU8";
-    case spirv: 
+    case hlsl: __intrinsic_asm "unpack_s8s16";
+    case spirv:
         return spirv_asm
         {
-            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packed;
-            result:$$vector<uint32_t, 4> = OpUConvert %u8Vec
+            %s8Vec = OpBitcast $$vector<int8_t, 4> $packedValue;
+            result:$$vector<int16_t, 4> = OpSConvert %s8Vec
         };
     default:
-        uint32_t packedValue = uint32_t(packed);
-        return uint32_t4
+        return int16_t4
         (
-            __lsb_as_u32(packedValue),
-            __lsb_as_u32(packedValue >> 8U),
-            __lsb_as_u32(packedValue >> 16U),
-            __lsb_as_u32(packedValue >> 24U),
+            __lsb_as_s16(packedValue),
+            __lsb_as_s16(packedValue >> 8U),
+            __lsb_as_s16(packedValue >> 16U),
+            int16_t(int8_t(packedValue >> 24U)),
         );
     }
 }
 
-//@public:
 /// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint8_t4_packed pack_u8(uint32_t4 unpackedValue)
+uint packUint4x8(uint32_t4 unpackedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_u8";
     case wgsl: __intrinsic_asm "pack4xU8";
     default:
-        return uint8_t4_packed
-        (
-            __lsb_as_u32(unpackedValue.x)
+        return __lsb_as_u32(unpackedValue.x)
             | (__lsb_as_u32(unpackedValue.y) << 8U)
             | (__lsb_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_as_u32(unpackedValue.w) << 24U)
-        );
+            | (unpackedValue.w << 24U);
     }
 }
 
-//@public:
-/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int8_t4_packed pack_s8(int32_t4 unpackedValue)
+uint packUint4x8(uint16_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_s8";
-    case wgsl: __intrinsic_asm "pack4xI8";
+    case hlsl: __intrinsic_asm "pack_u8";
     default:
-        return int8_t4_packed
-        (
-            __lsb_as_u32(unpackedValue.x)
-            | (__lsb_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_as_u32(unpackedValue.w) << 24U)
-        );
+        return packUint4x8(uint32_t4(unpackedValue));
     }
 }
 
-//@public:
-/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint8_t4_packed pack_u8(uint16_t4 unpackedValue)
+uint packInt4x8(int32_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_u8";
+    case hlsl: __intrinsic_asm "pack_s8";
+    case wgsl: __intrinsic_asm "pack4xI8";
     default:
-        return uint8_t4_packed
-        (
-            __lsb_as_u32(unpackedValue.x)
-            | (__lsb_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_as_u32(unpackedValue.w) << 24U)
-        );
+        return packUint4x8(uint32_t4(unpackedValue));
     }
 }
 
-//@public:
 /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int8_t4_packed pack_s8(int16_t4 unpackedValue)
+uint packInt4x8(int16_t4 unpackedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_s8";
     default:
-        return int8_t4_packed
-        (
-            __lsb_as_u32(unpackedValue.x)
-            | (__lsb_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_as_u32(unpackedValue.w) << 24U)
-        );
+        return packUint4x8(uint32_t4(unpackedValue));
     }
 }
 
-//@public:
-/// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers,
-/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue)
+uint packUint4x8Clamp(int32_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_clamp_u8";
-    case wgsl: __intrinsic_asm "pack4xU8Clamp(vec4<u32>($0))";
+    case hlsl: __intrinsic_asm "pack_clamp_s8";
+    case wgsl: __intrinsic_asm "pack4xI8Clamp";
     default:
-        return uint8_t4_packed
-        (
-            __lsb_clamp_u8_as_u32(unpackedValue.x)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.w) << 24U)
-        );
+        return packInt4x8(clamp(unpackedValue, 0, 255));
     }
 }
 
-//@public:
-/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
-/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
+/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue)
+uint packUint4x8Clamp(int16_t4 unpackedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_clamp_s8";
-    case wgsl: __intrinsic_asm "pack4xI8Clamp";
     default:
-        return int8_t4_packed
-        (
-            __lsb_clamp_s8_as_u32(unpackedValue.x)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.w) << 24U)
-        );
+        return packInt4x8(clamp(unpackedValue, 0, 255));
     }
 }
 
-//@public:
-/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers,
-/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue)
+uint packInt4x8Clamp(int32_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_clamp_u8";
+    case hlsl: __intrinsic_asm "pack_clamp_s8";
+    case wgsl: __intrinsic_asm "pack4xI8Clamp";
     default:
-        return uint8_t4_packed
-        (
-            __lsb_clamp_u8_as_u32(unpackedValue.x)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.w) << 24U)
-        );
+        return packInt4x8(clamp(unpackedValue, -128, 127));
     }
 }
 
-//@public:
 /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers,
 /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue)
+uint packInt4x8Clamp(int16_t4 unpackedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_clamp_s8";
     default:
-        return int8_t4_packed
-        (
-            __lsb_clamp_s8_as_u32(unpackedValue.x)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.w) << 24U)
-        );
+        return packInt4x8(clamp(unpackedValue, -128, 127));
     }
 }
-
-// Work-graphs
-
-//@public:
-/// read-only input to Broadcasting launch node.
-__generic<T>
-//TODO: DispatchNodeInputRecord should be available only for broadcasting node shader.
-//[require(broadcasting_node)]
-[require(spirv)]
-struct DispatchNodeInputRecord
-{
-    /// Provide an access to a record object that only holds a single record.
-    NodePayloadPtr<T> Get()
-    {
-        int index = 0;
-        __target_switch
-        {
-        case spirv:
-            return spirv_asm
-            {
-                %in_payload_t = OpTypeNodePayloadArrayAMDX $$T;
-                %in_payload_ptr_t = OpTypePointer NodePayloadAMDX %in_payload_t;
-                %var = OpVariable %in_payload_ptr_t NodePayloadAMDX;
-                result : $$NodePayloadPtr<T> = OpAccessChain %var $index;
-            };
-        }
-    }
-};
-

From 660da2061dd70b6a422cea643b51d4a2fe9f406d Mon Sep 17 00:00:00 2001
From: fairywreath <nerradfour@gmail.com>
Date: Wed, 26 Feb 2025 01:32:09 -0500
Subject: [PATCH 2/8] update test

---
 tests/hlsl-intrinsic/packed/pack-unpack.slang | 157 +++++++++++++++---
 1 file changed, 134 insertions(+), 23 deletions(-)

diff --git a/tests/hlsl-intrinsic/packed/pack-unpack.slang b/tests/hlsl-intrinsic/packed/pack-unpack.slang
index b20e69fa84..0b9490ee4a 100644
--- a/tests/hlsl-intrinsic/packed/pack-unpack.slang
+++ b/tests/hlsl-intrinsic/packed/pack-unpack.slang
@@ -9,6 +9,18 @@
 // Debug info for inlining errors can be given out, so disable them for this test.
 //TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -g0
 
+//TEST(compute):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -xslang -DUSE_SLANG_SYNTAX
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -render-feature hardware-device -xslang -DUSE_SLANG_SYNTAX
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -profile cs_6_6 -dx12 -use-dxil -shaderobj -render-feature hardware-device -xslang -DUSE_SLANG_SYNTAX
+//TEST(compute):COMPARE_COMPUTE_EX:-metal -compute -shaderobj -xslang -DUSE_SLANG_SYNTAX
+//TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj -xslang -DUSE_SLANG_SYNTAX
+
+// 16 bit variants are not supported by WGSL.
+//TEST(compute):COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj -xslang -DWGSL -xslang -DUSE_SLANG_SYNTAX
+// Debug info for inlining errors can be given out, so disable them for this test.
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -g0 -xslang -DUSE_SLANG_SYNTAX
+
+
 //TEST_INPUT:ubuffer(data=[0xD37A83FF], stride=4):name unpackTestBuffer
 StructuredBuffer<uint32_t> unpackTestBuffer;
 
@@ -27,17 +39,125 @@ StructuredBuffer<int32_t4> packClampSTestBuffer;
 //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<uint> outputBuffer;
 
+uint32_t4 __unpack_u8u32(uint value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return unpackUint4x8ToUint32(value);
+#else
+    return unpack_u8u32(uint8_t4_packed(value));
+#endif
+}
+
+uint16_t4 __unpack_u8u16(uint value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return unpackUint4x8ToUint16(value);
+#else
+    return unpack_u8u16(uint8_t4_packed(value));
+#endif
+}
+
+int32_t4 __unpack_s8s32(uint value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return unpackInt4x8ToInt32(value);
+#else
+    return unpack_s8s32(int8_t4_packed(value));
+#endif
+}
+
+int16_t4 __unpack_s8s16(uint value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return unpackInt4x8ToInt16(value);
+#else
+    return unpack_s8s16(int8_t4_packed(value));
+#endif
+}
+
+uint __pack_u8(uint32_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packUint4x8(value);
+#else
+    return uint(pack_u8(value));
+#endif
+}
+
+uint __pack_u8(uint16_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packUint4x8(value);
+#else
+    return uint(pack_u8(value));
+#endif
+}
+
+uint __pack_s8(int32_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packInt4x8(value);
+#else
+    return uint(pack_s8(value));
+#endif
+}
+
+uint __pack_s8(int16_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packInt4x8(value);
+#else
+    return uint(pack_s8(value));
+#endif
+}
+
+uint __pack_clamp_u8(int32_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packUint4x8Clamp(value);
+#else
+    return uint(pack_clamp_u8(value));
+#endif
+}
+
+uint __pack_clamp_u8(int16_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packUint4x8Clamp(value);
+#else
+    return uint(pack_clamp_u8(value));
+#endif
+}
+
+uint __pack_clamp_s8(int32_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packInt4x8Clamp(value);
+#else
+    return uint(pack_clamp_s8(value));
+#endif
+}
+
+uint __pack_clamp_s8(int16_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packInt4x8Clamp(value);
+#else
+    return uint(pack_clamp_s8(value));
+#endif
+}
+
 [numthreads(1, 1, 1)]
 void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
 {
-    const uint8_t4_packed unpackTestValue = uint8_t4_packed(unpackTestBuffer[0]);
+    const uint unpackTestValue = unpackTestBuffer[0];
 
     uint index = 0;
 
     /*
      * Unpack without sign extension.
      */
-    uint32_t4 unpackedU32 = unpack_u8u32(unpackTestValue);
+    uint32_t4 unpackedU32 = __unpack_u8u32(unpackTestValue);
     // 0xFF
     outputBuffer[index++] = uint(unpackedU32.x);
     // 0x83
@@ -48,7 +168,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     outputBuffer[index++] = uint(unpackedU32.w);
 
 #if !defined(WGSL)
-    uint16_t4 unpackedU16 = unpack_u8u16(unpackTestValue);
+    uint16_t4 unpackedU16 = __unpack_u8u16(unpackTestValue);
     // 0xFF
     outputBuffer[index++] = uint(unpackedU16.x);
     // 0x83
@@ -67,7 +187,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     /*
      * Unpack with sign extension.
      */
-    int32_t4 unpackedS32 = unpack_s8s32(int8_t4_packed(unpackTestValue));
+    int32_t4 unpackedS32 = __unpack_s8s32(unpackTestValue);
     // 0xFFFFFFFF
     outputBuffer[index++] = uint(unpackedS32.x);
     // 0xFFFFFF83
@@ -78,7 +198,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     outputBuffer[index++] = uint(unpackedS32.w);
 
 #if !defined(WGSL)
-    int16_t4 unpackedS16 = unpack_s8s16(int8_t4_packed(unpackTestValue));
+    int16_t4 unpackedS16 = __unpack_s8s16(unpackTestValue);
     // 0xFFFFFFFF
     outputBuffer[index++] = uint(unpackedS16.x);
     // 0xFFFFFF83
@@ -100,22 +220,17 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
      */
     uint32_t4 packU32TestValues = packTestBuffer[0];
     int32_t4 packS32TestValues = packU32TestValues;
-    uint8_t4_packed packU32Result = pack_u8(packU32TestValues);
-    int8_t4_packed packS32Result = pack_s8(packS32TestValues);
 
     // 0xD4236A3F
-    outputBuffer[index++] = uint(packU32Result);
-    outputBuffer[index++] = uint(packS32Result);
+    outputBuffer[index++] = __pack_u8(packU32TestValues);
+    outputBuffer[index++] = __pack_s8(packS32TestValues);
 
 #if !defined(WGSL)
     uint16_t4 packU16TestValues = int16_t4(int16_t(packU32TestValues.x), int16_t(packU32TestValues.y), 
                                            int16_t(packU32TestValues.z), int16_t(packU32TestValues.w));
     int16_t4 packS16TestValues = packU16TestValues;
-    uint8_t4_packed packU16Result = pack_u8(packU16TestValues);
-    int8_t4_packed packS16Result = pack_s8(packS16TestValues);
-
-    outputBuffer[index++] = uint(packU16Result);
-    outputBuffer[index++] = uint(packS16Result);
+    outputBuffer[index++] = __pack_u8(packU16TestValues);
+    outputBuffer[index++] = __pack_s8(packS16TestValues);
 #else
     outputBuffer[index++] = 0xD4236A3F;
     outputBuffer[index++] = 0xD4236A3F;
@@ -123,17 +238,15 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
 
     /*
      * Pack with unsigned clamping.
-     */ 
+     */
     int32_t4 packClampU32TestValues = packClampUTestBuffer[0];
-    uint8_t4_packed packClampU32Result = pack_clamp_u8(packClampU32TestValues);
     // 0xFEFFFF05
-    outputBuffer[index++] = uint(packClampU32Result);
+    outputBuffer[index++] = __pack_clamp_u8(packClampU32TestValues);
 
 #if !defined(WGSL)
     int16_t4 packClampU16TestValues = int16_t4(int16_t(packClampU32TestValues.x), int16_t(packClampU32TestValues.y),
                                                int16_t(packClampU32TestValues.z), int16_t(packClampU32TestValues.w));
-    uint8_t4_packed packClampU16Result = pack_clamp_u8(packClampU16TestValues);
-    outputBuffer[index++] = uint(packClampU16Result);
+    outputBuffer[index++] = __pack_clamp_u8(packClampU16TestValues);
 #else
     outputBuffer[index++] = 0xFEFFFF05;
 #endif
@@ -142,15 +255,13 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
      * Pack with signed clamping
      */
     int32_t4 packClampS32TestValues = packClampSTestBuffer[0];
-    int8_t4_packed packClampS32Result = pack_clamp_s8(packClampS32TestValues);
     // 0x81807FFF
-    outputBuffer[index++] = uint(packClampS32Result);
+    outputBuffer[index++] = __pack_clamp_s8(packClampS32TestValues);
 
 #if !defined(WGSL)
     int16_t4 packClampS16TestValues = int16_t4(int16_t(packClampS32TestValues.x), int16_t(packClampS32TestValues.y),
                                                int16_t(packClampS32TestValues.z), int16_t(packClampS32TestValues.w));
-    int8_t4_packed packClampS16Result = pack_clamp_s8(packClampS16TestValues);
-    outputBuffer[index++] = uint(packClampS16Result);
+    outputBuffer[index++] = __pack_clamp_s8(packClampS16TestValues);
 #else
     outputBuffer[index++] = 0x81807FFF;
 #endif

From 7dd639758e00f017fe5a616a81d138f82e681a38 Mon Sep 17 00:00:00 2001
From: fairywreath <nerradfour@gmail.com>
Date: Wed, 26 Feb 2025 01:36:39 -0500
Subject: [PATCH 3/8] use slang syntax in meta file

---
 source/slang/hlsl.meta.slang | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index d32fe4d2c5..0aac748f73 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -17291,8 +17291,8 @@ uint dot4add_u8packed(uint x, uint y, uint acc)
             result:$$uint = OpIAdd %dotResult $acc;
         };
     default:
-        uint4 vecX = unpack_u8u32(uint8_t4_packed(x));
-        uint4 vecY = unpack_u8u32(uint8_t4_packed(y));
+        uint4 vecX = unpackUint4x8ToUint32(x);
+        uint4 vecY = unpackUint4x8ToUint32(y);
         return dot(vecX, vecY) + acc;
     }
 }
@@ -17319,8 +17319,8 @@ int dot4add_i8packed(uint x, uint y, int acc)
             result:$$int = OpIAdd %dotResult $acc;
         };
     default:
-        int4 vecX = unpack_s8s32(int8_t4_packed(x));
-        int4 vecY = unpack_s8s32(int8_t4_packed(y));
+        int4 vecX = unpackInt4x8ToInt32(x);
+        int4 vecY = unpackInt4x8ToInt32(y);
         return dot(vecX, vecY) + acc;
     }
 }

From 1fc223c5f0b23fe070c969afa361c30ec541b4bb Mon Sep 17 00:00:00 2001
From: fairywreath <nerradfour@gmail.com>
Date: Wed, 26 Feb 2025 01:58:43 -0500
Subject: [PATCH 4/8] improve meta file

---
 source/slang/hlsl.meta.slang | 44 ++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 0aac748f73..249e713f2d 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -24173,37 +24173,37 @@ struct DispatchNodeInputRecord
 
 [__readNone]
 [ForceInline]
-uint16_t __lsb_as_u16(uint32_t val)
+uint16_t __lsbAsUint16(uint32_t val)
 {
     return uint16_t(val & 0xFFU);
 }
 
 [__readNone]
 [ForceInline]
-uint32_t __lsb_as_u32(uint32_t val)
+uint32_t __lsbAsUint32(uint32_t val)
 {
     return (val & 0xFFU);
 }
 
 [__readNone]
 [ForceInline]
-int8_t __lsb_as_s8(uint32_t val)
+int8_t __lsbAsInt8(uint32_t val)
 {
     return int8_t(val);
 }
 
 [__readNone]
 [ForceInline]
-int16_t __lsb_as_s16(uint32_t val)
+int16_t __lsbAsInt16(uint32_t val)
 {
-    return int16_t(__lsb_as_s8(val));
+    return int16_t(__lsbAsInt8(val));
 }
 
 [__readNone]
 [ForceInline]
-int32_t __lsb_as_s32(uint32_t val)
+int32_t __lsbAsInt32(uint32_t val)
 {
-    return int32_t(__lsb_as_s8(val));
+    return int32_t(__lsbAsInt8(val));
 }
 
 //@public:
@@ -24228,9 +24228,9 @@ uint32_t4 unpackUint4x8ToUint32(uint packedValue)
     default:
         return uint32_t4
         (
-            __lsb_as_u32(packedValue),
-            __lsb_as_u32(packedValue >> 8U),
-            __lsb_as_u32(packedValue >> 16U),
+            __lsbAsUint32(packedValue),
+            __lsbAsUint32(packedValue >> 8U),
+            __lsbAsUint32(packedValue >> 16U),
             uint32_t(packedValue >> 24U),
         );
     }
@@ -24254,9 +24254,9 @@ uint16_t4 unpackUint4x8ToUint16(uint packedValue)
     default:
         return uint16_t4
         (
-            __lsb_as_u16(packedValue),
-            __lsb_as_u16(packedValue >> 8U),
-            __lsb_as_u16(packedValue >> 16U),
+            __lsbAsUint16(packedValue),
+            __lsbAsUint16(packedValue >> 8U),
+            __lsbAsUint16(packedValue >> 16U),
             uint16_t(packedValue >> 24U),
         );
     }
@@ -24281,9 +24281,9 @@ int32_t4 unpackInt4x8ToInt32(uint packedValue)
     default:
         return int32_t4
         (
-            __lsb_as_s32(packedValue),
-            __lsb_as_s32(packedValue >> 8U),
-            __lsb_as_s32(packedValue >> 16U),
+            __lsbAsInt32(packedValue),
+            __lsbAsInt32(packedValue >> 8U),
+            __lsbAsInt32(packedValue >> 16U),
             int32_t(int8_t(packedValue >> 24U)),
         );
     }
@@ -24307,9 +24307,9 @@ int16_t4 unpackInt4x8ToInt16(uint packedValue)
     default:
         return int16_t4
         (
-            __lsb_as_s16(packedValue),
-            __lsb_as_s16(packedValue >> 8U),
-            __lsb_as_s16(packedValue >> 16U),
+            __lsbAsInt16(packedValue),
+            __lsbAsInt16(packedValue >> 8U),
+            __lsbAsInt16(packedValue >> 16U),
             int16_t(int8_t(packedValue >> 24U)),
         );
     }
@@ -24326,9 +24326,9 @@ uint packUint4x8(uint32_t4 unpackedValue)
     case hlsl: __intrinsic_asm "pack_u8";
     case wgsl: __intrinsic_asm "pack4xU8";
     default:
-        return __lsb_as_u32(unpackedValue.x)
-            | (__lsb_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_as_u32(unpackedValue.z) << 16U)
+        return __lsbAsUint32(unpackedValue.x)
+            | (__lsbAsUint32(unpackedValue.y) << 8U)
+            | (__lsbAsUint32(unpackedValue.z) << 16U)
             | (unpackedValue.w << 24U);
     }
 }

From 55c127c8d39a74c7a71b964af8b3989ecc845ba0 Mon Sep 17 00:00:00 2001
From: fairywreath <nerradfour@gmail.com>
Date: Wed, 26 Feb 2025 08:56:29 -0600
Subject: [PATCH 5/8] fix pack clamp u8

---
 source/slang/hlsl.meta.slang | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index 249e713f2d..c4fa240acf 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -24385,8 +24385,8 @@ uint packUint4x8Clamp(int32_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_clamp_s8";
-    case wgsl: __intrinsic_asm "pack4xI8Clamp";
+    case hlsl: __intrinsic_asm "pack_clamp_u8";
+    case wgsl: __intrinsic_asm "pack4xU8Clamp";
     default:
         return packInt4x8(clamp(unpackedValue, 0, 255));
     }
@@ -24401,7 +24401,7 @@ uint packUint4x8Clamp(int16_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_clamp_s8";
+    case hlsl: __intrinsic_asm "pack_clamp_u8";
     default:
         return packInt4x8(clamp(unpackedValue, 0, 255));
     }

From f9330144a2c766932f811525ccffe0e59249062d Mon Sep 17 00:00:00 2001
From: fairywreath <nerradfour@gmail.com>
Date: Wed, 26 Feb 2025 10:27:13 -0600
Subject: [PATCH 6/8] remove builtin packed types, use typealias instead

---
 .../slang-embedded-core-module-source.cpp     | 31 --------
 source/slang/hlsl.meta.slang                  | 29 ++++----
 source/slang/slang-check-conversion.cpp       |  2 -
 source/slang/slang-check-decl.cpp             |  2 -
 source/slang/slang-emit-c-like.cpp            |  9 ---
 source/slang/slang-emit-cpp.cpp               |  5 --
 source/slang/slang-emit-cuda.cpp              |  3 -
 source/slang/slang-emit-glsl.cpp              | 16 -----
 source/slang/slang-emit-hlsl.cpp              |  6 --
 source/slang/slang-emit-metal.cpp             |  4 --
 source/slang/slang-emit-spirv.cpp             |  4 --
 source/slang/slang-emit-wgsl.cpp              |  6 --
 .../slang/slang-ir-any-value-marshalling.cpp  |  8 ---
 .../slang/slang-ir-byte-address-legalize.cpp  |  2 -
 source/slang/slang-ir-layout.cpp              |  3 -
 source/slang/slang-ir-lower-bit-cast.cpp      |  2 -
 source/slang/slang-ir-util.cpp                | 14 ----
 source/slang/slang-ir.cpp                     |  8 ---
 source/slang/slang-lower-to-ir.cpp            |  2 -
 source/slang/slang-mangle.cpp                 |  7 --
 source/slang/slang-type-layout.cpp            |  4 --
 source/slang/slang-type-system-shared.h       |  2 -
 source/slang/slang.cpp                        |  9 +--
 .../packed/packed-types-error.slang           | 21 ------
 .../packed/packed-types-warning.slang         | 72 -------------------
 .../hlsl-intrinsic/packed/packed-types.slang  | 36 ----------
 .../packed/packed-types.slang.expected.txt    |  2 -
 27 files changed, 18 insertions(+), 291 deletions(-)
 delete mode 100644 tests/hlsl-intrinsic/packed/packed-types-error.slang
 delete mode 100644 tests/hlsl-intrinsic/packed/packed-types-warning.slang
 delete mode 100644 tests/hlsl-intrinsic/packed/packed-types.slang
 delete mode 100644 tests/hlsl-intrinsic/packed/packed-types.slang.expected.txt

diff --git a/source/slang-core-module/slang-embedded-core-module-source.cpp b/source/slang-core-module/slang-embedded-core-module-source.cpp
index 557b50eb2d..9dc324d2a6 100644
--- a/source/slang-core-module/slang-embedded-core-module-source.cpp
+++ b/source/slang-core-module/slang-embedded-core-module-source.cpp
@@ -56,12 +56,6 @@ enum BaseTypeConversionRank : uint8_t
     kBaseTypeConversionRank_Int32,
     kBaseTypeConversionRank_IntPtr,
     kBaseTypeConversionRank_Int64,
-
-    // Packed type conversion ranks where the overall rank order does not apply.
-    // They must be explicitly casted to another type.
-    kBaseTypeConversionRank_Int8x4Packed,
-    kBaseTypeConversionRank_UInt8x4Packed,
-
     kBaseTypeConversionRank_Error,
 };
 
@@ -155,17 +149,6 @@ static const BaseTypeConversionInfo kBaseTypes[] = {
      UINT_MASK,
      kBaseTypeConversionKind_Unsigned,
      kBaseTypeConversionRank_IntPtr},
-
-    {"int8_t4_packed",
-     BaseType::Int8x4Packed,
-     0,
-     kBaseTypeConversionKind_Unsigned,
-     kBaseTypeConversionRank_Int8x4Packed},
-    {"uint8_t4_packed",
-     BaseType::UInt8x4Packed,
-     0,
-     kBaseTypeConversionKind_Unsigned,
-     kBaseTypeConversionRank_UInt8x4Packed},
 };
 
 void Session::finalizeSharedASTBuilder()
@@ -192,12 +175,6 @@ void Session::finalizeSharedASTBuilder()
         globalAstBuilder->getBuiltinType(baseType.tag);
 }
 
-static bool isConversionRankPackedType(BaseTypeConversionRank rank)
-{
-    return (rank == BaseTypeConversionRank::kBaseTypeConversionRank_Int8x4Packed) ||
-           (rank == BaseTypeConversionRank::kBaseTypeConversionRank_UInt8x4Packed);
-}
-
 // Given two base types, we need to be able to compute the cost of converting between them.
 ConversionCost getBaseTypeConversionCost(
     BaseTypeConversionInfo const& toInfo,
@@ -210,14 +187,6 @@ ConversionCost getBaseTypeConversionCost(
         return kConversionCost_None;
     }
 
-    // Handle special case for packed types, where they must be explicitly casted to another type.
-    bool isToPackedType = isConversionRankPackedType(toInfo.conversionRank);
-    bool isFromPackedType = isConversionRankPackedType(fromInfo.conversionRank);
-    if (isToPackedType || isFromPackedType)
-    {
-        return kConversionCost_GeneralConversion;
-    }
-
     // Conversions within the same kind are easist to handle
     if (toInfo.conversionKind == fromInfo.conversionKind)
     {
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index c4fa240acf..cccc23acb5 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -24017,7 +24017,7 @@ T workgroupUniformLoad<T>(__ref T v)
 }
 
 //
-// Pack/Unpack Math Intrinsics
+// HLSL Pack/Unpack Math Intrinsics
 //
 // These were introduced in SM 6.6 but requirements are dropped to SM 5.0 here
 // to expose these intrinsics on targets that do not have SM 6.6 features.
@@ -24025,13 +24025,16 @@ T workgroupUniformLoad<T>(__ref T v)
 
 //@public:
 
+typealias uint8_t4_packed = uint;
+typealias int8_t4_packed = uint;
+
 /// Unpack 4 signed 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
 int16_t4 unpack_s8s16(int8_t4_packed packed)
 {
-    return unpackInt4x8ToInt16(uint(packed));
+    return unpackInt4x8ToInt16(packed);
 }
 
 /// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers.
@@ -24040,7 +24043,7 @@ int16_t4 unpack_s8s16(int8_t4_packed packed)
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
 uint16_t4 unpack_u8u16(uint8_t4_packed packed)
 {
-    return unpackUint4x8ToUint16(uint(packed));
+    return unpackUint4x8ToUint16(packed);
 }
 
 /// Unpack 4 signed 8-bit values into a vector of 32 bit integers.
@@ -24049,7 +24052,7 @@ uint16_t4 unpack_u8u16(uint8_t4_packed packed)
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
 int32_t4 unpack_s8s32(int8_t4_packed packed)
 {
-    return unpackInt4x8ToInt32(uint(packed));
+    return unpackInt4x8ToInt32(packed);
 }
 
 /// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers.
@@ -24058,7 +24061,7 @@ int32_t4 unpack_s8s32(int8_t4_packed packed)
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
 uint32_t4 unpack_u8u32(uint8_t4_packed packed)
 {
-    return unpackUint4x8ToUint32(uint(packed));
+    return unpackUint4x8ToUint32(packed);
 }
 
 /// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
@@ -24067,7 +24070,7 @@ uint32_t4 unpack_u8u32(uint8_t4_packed packed)
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
 uint8_t4_packed pack_u8(uint32_t4 unpackedValue)
 {
-    return uint8_t4_packed(packUint4x8(unpackedValue));
+    return packUint4x8(unpackedValue);
 }
 
 /// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
@@ -24076,7 +24079,7 @@ uint8_t4_packed pack_u8(uint32_t4 unpackedValue)
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
 int8_t4_packed pack_s8(int32_t4 unpackedValue)
 {
-    return int8_t4_packed(packInt4x8(unpackedValue));
+    return packInt4x8(unpackedValue);
 }
 
 /// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
@@ -24085,7 +24088,7 @@ int8_t4_packed pack_s8(int32_t4 unpackedValue)
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
 uint8_t4_packed pack_u8(uint16_t4 unpackedValue)
 {
-    return uint8_t4_packed(packUint4x8(unpackedValue));
+    return packUint4x8(unpackedValue);
 }
 
 /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
@@ -24094,7 +24097,7 @@ uint8_t4_packed pack_u8(uint16_t4 unpackedValue)
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
 int8_t4_packed pack_s8(int16_t4 unpackedValue)
 {
-    return int8_t4_packed(packInt4x8(unpackedValue));
+    return packInt4x8(unpackedValue);
 }
 
 /// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers,
@@ -24104,7 +24107,7 @@ int8_t4_packed pack_s8(int16_t4 unpackedValue)
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
 uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue)
 {
-    return uint8_t4_packed(packUint4x8Clamp(unpackedValue));
+    return packUint4x8Clamp(unpackedValue);
 }
 
 /// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
@@ -24114,7 +24117,7 @@ uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue)
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
 int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue)
 {
-    return int8_t4_packed(packInt4x8Clamp(unpackedValue));
+    return packInt4x8Clamp(unpackedValue);
 }
 
 /// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers,
@@ -24124,7 +24127,7 @@ int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue)
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
 uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue)
 {
-    return uint8_t4_packed(packUint4x8Clamp(unpackedValue));
+    return packUint4x8Clamp(unpackedValue);
 }
 
 /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers,
@@ -24134,7 +24137,7 @@ uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue)
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
 int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue)
 {
-    return int8_t4_packed(packInt4x8Clamp(unpackedValue));
+    return packInt4x8Clamp(unpackedValue);
 }
 
 // Work-graphs
diff --git a/source/slang/slang-check-conversion.cpp b/source/slang/slang-check-conversion.cpp
index a9785a585e..c5cf192ee3 100644
--- a/source/slang/slang-check-conversion.cpp
+++ b/source/slang/slang-check-conversion.cpp
@@ -1026,8 +1026,6 @@ int getTypeBitSize(Type* t)
         return 16;
     case BaseType::Int:
     case BaseType::UInt:
-    case BaseType::Int8x4Packed:
-    case BaseType::UInt8x4Packed:
         return 32;
     case BaseType::Int64:
     case BaseType::UInt64:
diff --git a/source/slang/slang-check-decl.cpp b/source/slang/slang-check-decl.cpp
index 0c42817c81..e6e8645257 100644
--- a/source/slang/slang-check-decl.cpp
+++ b/source/slang/slang-check-decl.cpp
@@ -1978,8 +1978,6 @@ void SemanticsDeclHeaderVisitor::checkVarDeclCommon(VarDeclBase* varDecl)
             case BaseType::UInt:
             case BaseType::UInt64:
             case BaseType::UIntPtr:
-            case BaseType::Int8x4Packed:
-            case BaseType::UInt8x4Packed:
                 break;
             default:
                 getSink()->diagnose(varDecl, Diagnostics::staticConstRequirementMustBeIntOrBool);
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index db2c0150f7..4e90c0e961 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -266,11 +266,6 @@ void CLikeSourceEmitter::emitSimpleType(IRType* type)
     case kIROp_UIntPtrType:
         return UnownedStringSlice("uintptr_t");
 
-    case kIROp_Int8x4PackedType:
-        return UnownedStringSlice("int8_t4_packed");
-    case kIROp_UInt8x4PackedType:
-        return UnownedStringSlice("uint8_t4_packed");
-
     case kIROp_HalfType:
         return UnownedStringSlice("half");
 
@@ -1315,8 +1310,6 @@ void CLikeSourceEmitter::emitSimpleValueImpl(IRInst* inst)
                         return;
                     }
                 case BaseType::UInt:
-                case BaseType::Int8x4Packed:
-                case BaseType::UInt8x4Packed:
                     {
                         m_writer->emit(UInt(uint32_t(litInst->value.intVal)));
                         m_writer->emit("U");
@@ -4025,8 +4018,6 @@ void CLikeSourceEmitter::emitVecNOrScalar(
                 m_writer->emit("ushort");
                 break;
             case kIROp_UIntType:
-            case kIROp_Int8x4PackedType:
-            case kIROp_UInt8x4PackedType:
                 m_writer->emit("uint");
                 break;
             case kIROp_UInt64Type:
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index 13a85e8abf..bfc021677d 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -101,13 +101,8 @@ static const char s_xyzwNames[] = "xyzw";
     case kIROp_UIntPtrType:
         return UnownedStringSlice("uintptr_t");
 
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
-        return UnownedStringSlice("uint32_t");
-
         // Not clear just yet how we should handle half... we want all processing as float
         // probly, but when reading/writing to memory converting
-
     case kIROp_HalfType:
         return UnownedStringSlice("half");
 
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 58ac377bfb..8657b3707e 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -84,9 +84,6 @@ UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op)
     case kIROp_UIntPtrType:
         return UnownedStringSlice("uint");
 #endif
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
-        return UnownedStringSlice("uint");
 
     case kIROp_HalfType:
         return UnownedStringSlice("__half");
diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp
index 25dab3fb35..d8389fadbb 100644
--- a/source/slang/slang-emit-glsl.cpp
+++ b/source/slang/slang-emit-glsl.cpp
@@ -1330,8 +1330,6 @@ void GLSLSourceEmitter::emitSimpleValueImpl(IRInst* inst)
                         return;
                     }
                 case BaseType::UInt:
-                case BaseType::Int8x4Packed:
-                case BaseType::UInt8x4Packed:
                     {
                         m_writer->emit(UInt(uint32_t(litInst->value.intVal)));
                         m_writer->emit("U");
@@ -2169,8 +2167,6 @@ bool GLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
                 break;
 
             case BaseType::UInt:
-            case BaseType::Int8x4Packed:
-            case BaseType::UInt8x4Packed:
                 if (fromType == BaseType::Float)
                 {
                     m_writer->emit("floatBitsToUint");
@@ -3180,18 +3176,6 @@ void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type)
 #endif
             return;
         }
-    case kIROp_Int8x4PackedType:
-        {
-            _requireBaseType(BaseType::Int8x4Packed);
-            m_writer->emit("uint");
-            return;
-        }
-    case kIROp_UInt8x4PackedType:
-        {
-            _requireBaseType(BaseType::UInt8x4Packed);
-            m_writer->emit("uint");
-            return;
-        }
     case kIROp_VoidType:
     case kIROp_BoolType:
     case kIROp_Int8Type:
diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp
index 59d40d3a14..89300e13e3 100644
--- a/source/slang/slang-emit-hlsl.cpp
+++ b/source/slang/slang-emit-hlsl.cpp
@@ -902,8 +902,6 @@ bool HLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
             case BaseType::UInt64:
             case BaseType::UIntPtr:
             case BaseType::Bool:
-            case BaseType::Int8x4Packed:
-            case BaseType::UInt8x4Packed:
                 // Because the intermediate type will always
                 // be an integer type, we can convert to
                 // another integer type of the same size
@@ -943,8 +941,6 @@ bool HLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
             case BaseType::UInt:
             case BaseType::Int:
             case BaseType::Bool:
-            case BaseType::Int8x4Packed:
-            case BaseType::UInt8x4Packed:
                 break;
             case BaseType::UInt16:
             case BaseType::Int16:
@@ -1330,8 +1326,6 @@ void HLSLSourceEmitter::emitSimpleTypeImpl(IRType* type)
     case kIROp_Int16Type:
     case kIROp_UInt16Type:
     case kIROp_HalfType:
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
         {
             m_writer->emit(getDefaultBuiltinTypeName(type->getOp()));
             return;
diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp
index 0a7db8b288..1bb7383469 100644
--- a/source/slang/slang-emit-metal.cpp
+++ b/source/slang/slang-emit-metal.cpp
@@ -1099,10 +1099,6 @@ void MetalSourceEmitter::emitSimpleTypeImpl(IRType* type)
     case kIROp_UIntPtrType:
         m_writer->emit("ulong");
         return;
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
-        m_writer->emit("uint");
-        return;
     case kIROp_StructType:
         m_writer->emit(getName(type));
         return;
diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp
index ef015df7f9..23b2808a25 100644
--- a/source/slang/slang-emit-spirv.cpp
+++ b/source/slang/slang-emit-spirv.cpp
@@ -1466,8 +1466,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
         case kIROp_Int8Type:
         case kIROp_IntType:
         case kIROp_Int64Type:
-        case kIROp_Int8x4PackedType:
-        case kIROp_UInt8x4PackedType:
             {
                 const IntInfo i = getIntTypeInfo(as<IRType>(inst));
                 if (i.width == 16)
@@ -7551,8 +7549,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
             case kIROp_UInt64Type:
             case kIROp_UInt8Type:
             case kIROp_UIntPtrType:
-            case kIROp_Int8x4PackedType:
-            case kIROp_UInt8x4PackedType:
                 spvEncoding = 6; // Unsigned
                 break;
             case kIROp_FloatType:
diff --git a/source/slang/slang-emit-wgsl.cpp b/source/slang/slang-emit-wgsl.cpp
index 13c79e9acc..c41f35a8cb 100644
--- a/source/slang/slang-emit-wgsl.cpp
+++ b/source/slang/slang-emit-wgsl.cpp
@@ -511,10 +511,6 @@ void WGSLSourceEmitter::emitSimpleTypeImpl(IRType* type)
     case kIROp_UIntPtrType:
         m_writer->emit("u64");
         return;
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
-        m_writer->emit("u32");
-        return;
     case kIROp_StructType:
         m_writer->emit(getName(type));
         return;
@@ -967,8 +963,6 @@ void WGSLSourceEmitter::emitSimpleValueImpl(IRInst* inst)
                         return;
                     }
                 case BaseType::UInt:
-                case BaseType::Int8x4Packed:
-                case BaseType::UInt8x4Packed:
                     {
                         m_writer->emit("u32(");
                         m_writer->emit(UInt(uint32_t(litInst->value.intVal)));
diff --git a/source/slang/slang-ir-any-value-marshalling.cpp b/source/slang/slang-ir-any-value-marshalling.cpp
index 2dc91d299a..d124b293d4 100644
--- a/source/slang/slang-ir-any-value-marshalling.cpp
+++ b/source/slang/slang-ir-any-value-marshalling.cpp
@@ -153,8 +153,6 @@ struct AnyValueMarshallingContext
         case kIROp_IntPtrType:
         case kIROp_UIntPtrType:
         case kIROp_PtrType:
-        case kIROp_Int8x4PackedType:
-        case kIROp_UInt8x4PackedType:
             context->marshalBasicType(builder, dataType, concreteTypedVar);
             break;
         case kIROp_VectorType:
@@ -311,8 +309,6 @@ struct AnyValueMarshallingContext
                     break;
                 }
             case kIROp_UIntType:
-            case kIROp_Int8x4PackedType:
-            case kIROp_UInt8x4PackedType:
 #if SLANG_PTR_IS_32
             case kIROp_UIntPtrType:
 #endif
@@ -560,8 +556,6 @@ struct AnyValueMarshallingContext
                     break;
                 }
             case kIROp_UIntType:
-            case kIROp_Int8x4PackedType:
-            case kIROp_UInt8x4PackedType:
                 {
                     ensureOffsetAt4ByteBoundary();
                     if (fieldOffset < static_cast<uint32_t>(anyValInfo->fieldKeys.getCount()))
@@ -861,8 +855,6 @@ SlangInt _getAnyValueSizeRaw(IRType* type, SlangInt offset)
     case kIROp_FloatType:
     case kIROp_UIntType:
     case kIROp_BoolType:
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
         return alignUp(offset, 4) + 4;
     case kIROp_UInt64Type:
     case kIROp_Int64Type:
diff --git a/source/slang/slang-ir-byte-address-legalize.cpp b/source/slang/slang-ir-byte-address-legalize.cpp
index 9207e6a2fc..617c8c7c4f 100644
--- a/source/slang/slang-ir-byte-address-legalize.cpp
+++ b/source/slang/slang-ir-byte-address-legalize.cpp
@@ -840,8 +840,6 @@ struct ByteAddressBufferLegalizationContext
         case kIROp_IntType:
         case kIROp_FloatType:
         case kIROp_BoolType:
-        case kIROp_Int8x4PackedType:
-        case kIROp_UInt8x4PackedType:
             // The basic 32-bit types (and `bool`) can be handled by
             // loading `uint` values and then bit-casting.
             //
diff --git a/source/slang/slang-ir-layout.cpp b/source/slang/slang-ir-layout.cpp
index 1332c8a25a..7ce19bf670 100644
--- a/source/slang/slang-ir-layout.cpp
+++ b/source/slang/slang-ir-layout.cpp
@@ -128,9 +128,6 @@ static Result _calcSizeAndAlignment(
         BASE(UIntPtr, kPointerSize);
         BASE(Double, 8);
 
-        BASE(Int8x4Packed, 4);
-        BASE(UInt8x4Packed, 4);
-
         // We are currently handling `bool` following the HLSL
         // precednet of storing it in 4 bytes.
         //
diff --git a/source/slang/slang-ir-lower-bit-cast.cpp b/source/slang/slang-ir-lower-bit-cast.cpp
index 0a77836397..450ad8ac94 100644
--- a/source/slang/slang-ir-lower-bit-cast.cpp
+++ b/source/slang/slang-ir-lower-bit-cast.cpp
@@ -178,8 +178,6 @@ struct BitCastLoweringContext
         case kIROp_UIntType:
         case kIROp_FloatType:
         case kIROp_BoolType:
-        case kIROp_Int8x4PackedType:
-        case kIROp_UInt8x4PackedType:
 #if SLANG_PTR_IS_32
         case kIROp_IntPtrType:
         case kIROp_UIntPtrType:
diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp
index 39c1c5bb18..f75a24ac6a 100644
--- a/source/slang/slang-ir-util.cpp
+++ b/source/slang/slang-ir-util.cpp
@@ -107,8 +107,6 @@ IROp getTypeStyle(IROp op)
     case kIROp_UInt64Type:
     case kIROp_IntPtrType:
     case kIROp_UIntPtrType:
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
         {
             // All int like
             return kIROp_IntType;
@@ -144,8 +142,6 @@ IROp getTypeStyle(BaseType op)
     case BaseType::UInt:
     case BaseType::UInt64:
     case BaseType::UIntPtr:
-    case BaseType::Int8x4Packed:
-    case BaseType::UInt8x4Packed:
         return kIROp_IntType;
     case BaseType::Half:
     case BaseType::Float:
@@ -476,12 +472,6 @@ void getTypeNameHint(StringBuilder& sb, IRInst* type)
     case kIROp_UIntPtrType:
         sb << "uintptr";
         break;
-    case kIROp_Int8x4PackedType:
-        sb << "int8_t4_packed";
-        break;
-    case kIROp_UInt8x4PackedType:
-        sb << "uint8_t4_packed";
-        break;
     case kIROp_CharType:
         sb << "char";
         break;
@@ -1862,10 +1852,6 @@ UnownedStringSlice getBasicTypeNameHint(IRType* basicType)
         return UnownedStringSlice::fromLiteral("uint64");
     case kIROp_UIntPtrType:
         return UnownedStringSlice::fromLiteral("uintptr");
-    case kIROp_Int8x4PackedType:
-        return UnownedStringSlice::fromLiteral("int8_t4_packed");
-    case kIROp_UInt8x4PackedType:
-        return UnownedStringSlice::fromLiteral("uint8_t4_packed");
     case kIROp_FloatType:
         return UnownedStringSlice::fromLiteral("float");
     case kIROp_HalfType:
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index 3a7ace37d4..8bdcdd12ad 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -3814,8 +3814,6 @@ IRInst* IRBuilder::emitDefaultConstruct(IRType* type, bool fallback)
     case kIROp_UIntType:
     case kIROp_UIntPtrType:
     case kIROp_UInt64Type:
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
     case kIROp_CharType:
         return getIntValue(type, 0);
     case kIROp_BoolType:
@@ -7536,8 +7534,6 @@ bool isIntegralType(IRType* t)
         case BaseType::UInt64:
         case BaseType::IntPtr:
         case BaseType::UIntPtr:
-        case BaseType::Int8x4Packed:
-        case BaseType::UInt8x4Packed:
             return true;
         default:
             return false;
@@ -7584,10 +7580,6 @@ IntInfo getIntTypeInfo(const IRType* intType)
     case kIROp_Int64Type:
         return {64, true};
 
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
-        return {32, false};
-
     case kIROp_IntPtrType:  // target platform dependent
     case kIROp_UIntPtrType: // target platform dependent
     default:
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index e5037bf04a..e5ca77634c 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -4633,8 +4633,6 @@ struct ExprLoweringVisitorBase : public ExprVisitor<Derived, LoweredValInfo>
             case BaseType::UInt64:
             case BaseType::UIntPtr:
             case BaseType::IntPtr:
-            case BaseType::Int8x4Packed:
-            case BaseType::UInt8x4Packed:
                 return LoweredValInfo::simple(getBuilder()->getIntValue(type, 0));
 
             case BaseType::Half:
diff --git a/source/slang/slang-mangle.cpp b/source/slang/slang-mangle.cpp
index dedbb2d486..d51fafb6bd 100644
--- a/source/slang/slang-mangle.cpp
+++ b/source/slang/slang-mangle.cpp
@@ -186,13 +186,6 @@ void emitBaseType(ManglingContext* context, BaseType baseType)
     case BaseType::IntPtr:
         emitRaw(context, "ip");
         break;
-    case BaseType::Int8x4Packed:
-        emitRaw(context, "c4p");
-        break;
-    case BaseType::UInt8x4Packed:
-        emitRaw(context, "C4p");
-        break;
-
     default:
         SLANG_UNEXPECTED("unimplemented case in base type mangling");
         break;
diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp
index a412bf5b28..48cfd8e58a 100644
--- a/source/slang/slang-type-layout.cpp
+++ b/source/slang/slang-type-layout.cpp
@@ -110,10 +110,6 @@ struct DefaultLayoutRulesImpl : SimpleLayoutRulesImpl
                 sizeof(intptr_t),
                 sizeof(intptr_t));
 
-        case BaseType::Int8x4Packed:
-        case BaseType::UInt8x4Packed:
-            return SimpleLayoutInfo(LayoutResourceKind::Uniform, 4, 4);
-
         case BaseType::Half:
             return SimpleLayoutInfo(LayoutResourceKind::Uniform, 2, 2);
         case BaseType::Float:
diff --git a/source/slang/slang-type-system-shared.h b/source/slang/slang-type-system-shared.h
index 583eb22166..d7bd43122e 100644
--- a/source/slang/slang-type-system-shared.h
+++ b/source/slang/slang-type-system-shared.h
@@ -22,8 +22,6 @@ namespace Slang
     X(Char)                  \
     X(IntPtr)                \
     X(UIntPtr)               \
-    X(Int8x4Packed)          \
-    X(UInt8x4Packed)         \
     /* end */
 
 enum class BaseType
diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp
index 001354162b..b94752b8b3 100644
--- a/source/slang/slang.cpp
+++ b/source/slang/slang.cpp
@@ -82,8 +82,6 @@ namespace Slang
      BaseTypeInfo::Flag::Signed | BaseTypeInfo::Flag::Integer,
      uint8_t(BaseType::IntPtr)},
     {uint8_t(sizeof(uintptr_t)), BaseTypeInfo::Flag::Integer, uint8_t(BaseType::UIntPtr)},
-    {uint8_t(sizeof(uint32_t)), BaseTypeInfo::Flag::Integer, uint8_t(BaseType::Int8x4Packed)},
-    {uint8_t(sizeof(uint32_t)), BaseTypeInfo::Flag::Integer, uint8_t(BaseType::UInt8x4Packed)},
 };
 
 /* static */ bool BaseTypeInfo::check()
@@ -135,10 +133,6 @@ namespace Slang
         return UnownedStringSlice::fromLiteral("intptr_t");
     case BaseType::UIntPtr:
         return UnownedStringSlice::fromLiteral("uintptr_t");
-    case BaseType::Int8x4Packed:
-        return UnownedStringSlice::fromLiteral("int8_t4_packed");
-    case BaseType::UInt8x4Packed:
-        return UnownedStringSlice::fromLiteral("uint8_t4_packed");
     default:
         {
             SLANG_ASSERT(!"Unknown basic type");
@@ -3101,7 +3095,8 @@ static void _calcViewInitiatingHierarchy(
     for (auto& [_, value] : outHierarchy)
     {
         value.sort(
-            [](SourceView* a, SourceView* b) -> bool {
+            [](SourceView* a, SourceView* b) -> bool
+            {
                 return a->getInitiatingSourceLoc().getRaw() < b->getInitiatingSourceLoc().getRaw();
             });
     }
diff --git a/tests/hlsl-intrinsic/packed/packed-types-error.slang b/tests/hlsl-intrinsic/packed/packed-types-error.slang
deleted file mode 100644
index 7034f50a90..0000000000
--- a/tests/hlsl-intrinsic/packed/packed-types-error.slang
+++ /dev/null
@@ -1,21 +0,0 @@
-//TEST(compute):SIMPLE(filecheck=CHECK): -target spirv
-
-[numthreads(1, 1, 1)]
-void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) {
-    uint8_t4_packed packedU1 = 0x0U;
-    uint8_t4_packed packedU2 = 0xFU;
-    int8_t4_packed packedS1 = 0xFU;
-    int8_t4_packed packedS2 = 0xFU;
-
-    // Arithmetic and logical (bitwise) operations are not supported on packed types.
-    // An attempt to overload these operators will fail during compilation due to ambiguity caused by multiple possible overloads.
-
-    // CHECK: error 39999: ambiguous call to '-' with arguments of type
-    uint8_t4_packed val1 = packedU1 - packedU2;
-    // CHECK: error 39999: ambiguous call to '*' with arguments of type
-    int8_t4_packed val2 = packedS1 * packedS2;
-    // CHECK: error 39999: ambiguous call to '&' with arguments of type
-    uint8_t4_packed val3 = packedU1 & packedS1;
-    // CHECK: error 39999: ambiguous call to '|' with arguments of type
-    int8_t4_packed val4 = packedU1 | packedS1;
-}
diff --git a/tests/hlsl-intrinsic/packed/packed-types-warning.slang b/tests/hlsl-intrinsic/packed/packed-types-warning.slang
deleted file mode 100644
index 0b46925a40..0000000000
--- a/tests/hlsl-intrinsic/packed/packed-types-warning.slang
+++ /dev/null
@@ -1,72 +0,0 @@
-//TEST(compute):SIMPLE(filecheck=CHECK): -target spirv
-
-[numthreads(1, 1, 1)]
-void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) {
-    uint8_t4_packed packedU = 0U;
-    int8_t4_packed packedS = 0xFU;
-    uint val1 = 0xFU;
-
-    //
-    // Implicit conversions between the packed types are not OK, they must be explicit.
-    //
-
-    // CHECK: warning 30081: implicit conversion from 'int8_t4_packed' to 'uint8_t4_packed' is not recommended
-    packedU = packedS;
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int8_t4_packed' is not recommended
-    packedS = packedU;
-
-
-    // Implicit casting from 32 bit literals are OK.
-    packedU = 32U;
-    packedS = 32;
-
-    // Implicit casting from 64 bit literals are not OK.
-    // CHECK: warning 30081: implicit conversion from 'uint64_t' to 'uint8_t4_packed' is not recommended
-    packedU = 0xFFFFFFFFFFULL;
-    // CHECK: warning 30081: implicit conversion from 'int64_t' to 'uint8_t4_packed' is not recommended
-    packedU = 0xFFFFFFFFFFLL;
-
-    //
-    // Explicit casting from other builtin integer types are OK.
-    //
-    packedU = uint8_t4_packed(val1);
-    packedU = uint8_t4_packed(uint16_t(123));
-    val1 = uint(packedS);
-
-    //
-    // Implicit casting from other builtin integer types are not OK.
-    //
-
-    // CHECK: warning 30081: implicit conversion from 'uint' to 'uint8_t4_packed' is not recommended
-    packedU = val1;
-    // CHECK: warning 30081: implicit conversion from 'uint' to 'int8_t4_packed' is not recommended
-    packedS = val1;
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'uint' is not recommended
-    val1 = packedU;
-    // CHECK: warning 30081: implicit conversion from 'int8_t4_packed' to 'uint' is not recommended
-    val1 = packedS;
-
-    // CHECK: warning 30081: implicit conversion from 'uint8_t' to 'uint8_t4_packed' is not recommended
-    packedU = uint8_t(1);
-    // CHECK: warning 30081: implicit conversion from 'int64_t' to 'int8_t4_packed' is not recommended
-    packedS = int64_t(1);
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'uint64_t' is not recommended
-    uint64_t val2 = packedU;
-    // CHECK: warning 30081: implicit conversion from 'int8_t4_packed' to 'int16_t' is not recommended
-    int16_t val3 = packedS;
-
-    //
-    // Arithmetic and logical (bitwise) operations are not supported on packed types,
-    // but overload to integer types will be made and unrecommended conversion warnings
-    // should be thrown out.
-    //
-
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int' is not recommended
-    packedU = uint8_t4_packed(packedU + 32);
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int' is not recommended
-    packedU = uint8_t4_packed(packedU / 2);
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int' is not recommended
-    packedU = uint8_t4_packed(packedU | 0xF);
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int' is not recommended
-    packedU = uint8_t4_packed(packedU & 0x3);
-}
diff --git a/tests/hlsl-intrinsic/packed/packed-types.slang b/tests/hlsl-intrinsic/packed/packed-types.slang
deleted file mode 100644
index 0bbc6f4049..0000000000
--- a/tests/hlsl-intrinsic/packed/packed-types.slang
+++ /dev/null
@@ -1,36 +0,0 @@
-//TEST(compute):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
-//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -render-feature hardware-device
-//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -profile cs_6_6 -dx12 -use-dxil -shaderobj -render-feature hardware-device
-//TEST(compute):COMPARE_COMPUTE_EX:-metal -compute -shaderobj 
-//TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj
-//TEST(compute):COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
-//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -g0
-
-//TEST_INPUT:ubuffer(data=[0xD37A83FF], stride=4):name packedUArray
-StructuredBuffer<uint8_t4_packed> packedUArray;
-
-//TEST_INPUT:ubuffer(data=[0xDEADBEEF], stride=4):name packedSArray
-StructuredBuffer<int8_t4_packed> packedSArray;
-
-//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name outputBuffer
-RWStructuredBuffer<uint8_t4_packed> outputBuffer;
-
-// Test type layout works.
-struct Custom {
-    uint8_t4_packed packedU;
-    uint3 other1;
-    int8_t4_packed packedS;
-    float other2;
-}
-
-[numthreads(1, 1, 1)]
-void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) {
-    uint id = dispatchThreadID.x;
-
-    Custom val;
-    val.packedU = packedUArray[id];
-    val.packedS = packedSArray[id];
-
-    outputBuffer[id] = val.packedU;
-    outputBuffer[id + 1] = uint8_t4_packed(val.packedS);
-}
diff --git a/tests/hlsl-intrinsic/packed/packed-types.slang.expected.txt b/tests/hlsl-intrinsic/packed/packed-types.slang.expected.txt
deleted file mode 100644
index a348b3dfd1..0000000000
--- a/tests/hlsl-intrinsic/packed/packed-types.slang.expected.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-D37A83FF
-DEADBEEF

From ed9effbbf3e9e4d8b553bb5e155ae10b823f964c Mon Sep 17 00:00:00 2001
From: fairywreath <nerradfour@gmail.com>
Date: Wed, 26 Feb 2025 13:07:31 -0500
Subject: [PATCH 7/8] fix wgsl pack clamp

---
 source/slang/hlsl.meta.slang | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index cccc23acb5..fd8537e991 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -24389,7 +24389,7 @@ uint packUint4x8Clamp(int32_t4 unpackedValue)
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_clamp_u8";
-    case wgsl: __intrinsic_asm "pack4xU8Clamp";
+    case wgsl: __intrinsic_asm "pack4xU8Clamp(vec4<u32>($0))";
     default:
         return packInt4x8(clamp(unpackedValue, 0, 255));
     }

From bbd0ad4bdb8320ad75604f9aa64f8ff6640aae40 Mon Sep 17 00:00:00 2001
From: fairywreath <nerradfour@gmail.com>
Date: Wed, 26 Feb 2025 21:47:32 -0500
Subject: [PATCH 8/8] fix formatting

---
 source/slang/slang.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp
index b94752b8b3..82a0e877a4 100644
--- a/source/slang/slang.cpp
+++ b/source/slang/slang.cpp
@@ -3095,8 +3095,7 @@ static void _calcViewInitiatingHierarchy(
     for (auto& [_, value] : outHierarchy)
     {
         value.sort(
-            [](SourceView* a, SourceView* b) -> bool
-            {
+            [](SourceView* a, SourceView* b) -> bool {
                 return a->getInitiatingSourceLoc().getRaw() < b->getInitiatingSourceLoc().getRaw();
             });
     }