diff --git a/source/slang-core-module/slang-embedded-core-module-source.cpp b/source/slang-core-module/slang-embedded-core-module-source.cpp
index 557b50eb2d..9dc324d2a6 100644
--- a/source/slang-core-module/slang-embedded-core-module-source.cpp
+++ b/source/slang-core-module/slang-embedded-core-module-source.cpp
@@ -56,12 +56,6 @@ enum BaseTypeConversionRank : uint8_t
     kBaseTypeConversionRank_Int32,
     kBaseTypeConversionRank_IntPtr,
     kBaseTypeConversionRank_Int64,
-
-    // Packed type conversion ranks where the overall rank order does not apply.
-    // They must be explicitly casted to another type.
-    kBaseTypeConversionRank_Int8x4Packed,
-    kBaseTypeConversionRank_UInt8x4Packed,
-
     kBaseTypeConversionRank_Error,
 };
 
@@ -155,17 +149,6 @@ static const BaseTypeConversionInfo kBaseTypes[] = {
      UINT_MASK,
      kBaseTypeConversionKind_Unsigned,
      kBaseTypeConversionRank_IntPtr},
-
-    {"int8_t4_packed",
-     BaseType::Int8x4Packed,
-     0,
-     kBaseTypeConversionKind_Unsigned,
-     kBaseTypeConversionRank_Int8x4Packed},
-    {"uint8_t4_packed",
-     BaseType::UInt8x4Packed,
-     0,
-     kBaseTypeConversionKind_Unsigned,
-     kBaseTypeConversionRank_UInt8x4Packed},
 };
 
 void Session::finalizeSharedASTBuilder()
@@ -192,12 +175,6 @@ void Session::finalizeSharedASTBuilder()
         globalAstBuilder->getBuiltinType(baseType.tag);
 }
 
-static bool isConversionRankPackedType(BaseTypeConversionRank rank)
-{
-    return (rank == BaseTypeConversionRank::kBaseTypeConversionRank_Int8x4Packed) ||
-           (rank == BaseTypeConversionRank::kBaseTypeConversionRank_UInt8x4Packed);
-}
-
 // Given two base types, we need to be able to compute the cost of converting between them.
 ConversionCost getBaseTypeConversionCost(
     BaseTypeConversionInfo const& toInfo,
@@ -210,14 +187,6 @@ ConversionCost getBaseTypeConversionCost(
         return kConversionCost_None;
     }
 
-    // Handle special case for packed types, where they must be explicitly casted to another type.
-    bool isToPackedType = isConversionRankPackedType(toInfo.conversionRank);
-    bool isFromPackedType = isConversionRankPackedType(fromInfo.conversionRank);
-    if (isToPackedType || isFromPackedType)
-    {
-        return kConversionCost_GeneralConversion;
-    }
-
     // Conversions within the same kind are easist to handle
     if (toInfo.conversionKind == fromInfo.conversionKind)
     {
diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang
index c9f3fb5337..a671a3dc44 100644
--- a/source/slang/hlsl.meta.slang
+++ b/source/slang/hlsl.meta.slang
@@ -17309,8 +17309,8 @@ uint dot4add_u8packed(uint x, uint y, uint acc)
             result:$$uint = OpIAdd %dotResult $acc;
         };
     default:
-        uint4 vecX = unpack_u8u32(uint8_t4_packed(x));
-        uint4 vecY = unpack_u8u32(uint8_t4_packed(y));
+        uint4 vecX = unpackUint4x8ToUint32(x);
+        uint4 vecY = unpackUint4x8ToUint32(y);
         return dot(vecX, vecY) + acc;
     }
 }
@@ -17337,8 +17337,8 @@ int dot4add_i8packed(uint x, uint y, int acc)
             result:$$int = OpIAdd %dotResult $acc;
         };
     default:
-        int4 vecX = unpack_s8s32(int8_t4_packed(x));
-        int4 vecY = unpack_s8s32(int8_t4_packed(y));
+        int4 vecX = unpackInt4x8ToInt32(x);
+        int4 vecY = unpackInt4x8ToInt32(y);
         return dot(vecX, vecY) + acc;
     }
 }
@@ -24035,383 +24035,426 @@ T workgroupUniformLoad<T>(__ref T v)
 }
 
 //
-// Pack/Unpack Math Intrinsics
+// HLSL Pack/Unpack Math Intrinsics
 //
 // These were introduced in SM 6.6 but requirements are dropped to SM 5.0 here
 // to expose these intrinsics on targets that do not have SM 6.6 features.
 //
 
-//@hidden:
+//@public:
+
+typealias uint8_t4_packed = uint;
+typealias int8_t4_packed = uint;
+
+/// Unpack 4 signed 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
-uint16_t __lsb_as_u16(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int16_t4 unpack_s8s16(int8_t4_packed packed)
 {
-    return uint16_t(val & 0xFFU);
+    return unpackInt4x8ToInt16(packed);
 }
 
-//@hidden:
+/// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
-uint32_t __lsb_as_u32(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint16_t4 unpack_u8u16(uint8_t4_packed packed)
 {
-    return (val & 0xFFU);
+    return unpackUint4x8ToUint16(packed);
 }
 
-//@hidden:
+/// Unpack 4 signed 8-bit values into a vector of 32 bit integers.
 [__readNone]
 [ForceInline]
-int8_t __lsb_as_s8(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int32_t4 unpack_s8s32(int8_t4_packed packed)
 {
-    return int8_t(val & 0xFFU);
+    return unpackInt4x8ToInt32(packed);
 }
 
-//@hidden:
+/// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers.
 [__readNone]
 [ForceInline]
-int16_t __lsb_as_s16(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint32_t4 unpack_u8u32(uint8_t4_packed packed)
 {
-    return int16_t(__lsb_as_s8(val));
+    return unpackUint4x8ToUint32(packed);
 }
 
-//@hidden:
+/// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
-int32_t __lsb_as_s32(uint32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_u8(uint32_t4 unpackedValue)
 {
-    return int32_t(__lsb_as_s8(val));
+    return packUint4x8(unpackedValue);
 }
 
-//@hidden:
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_s8(int32_t4 unpackedValue)
+{
+    return packInt4x8(unpackedValue);
+}
+
+/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_u8(uint16_t4 unpackedValue)
+{
+    return packUint4x8(unpackedValue);
+}
+
+/// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_s8(int16_t4 unpackedValue)
+{
+    return packInt4x8(unpackedValue);
+}
+
+/// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue)
+{
+    return packUint4x8Clamp(unpackedValue);
+}
+
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue)
+{
+    return packInt4x8Clamp(unpackedValue);
+}
+
+/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
-uint32_t __lsb_clamp_u8_as_u32(int32_t val)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue)
 {
-    return clamp(val, 0, 255);
+    return packUint4x8Clamp(unpackedValue);
 }
 
+/// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
+[__readNone]
+[ForceInline]
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue)
+{
+    return packInt4x8Clamp(unpackedValue);
+}
+
+// Work-graphs
+
+//@public:
+/// read-only input to Broadcasting launch node.
+__generic<T>
+//TODO: DispatchNodeInputRecord should be available only for broadcasting node shader.
+//[require(broadcasting_node)]
+[require(spirv)]
+struct DispatchNodeInputRecord
+{
+    /// Provide an access to a record object that only holds a single record.
+    NodePayloadPtr<T> Get()
+    {
+        int index = 0;
+        __target_switch
+        {
+        case spirv:
+            return spirv_asm
+            {
+                %in_payload_t = OpTypeNodePayloadArrayAMDX $$T;
+                %in_payload_ptr_t = OpTypePointer NodePayloadAMDX %in_payload_t;
+                %var = OpVariable %in_payload_ptr_t NodePayloadAMDX;
+                result : $$NodePayloadPtr<T> = OpAccessChain %var $index;
+            };
+        }
+    }
+};
+
+//
+// Pack/Unpack Intrinsics
+//
+
 //@hidden:
+
+[__readNone]
+[ForceInline]
+uint16_t __lsbAsUint16(uint32_t val)
+{
+    return uint16_t(val & 0xFFU);
+}
+
+[__readNone]
+[ForceInline]
+uint32_t __lsbAsUint32(uint32_t val)
+{
+    return (val & 0xFFU);
+}
+
+[__readNone]
+[ForceInline]
+int8_t __lsbAsInt8(uint32_t val)
+{
+    return int8_t(val);
+}
+
 [__readNone]
 [ForceInline]
-uint32_t __lsb_clamp_s8_as_u32(int32_t val)
+int16_t __lsbAsInt16(uint32_t val)
 {
-    return (uint32_t(clamp(val, -128, 127)) & 0xFFU);
+    return int16_t(__lsbAsInt8(val));
+}
+
+[__readNone]
+[ForceInline]
+int32_t __lsbAsInt32(uint32_t val)
+{
+    return int32_t(__lsbAsInt8(val));
 }
 
 //@public:
-/// Unpack 4 signed 8-bit values into a vector of 16 bit integers.
+
+/// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int16_t4 unpack_s8s16(int8_t4_packed packed)
+[require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
+uint32_t4 unpackUint4x8ToUint32(uint packedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "unpack_s8s16";
-    case spirv: 
+    case hlsl: __intrinsic_asm "unpack_u8u32";
+    case wgsl: __intrinsic_asm "unpack4xU8";
+    case spirv:
         return spirv_asm
         {
-            %s8Vec = OpBitcast $$vector<int8_t, 4> $packed;
-            result:$$vector<int16_t, 4> = OpSConvert %s8Vec
+            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packedValue;
+            result:$$vector<uint32_t, 4> = OpUConvert %u8Vec
         };
     default:
-        uint32_t packedValue = uint32_t(packed);
-        return int16_t4
+        return uint32_t4
         (
-            __lsb_as_s16(packedValue),
-            __lsb_as_s16(packedValue >> 8U),
-            __lsb_as_s16(packedValue >> 16U),
-            __lsb_as_s16(packedValue >> 24U),
+            __lsbAsUint32(packedValue),
+            __lsbAsUint32(packedValue >> 8U),
+            __lsbAsUint32(packedValue >> 16U),
+            uint32_t(packedValue >> 24U),
         );
     }
 }
 
-//@public:
 /// Unpack 4 unsigned 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint16_t4 unpack_u8u16(uint8_t4_packed packed)
+uint16_t4 unpackUint4x8ToUint16(uint packedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "unpack_u8u16";
-    case spirv: 
+    case spirv:
         return spirv_asm
         {
-            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packed;
+            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packedValue;
             result:$$vector<uint16_t, 4> = OpUConvert %u8Vec
         };
     default:
-        uint32_t packedValue = uint32_t(packed);
         return uint16_t4
         (
-            __lsb_as_u16(packedValue),
-            __lsb_as_u16(packedValue >> 8U),
-            __lsb_as_u16(packedValue >> 16U),
-            __lsb_as_u16(packedValue >> 24U),
+            __lsbAsUint16(packedValue),
+            __lsbAsUint16(packedValue >> 8U),
+            __lsbAsUint16(packedValue >> 16U),
+            uint16_t(packedValue >> 24U),
         );
     }
 }
 
-//@public:
 /// Unpack 4 signed 8-bit values into a vector of 32 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int32_t4 unpack_s8s32(int8_t4_packed packed)
+int32_t4 unpackInt4x8ToInt32(uint packedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "unpack_s8s32";
     case wgsl: __intrinsic_asm "unpack4xI8";
-    case spirv: 
+    case spirv:
         return spirv_asm
         {
-            %s8Vec = OpBitcast $$vector<int8_t, 4> $packed;
+            %s8Vec = OpBitcast $$vector<int8_t, 4> $packedValue;
             result:$$vector<int32_t, 4> = OpSConvert %s8Vec
         };
     default:
-        uint32_t packedValue = uint32_t(packed);
         return int32_t4
         (
-            __lsb_as_s32(packedValue),
-            __lsb_as_s32(packedValue >> 8U),
-            __lsb_as_s32(packedValue >> 16U),
-            __lsb_as_s32(packedValue >> 24U),
+            __lsbAsInt32(packedValue),
+            __lsbAsInt32(packedValue >> 8U),
+            __lsbAsInt32(packedValue >> 16U),
+            int32_t(int8_t(packedValue >> 24U)),
         );
     }
 }
 
-//@public:
-/// Unpack 4 unsigned 8-bit values into a vector of 32 bit integers.
+/// Unpack 4 signed 8-bit values into a vector of 16 bit integers.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint32_t4 unpack_u8u32(uint8_t4_packed packed)
+int16_t4 unpackInt4x8ToInt16(uint packedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "unpack_u8u32";
-    case wgsl: __intrinsic_asm "unpack4xU8";
-    case spirv: 
+    case hlsl: __intrinsic_asm "unpack_s8s16";
+    case spirv:
         return spirv_asm
         {
-            %u8Vec = OpBitcast $$vector<uint8_t, 4> $packed;
-            result:$$vector<uint32_t, 4> = OpUConvert %u8Vec
+            %s8Vec = OpBitcast $$vector<int8_t, 4> $packedValue;
+            result:$$vector<int16_t, 4> = OpSConvert %s8Vec
         };
     default:
-        uint32_t packedValue = uint32_t(packed);
-        return uint32_t4
+        return int16_t4
         (
-            __lsb_as_u32(packedValue),
-            __lsb_as_u32(packedValue >> 8U),
-            __lsb_as_u32(packedValue >> 16U),
-            __lsb_as_u32(packedValue >> 24U),
+            __lsbAsInt16(packedValue),
+            __lsbAsInt16(packedValue >> 8U),
+            __lsbAsInt16(packedValue >> 16U),
+            int16_t(int8_t(packedValue >> 24U)),
         );
     }
 }
 
-//@public:
 /// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint8_t4_packed pack_u8(uint32_t4 unpackedValue)
+uint packUint4x8(uint32_t4 unpackedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_u8";
     case wgsl: __intrinsic_asm "pack4xU8";
     default:
-        return uint8_t4_packed
-        (
-            __lsb_as_u32(unpackedValue.x)
-            | (__lsb_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_as_u32(unpackedValue.w) << 24U)
-        );
+        return __lsbAsUint32(unpackedValue.x)
+            | (__lsbAsUint32(unpackedValue.y) << 8U)
+            | (__lsbAsUint32(unpackedValue.z) << 16U)
+            | (unpackedValue.w << 24U);
     }
 }
 
-//@public:
-/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int8_t4_packed pack_s8(int32_t4 unpackedValue)
+uint packUint4x8(uint16_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_s8";
-    case wgsl: __intrinsic_asm "pack4xI8";
+    case hlsl: __intrinsic_asm "pack_u8";
     default:
-        return int8_t4_packed
-        (
-            __lsb_as_u32(unpackedValue.x)
-            | (__lsb_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_as_u32(unpackedValue.w) << 24U)
-        );
+        return packUint4x8(uint32_t4(unpackedValue));
     }
 }
 
-//@public:
-/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint8_t4_packed pack_u8(uint16_t4 unpackedValue)
+uint packInt4x8(int32_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_u8";
+    case hlsl: __intrinsic_asm "pack_s8";
+    case wgsl: __intrinsic_asm "pack4xI8";
     default:
-        return uint8_t4_packed
-        (
-            __lsb_as_u32(unpackedValue.x)
-            | (__lsb_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_as_u32(unpackedValue.w) << 24U)
-        );
+        return packUint4x8(uint32_t4(unpackedValue));
     }
 }
 
-//@public:
 /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers, dropping unused bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int8_t4_packed pack_s8(int16_t4 unpackedValue)
+uint packInt4x8(int16_t4 unpackedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_s8";
     default:
-        return int8_t4_packed
-        (
-            __lsb_as_u32(unpackedValue.x)
-            | (__lsb_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_as_u32(unpackedValue.w) << 24U)
-        );
+        return packUint4x8(uint32_t4(unpackedValue));
     }
 }
 
-//@public:
-/// Pack a vector of 4 unsigned 32 bit integers into a packed value of 4 8-bit integers,
-/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint8_t4_packed pack_clamp_u8(int32_t4 unpackedValue)
+uint packUint4x8Clamp(int32_t4 unpackedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_clamp_u8";
     case wgsl: __intrinsic_asm "pack4xU8Clamp(vec4<u32>($0))";
     default:
-        return uint8_t4_packed
-        (
-            __lsb_clamp_u8_as_u32(unpackedValue.x)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.w) << 24U)
-        );
+        return packInt4x8(clamp(unpackedValue, 0, 255));
     }
 }
 
-//@public:
-/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
-/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
+/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int8_t4_packed pack_clamp_s8(int32_t4 unpackedValue)
+uint packUint4x8Clamp(int16_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_clamp_s8";
-    case wgsl: __intrinsic_asm "pack4xI8Clamp";
+    case hlsl: __intrinsic_asm "pack_clamp_u8";
     default:
-        return int8_t4_packed
-        (
-            __lsb_clamp_s8_as_u32(unpackedValue.x)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.w) << 24U)
-        );
+        return packInt4x8(clamp(unpackedValue, 0, 255));
     }
 }
 
-//@public:
-/// Pack a vector of 4 unsigned 16 bit integers into a packed value of 4 8-bit integers,
-/// clamping each value to the range [0, 255] to ensure it fits within 8 bits.
+/// Pack a vector of 4 signed 32 bit integers into a packed value of 4 8-bit integers,
+/// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-uint8_t4_packed pack_clamp_u8(int16_t4 unpackedValue)
+uint packInt4x8Clamp(int32_t4 unpackedValue)
 {
     __target_switch
     {
-    case hlsl: __intrinsic_asm "pack_clamp_u8";
+    case hlsl: __intrinsic_asm "pack_clamp_s8";
+    case wgsl: __intrinsic_asm "pack4xI8Clamp";
     default:
-        return uint8_t4_packed
-        (
-            __lsb_clamp_u8_as_u32(unpackedValue.x)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_clamp_u8_as_u32(unpackedValue.w) << 24U)
-        );
+        return packInt4x8(clamp(unpackedValue, -128, 127));
     }
 }
 
-//@public:
 /// Pack a vector of 4 signed 16 bit integers into a packed value of 4 8-bit integers,
 /// clamping each value to the range [-128, 127] to ensure it fits within 8 bits.
 [__readNone]
 [ForceInline]
 [require(cpp_cuda_glsl_hlsl_metal_spirv_wgsl, shader5_sm_5_0)]
-int8_t4_packed pack_clamp_s8(int16_t4 unpackedValue)
+uint packInt4x8Clamp(int16_t4 unpackedValue)
 {
     __target_switch
     {
     case hlsl: __intrinsic_asm "pack_clamp_s8";
     default:
-        return int8_t4_packed
-        (
-            __lsb_clamp_s8_as_u32(unpackedValue.x)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.y) << 8U)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.z) << 16U)
-            | (__lsb_clamp_s8_as_u32(unpackedValue.w) << 24U)
-        );
+        return packInt4x8(clamp(unpackedValue, -128, 127));
     }
 }
-
-// Work-graphs
-
-//@public:
-/// read-only input to Broadcasting launch node.
-__generic<T>
-//TODO: DispatchNodeInputRecord should be available only for broadcasting node shader.
-//[require(broadcasting_node)]
-[require(spirv)]
-struct DispatchNodeInputRecord
-{
-    /// Provide an access to a record object that only holds a single record.
-    NodePayloadPtr<T> Get()
-    {
-        int index = 0;
-        __target_switch
-        {
-        case spirv:
-            return spirv_asm
-            {
-                %in_payload_t = OpTypeNodePayloadArrayAMDX $$T;
-                %in_payload_ptr_t = OpTypePointer NodePayloadAMDX %in_payload_t;
-                %var = OpVariable %in_payload_ptr_t NodePayloadAMDX;
-                result : $$NodePayloadPtr<T> = OpAccessChain %var $index;
-            };
-        }
-    }
-};
-
diff --git a/source/slang/slang-check-conversion.cpp b/source/slang/slang-check-conversion.cpp
index a9785a585e..c5cf192ee3 100644
--- a/source/slang/slang-check-conversion.cpp
+++ b/source/slang/slang-check-conversion.cpp
@@ -1026,8 +1026,6 @@ int getTypeBitSize(Type* t)
         return 16;
     case BaseType::Int:
     case BaseType::UInt:
-    case BaseType::Int8x4Packed:
-    case BaseType::UInt8x4Packed:
         return 32;
     case BaseType::Int64:
     case BaseType::UInt64:
diff --git a/source/slang/slang-check-decl.cpp b/source/slang/slang-check-decl.cpp
index 486ac6e9c9..5ee8ba3efe 100644
--- a/source/slang/slang-check-decl.cpp
+++ b/source/slang/slang-check-decl.cpp
@@ -1981,8 +1981,6 @@ void SemanticsDeclHeaderVisitor::checkVarDeclCommon(VarDeclBase* varDecl)
             case BaseType::UInt:
             case BaseType::UInt64:
             case BaseType::UIntPtr:
-            case BaseType::Int8x4Packed:
-            case BaseType::UInt8x4Packed:
                 break;
             default:
                 getSink()->diagnose(varDecl, Diagnostics::staticConstRequirementMustBeIntOrBool);
diff --git a/source/slang/slang-emit-c-like.cpp b/source/slang/slang-emit-c-like.cpp
index 1c48d98efd..ff40d5b286 100644
--- a/source/slang/slang-emit-c-like.cpp
+++ b/source/slang/slang-emit-c-like.cpp
@@ -266,11 +266,6 @@ void CLikeSourceEmitter::emitSimpleType(IRType* type)
     case kIROp_UIntPtrType:
         return UnownedStringSlice("uintptr_t");
 
-    case kIROp_Int8x4PackedType:
-        return UnownedStringSlice("int8_t4_packed");
-    case kIROp_UInt8x4PackedType:
-        return UnownedStringSlice("uint8_t4_packed");
-
     case kIROp_HalfType:
         return UnownedStringSlice("half");
 
@@ -1334,8 +1329,6 @@ void CLikeSourceEmitter::emitSimpleValueImpl(IRInst* inst)
                         return;
                     }
                 case BaseType::UInt:
-                case BaseType::Int8x4Packed:
-                case BaseType::UInt8x4Packed:
                     {
                         m_writer->emit(UInt(uint32_t(litInst->value.intVal)));
                         m_writer->emit("U");
@@ -4045,8 +4038,6 @@ void CLikeSourceEmitter::emitVecNOrScalar(
                 m_writer->emit("ushort");
                 break;
             case kIROp_UIntType:
-            case kIROp_Int8x4PackedType:
-            case kIROp_UInt8x4PackedType:
                 m_writer->emit("uint");
                 break;
             case kIROp_UInt64Type:
diff --git a/source/slang/slang-emit-cpp.cpp b/source/slang/slang-emit-cpp.cpp
index 13a85e8abf..bfc021677d 100644
--- a/source/slang/slang-emit-cpp.cpp
+++ b/source/slang/slang-emit-cpp.cpp
@@ -101,13 +101,8 @@ static const char s_xyzwNames[] = "xyzw";
     case kIROp_UIntPtrType:
         return UnownedStringSlice("uintptr_t");
 
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
-        return UnownedStringSlice("uint32_t");
-
         // Not clear just yet how we should handle half... we want all processing as float
         // probly, but when reading/writing to memory converting
-
     case kIROp_HalfType:
         return UnownedStringSlice("half");
 
diff --git a/source/slang/slang-emit-cuda.cpp b/source/slang/slang-emit-cuda.cpp
index 58ac377bfb..8657b3707e 100644
--- a/source/slang/slang-emit-cuda.cpp
+++ b/source/slang/slang-emit-cuda.cpp
@@ -84,9 +84,6 @@ UnownedStringSlice CUDASourceEmitter::getBuiltinTypeName(IROp op)
     case kIROp_UIntPtrType:
         return UnownedStringSlice("uint");
 #endif
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
-        return UnownedStringSlice("uint");
 
     case kIROp_HalfType:
         return UnownedStringSlice("__half");
diff --git a/source/slang/slang-emit-glsl.cpp b/source/slang/slang-emit-glsl.cpp
index 696830bf24..fca5a8933b 100644
--- a/source/slang/slang-emit-glsl.cpp
+++ b/source/slang/slang-emit-glsl.cpp
@@ -1330,8 +1330,6 @@ void GLSLSourceEmitter::emitSimpleValueImpl(IRInst* inst)
                         return;
                     }
                 case BaseType::UInt:
-                case BaseType::Int8x4Packed:
-                case BaseType::UInt8x4Packed:
                     {
                         m_writer->emit(UInt(uint32_t(litInst->value.intVal)));
                         m_writer->emit("U");
@@ -2175,8 +2173,6 @@ bool GLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
                 break;
 
             case BaseType::UInt:
-            case BaseType::Int8x4Packed:
-            case BaseType::UInt8x4Packed:
                 if (fromType == BaseType::Float)
                 {
                     m_writer->emit("floatBitsToUint");
@@ -3186,18 +3182,6 @@ void GLSLSourceEmitter::emitSimpleTypeImpl(IRType* type)
 #endif
             return;
         }
-    case kIROp_Int8x4PackedType:
-        {
-            _requireBaseType(BaseType::Int8x4Packed);
-            m_writer->emit("uint");
-            return;
-        }
-    case kIROp_UInt8x4PackedType:
-        {
-            _requireBaseType(BaseType::UInt8x4Packed);
-            m_writer->emit("uint");
-            return;
-        }
     case kIROp_VoidType:
     case kIROp_BoolType:
     case kIROp_Int8Type:
diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp
index 59d40d3a14..89300e13e3 100644
--- a/source/slang/slang-emit-hlsl.cpp
+++ b/source/slang/slang-emit-hlsl.cpp
@@ -902,8 +902,6 @@ bool HLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
             case BaseType::UInt64:
             case BaseType::UIntPtr:
             case BaseType::Bool:
-            case BaseType::Int8x4Packed:
-            case BaseType::UInt8x4Packed:
                 // Because the intermediate type will always
                 // be an integer type, we can convert to
                 // another integer type of the same size
@@ -943,8 +941,6 @@ bool HLSLSourceEmitter::tryEmitInstExprImpl(IRInst* inst, const EmitOpInfo& inOu
             case BaseType::UInt:
             case BaseType::Int:
             case BaseType::Bool:
-            case BaseType::Int8x4Packed:
-            case BaseType::UInt8x4Packed:
                 break;
             case BaseType::UInt16:
             case BaseType::Int16:
@@ -1330,8 +1326,6 @@ void HLSLSourceEmitter::emitSimpleTypeImpl(IRType* type)
     case kIROp_Int16Type:
     case kIROp_UInt16Type:
     case kIROp_HalfType:
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
         {
             m_writer->emit(getDefaultBuiltinTypeName(type->getOp()));
             return;
diff --git a/source/slang/slang-emit-metal.cpp b/source/slang/slang-emit-metal.cpp
index 0a7db8b288..1bb7383469 100644
--- a/source/slang/slang-emit-metal.cpp
+++ b/source/slang/slang-emit-metal.cpp
@@ -1099,10 +1099,6 @@ void MetalSourceEmitter::emitSimpleTypeImpl(IRType* type)
     case kIROp_UIntPtrType:
         m_writer->emit("ulong");
         return;
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
-        m_writer->emit("uint");
-        return;
     case kIROp_StructType:
         m_writer->emit(getName(type));
         return;
diff --git a/source/slang/slang-emit-spirv.cpp b/source/slang/slang-emit-spirv.cpp
index 802df915e6..c7e222247c 100644
--- a/source/slang/slang-emit-spirv.cpp
+++ b/source/slang/slang-emit-spirv.cpp
@@ -1466,8 +1466,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
         case kIROp_Int8Type:
         case kIROp_IntType:
         case kIROp_Int64Type:
-        case kIROp_Int8x4PackedType:
-        case kIROp_UInt8x4PackedType:
             {
                 const IntInfo i = getIntTypeInfo(as<IRType>(inst));
                 if (i.width == 16)
@@ -7642,8 +7640,6 @@ struct SPIRVEmitContext : public SourceEmitterBase, public SPIRVEmitSharedContex
             case kIROp_UInt64Type:
             case kIROp_UInt8Type:
             case kIROp_UIntPtrType:
-            case kIROp_Int8x4PackedType:
-            case kIROp_UInt8x4PackedType:
                 spvEncoding = 6; // Unsigned
                 break;
             case kIROp_FloatType:
diff --git a/source/slang/slang-emit-wgsl.cpp b/source/slang/slang-emit-wgsl.cpp
index 7c83b194d5..d87cd06deb 100644
--- a/source/slang/slang-emit-wgsl.cpp
+++ b/source/slang/slang-emit-wgsl.cpp
@@ -511,10 +511,6 @@ void WGSLSourceEmitter::emitSimpleTypeImpl(IRType* type)
     case kIROp_UIntPtrType:
         m_writer->emit("u64");
         return;
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
-        m_writer->emit("u32");
-        return;
     case kIROp_StructType:
         m_writer->emit(getName(type));
         return;
@@ -967,8 +963,6 @@ void WGSLSourceEmitter::emitSimpleValueImpl(IRInst* inst)
                         return;
                     }
                 case BaseType::UInt:
-                case BaseType::Int8x4Packed:
-                case BaseType::UInt8x4Packed:
                     {
                         m_writer->emit("u32(");
                         m_writer->emit(UInt(uint32_t(litInst->value.intVal)));
diff --git a/source/slang/slang-ir-any-value-marshalling.cpp b/source/slang/slang-ir-any-value-marshalling.cpp
index 2dc91d299a..d124b293d4 100644
--- a/source/slang/slang-ir-any-value-marshalling.cpp
+++ b/source/slang/slang-ir-any-value-marshalling.cpp
@@ -153,8 +153,6 @@ struct AnyValueMarshallingContext
         case kIROp_IntPtrType:
         case kIROp_UIntPtrType:
         case kIROp_PtrType:
-        case kIROp_Int8x4PackedType:
-        case kIROp_UInt8x4PackedType:
             context->marshalBasicType(builder, dataType, concreteTypedVar);
             break;
         case kIROp_VectorType:
@@ -311,8 +309,6 @@ struct AnyValueMarshallingContext
                     break;
                 }
             case kIROp_UIntType:
-            case kIROp_Int8x4PackedType:
-            case kIROp_UInt8x4PackedType:
 #if SLANG_PTR_IS_32
             case kIROp_UIntPtrType:
 #endif
@@ -560,8 +556,6 @@ struct AnyValueMarshallingContext
                     break;
                 }
             case kIROp_UIntType:
-            case kIROp_Int8x4PackedType:
-            case kIROp_UInt8x4PackedType:
                 {
                     ensureOffsetAt4ByteBoundary();
                     if (fieldOffset < static_cast<uint32_t>(anyValInfo->fieldKeys.getCount()))
@@ -861,8 +855,6 @@ SlangInt _getAnyValueSizeRaw(IRType* type, SlangInt offset)
     case kIROp_FloatType:
     case kIROp_UIntType:
     case kIROp_BoolType:
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
         return alignUp(offset, 4) + 4;
     case kIROp_UInt64Type:
     case kIROp_Int64Type:
diff --git a/source/slang/slang-ir-byte-address-legalize.cpp b/source/slang/slang-ir-byte-address-legalize.cpp
index 9207e6a2fc..617c8c7c4f 100644
--- a/source/slang/slang-ir-byte-address-legalize.cpp
+++ b/source/slang/slang-ir-byte-address-legalize.cpp
@@ -840,8 +840,6 @@ struct ByteAddressBufferLegalizationContext
         case kIROp_IntType:
         case kIROp_FloatType:
         case kIROp_BoolType:
-        case kIROp_Int8x4PackedType:
-        case kIROp_UInt8x4PackedType:
             // The basic 32-bit types (and `bool`) can be handled by
             // loading `uint` values and then bit-casting.
             //
diff --git a/source/slang/slang-ir-layout.cpp b/source/slang/slang-ir-layout.cpp
index 1332c8a25a..7ce19bf670 100644
--- a/source/slang/slang-ir-layout.cpp
+++ b/source/slang/slang-ir-layout.cpp
@@ -128,9 +128,6 @@ static Result _calcSizeAndAlignment(
         BASE(UIntPtr, kPointerSize);
         BASE(Double, 8);
 
-        BASE(Int8x4Packed, 4);
-        BASE(UInt8x4Packed, 4);
-
         // We are currently handling `bool` following the HLSL
         // precednet of storing it in 4 bytes.
         //
diff --git a/source/slang/slang-ir-lower-bit-cast.cpp b/source/slang/slang-ir-lower-bit-cast.cpp
index 0a77836397..450ad8ac94 100644
--- a/source/slang/slang-ir-lower-bit-cast.cpp
+++ b/source/slang/slang-ir-lower-bit-cast.cpp
@@ -178,8 +178,6 @@ struct BitCastLoweringContext
         case kIROp_UIntType:
         case kIROp_FloatType:
         case kIROp_BoolType:
-        case kIROp_Int8x4PackedType:
-        case kIROp_UInt8x4PackedType:
 #if SLANG_PTR_IS_32
         case kIROp_IntPtrType:
         case kIROp_UIntPtrType:
diff --git a/source/slang/slang-ir-util.cpp b/source/slang/slang-ir-util.cpp
index 39c1c5bb18..f75a24ac6a 100644
--- a/source/slang/slang-ir-util.cpp
+++ b/source/slang/slang-ir-util.cpp
@@ -107,8 +107,6 @@ IROp getTypeStyle(IROp op)
     case kIROp_UInt64Type:
     case kIROp_IntPtrType:
     case kIROp_UIntPtrType:
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
         {
             // All int like
             return kIROp_IntType;
@@ -144,8 +142,6 @@ IROp getTypeStyle(BaseType op)
     case BaseType::UInt:
     case BaseType::UInt64:
     case BaseType::UIntPtr:
-    case BaseType::Int8x4Packed:
-    case BaseType::UInt8x4Packed:
         return kIROp_IntType;
     case BaseType::Half:
     case BaseType::Float:
@@ -476,12 +472,6 @@ void getTypeNameHint(StringBuilder& sb, IRInst* type)
     case kIROp_UIntPtrType:
         sb << "uintptr";
         break;
-    case kIROp_Int8x4PackedType:
-        sb << "int8_t4_packed";
-        break;
-    case kIROp_UInt8x4PackedType:
-        sb << "uint8_t4_packed";
-        break;
     case kIROp_CharType:
         sb << "char";
         break;
@@ -1862,10 +1852,6 @@ UnownedStringSlice getBasicTypeNameHint(IRType* basicType)
         return UnownedStringSlice::fromLiteral("uint64");
     case kIROp_UIntPtrType:
         return UnownedStringSlice::fromLiteral("uintptr");
-    case kIROp_Int8x4PackedType:
-        return UnownedStringSlice::fromLiteral("int8_t4_packed");
-    case kIROp_UInt8x4PackedType:
-        return UnownedStringSlice::fromLiteral("uint8_t4_packed");
     case kIROp_FloatType:
         return UnownedStringSlice::fromLiteral("float");
     case kIROp_HalfType:
diff --git a/source/slang/slang-ir.cpp b/source/slang/slang-ir.cpp
index 1b0f99cd39..b982385fa2 100644
--- a/source/slang/slang-ir.cpp
+++ b/source/slang/slang-ir.cpp
@@ -3814,8 +3814,6 @@ IRInst* IRBuilder::emitDefaultConstruct(IRType* type, bool fallback)
     case kIROp_UIntType:
     case kIROp_UIntPtrType:
     case kIROp_UInt64Type:
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
     case kIROp_CharType:
         return getIntValue(type, 0);
     case kIROp_BoolType:
@@ -7543,8 +7541,6 @@ bool isIntegralType(IRType* t)
         case BaseType::UInt64:
         case BaseType::IntPtr:
         case BaseType::UIntPtr:
-        case BaseType::Int8x4Packed:
-        case BaseType::UInt8x4Packed:
             return true;
         default:
             return false;
@@ -7591,10 +7587,6 @@ IntInfo getIntTypeInfo(const IRType* intType)
     case kIROp_Int64Type:
         return {64, true};
 
-    case kIROp_Int8x4PackedType:
-    case kIROp_UInt8x4PackedType:
-        return {32, false};
-
     case kIROp_IntPtrType:  // target platform dependent
     case kIROp_UIntPtrType: // target platform dependent
     default:
diff --git a/source/slang/slang-lower-to-ir.cpp b/source/slang/slang-lower-to-ir.cpp
index e5037bf04a..e5ca77634c 100644
--- a/source/slang/slang-lower-to-ir.cpp
+++ b/source/slang/slang-lower-to-ir.cpp
@@ -4633,8 +4633,6 @@ struct ExprLoweringVisitorBase : public ExprVisitor<Derived, LoweredValInfo>
             case BaseType::UInt64:
             case BaseType::UIntPtr:
             case BaseType::IntPtr:
-            case BaseType::Int8x4Packed:
-            case BaseType::UInt8x4Packed:
                 return LoweredValInfo::simple(getBuilder()->getIntValue(type, 0));
 
             case BaseType::Half:
diff --git a/source/slang/slang-mangle.cpp b/source/slang/slang-mangle.cpp
index dedbb2d486..d51fafb6bd 100644
--- a/source/slang/slang-mangle.cpp
+++ b/source/slang/slang-mangle.cpp
@@ -186,13 +186,6 @@ void emitBaseType(ManglingContext* context, BaseType baseType)
     case BaseType::IntPtr:
         emitRaw(context, "ip");
         break;
-    case BaseType::Int8x4Packed:
-        emitRaw(context, "c4p");
-        break;
-    case BaseType::UInt8x4Packed:
-        emitRaw(context, "C4p");
-        break;
-
     default:
         SLANG_UNEXPECTED("unimplemented case in base type mangling");
         break;
diff --git a/source/slang/slang-type-layout.cpp b/source/slang/slang-type-layout.cpp
index 7968450b49..e6729ca850 100644
--- a/source/slang/slang-type-layout.cpp
+++ b/source/slang/slang-type-layout.cpp
@@ -111,10 +111,6 @@ struct DefaultLayoutRulesImpl : SimpleLayoutRulesImpl
                 sizeof(intptr_t),
                 sizeof(intptr_t));
 
-        case BaseType::Int8x4Packed:
-        case BaseType::UInt8x4Packed:
-            return SimpleLayoutInfo(LayoutResourceKind::Uniform, 4, 4);
-
         case BaseType::Half:
             return SimpleLayoutInfo(LayoutResourceKind::Uniform, 2, 2);
         case BaseType::Float:
diff --git a/source/slang/slang-type-system-shared.h b/source/slang/slang-type-system-shared.h
index 583eb22166..d7bd43122e 100644
--- a/source/slang/slang-type-system-shared.h
+++ b/source/slang/slang-type-system-shared.h
@@ -22,8 +22,6 @@ namespace Slang
     X(Char)                  \
     X(IntPtr)                \
     X(UIntPtr)               \
-    X(Int8x4Packed)          \
-    X(UInt8x4Packed)         \
     /* end */
 
 enum class BaseType
diff --git a/source/slang/slang.cpp b/source/slang/slang.cpp
index fe7ecc4e94..924a4371ac 100644
--- a/source/slang/slang.cpp
+++ b/source/slang/slang.cpp
@@ -82,8 +82,6 @@ namespace Slang
      BaseTypeInfo::Flag::Signed | BaseTypeInfo::Flag::Integer,
      uint8_t(BaseType::IntPtr)},
     {uint8_t(sizeof(uintptr_t)), BaseTypeInfo::Flag::Integer, uint8_t(BaseType::UIntPtr)},
-    {uint8_t(sizeof(uint32_t)), BaseTypeInfo::Flag::Integer, uint8_t(BaseType::Int8x4Packed)},
-    {uint8_t(sizeof(uint32_t)), BaseTypeInfo::Flag::Integer, uint8_t(BaseType::UInt8x4Packed)},
 };
 
 /* static */ bool BaseTypeInfo::check()
@@ -135,10 +133,6 @@ namespace Slang
         return UnownedStringSlice::fromLiteral("intptr_t");
     case BaseType::UIntPtr:
         return UnownedStringSlice::fromLiteral("uintptr_t");
-    case BaseType::Int8x4Packed:
-        return UnownedStringSlice::fromLiteral("int8_t4_packed");
-    case BaseType::UInt8x4Packed:
-        return UnownedStringSlice::fromLiteral("uint8_t4_packed");
     default:
         {
             SLANG_ASSERT(!"Unknown basic type");
diff --git a/tests/hlsl-intrinsic/packed/pack-unpack.slang b/tests/hlsl-intrinsic/packed/pack-unpack.slang
index b20e69fa84..0b9490ee4a 100644
--- a/tests/hlsl-intrinsic/packed/pack-unpack.slang
+++ b/tests/hlsl-intrinsic/packed/pack-unpack.slang
@@ -9,6 +9,18 @@
 // Debug info for inlining errors can be given out, so disable them for this test.
 //TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -g0
 
+//TEST(compute):COMPARE_COMPUTE_EX:-vk -compute -shaderobj -xslang -DUSE_SLANG_SYNTAX
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -render-feature hardware-device -xslang -DUSE_SLANG_SYNTAX
+//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -profile cs_6_6 -dx12 -use-dxil -shaderobj -render-feature hardware-device -xslang -DUSE_SLANG_SYNTAX
+//TEST(compute):COMPARE_COMPUTE_EX:-metal -compute -shaderobj -xslang -DUSE_SLANG_SYNTAX
+//TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj -xslang -DUSE_SLANG_SYNTAX
+
+// 16 bit variants are not supported by WGSL.
+//TEST(compute):COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj -xslang -DWGSL -xslang -DUSE_SLANG_SYNTAX
+// Debug info for inlining errors can be given out, so disable them for this test.
+//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -g0 -xslang -DUSE_SLANG_SYNTAX
+
+
 //TEST_INPUT:ubuffer(data=[0xD37A83FF], stride=4):name unpackTestBuffer
 StructuredBuffer<uint32_t> unpackTestBuffer;
 
@@ -27,17 +39,125 @@ StructuredBuffer<int32_t4> packClampSTestBuffer;
 //TEST_INPUT:ubuffer(data=[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], stride=4):out,name outputBuffer
 RWStructuredBuffer<uint> outputBuffer;
 
+uint32_t4 __unpack_u8u32(uint value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return unpackUint4x8ToUint32(value);
+#else
+    return unpack_u8u32(uint8_t4_packed(value));
+#endif
+}
+
+uint16_t4 __unpack_u8u16(uint value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return unpackUint4x8ToUint16(value);
+#else
+    return unpack_u8u16(uint8_t4_packed(value));
+#endif
+}
+
+int32_t4 __unpack_s8s32(uint value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return unpackInt4x8ToInt32(value);
+#else
+    return unpack_s8s32(int8_t4_packed(value));
+#endif
+}
+
+int16_t4 __unpack_s8s16(uint value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return unpackInt4x8ToInt16(value);
+#else
+    return unpack_s8s16(int8_t4_packed(value));
+#endif
+}
+
+uint __pack_u8(uint32_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packUint4x8(value);
+#else
+    return uint(pack_u8(value));
+#endif
+}
+
+uint __pack_u8(uint16_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packUint4x8(value);
+#else
+    return uint(pack_u8(value));
+#endif
+}
+
+uint __pack_s8(int32_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packInt4x8(value);
+#else
+    return uint(pack_s8(value));
+#endif
+}
+
+uint __pack_s8(int16_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packInt4x8(value);
+#else
+    return uint(pack_s8(value));
+#endif
+}
+
+uint __pack_clamp_u8(int32_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packUint4x8Clamp(value);
+#else
+    return uint(pack_clamp_u8(value));
+#endif
+}
+
+uint __pack_clamp_u8(int16_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packUint4x8Clamp(value);
+#else
+    return uint(pack_clamp_u8(value));
+#endif
+}
+
+uint __pack_clamp_s8(int32_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packInt4x8Clamp(value);
+#else
+    return uint(pack_clamp_s8(value));
+#endif
+}
+
+uint __pack_clamp_s8(int16_t4 value)
+{
+#if defined(USE_SLANG_SYNTAX)
+    return packInt4x8Clamp(value);
+#else
+    return uint(pack_clamp_s8(value));
+#endif
+}
+
 [numthreads(1, 1, 1)]
 void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
 {
-    const uint8_t4_packed unpackTestValue = uint8_t4_packed(unpackTestBuffer[0]);
+    const uint unpackTestValue = unpackTestBuffer[0];
 
     uint index = 0;
 
     /*
      * Unpack without sign extension.
      */
-    uint32_t4 unpackedU32 = unpack_u8u32(unpackTestValue);
+    uint32_t4 unpackedU32 = __unpack_u8u32(unpackTestValue);
     // 0xFF
     outputBuffer[index++] = uint(unpackedU32.x);
     // 0x83
@@ -48,7 +168,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     outputBuffer[index++] = uint(unpackedU32.w);
 
 #if !defined(WGSL)
-    uint16_t4 unpackedU16 = unpack_u8u16(unpackTestValue);
+    uint16_t4 unpackedU16 = __unpack_u8u16(unpackTestValue);
     // 0xFF
     outputBuffer[index++] = uint(unpackedU16.x);
     // 0x83
@@ -67,7 +187,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     /*
      * Unpack with sign extension.
      */
-    int32_t4 unpackedS32 = unpack_s8s32(int8_t4_packed(unpackTestValue));
+    int32_t4 unpackedS32 = __unpack_s8s32(unpackTestValue);
     // 0xFFFFFFFF
     outputBuffer[index++] = uint(unpackedS32.x);
     // 0xFFFFFF83
@@ -78,7 +198,7 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
     outputBuffer[index++] = uint(unpackedS32.w);
 
 #if !defined(WGSL)
-    int16_t4 unpackedS16 = unpack_s8s16(int8_t4_packed(unpackTestValue));
+    int16_t4 unpackedS16 = __unpack_s8s16(unpackTestValue);
     // 0xFFFFFFFF
     outputBuffer[index++] = uint(unpackedS16.x);
     // 0xFFFFFF83
@@ -100,22 +220,17 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
      */
     uint32_t4 packU32TestValues = packTestBuffer[0];
     int32_t4 packS32TestValues = packU32TestValues;
-    uint8_t4_packed packU32Result = pack_u8(packU32TestValues);
-    int8_t4_packed packS32Result = pack_s8(packS32TestValues);
 
     // 0xD4236A3F
-    outputBuffer[index++] = uint(packU32Result);
-    outputBuffer[index++] = uint(packS32Result);
+    outputBuffer[index++] = __pack_u8(packU32TestValues);
+    outputBuffer[index++] = __pack_s8(packS32TestValues);
 
 #if !defined(WGSL)
     uint16_t4 packU16TestValues = int16_t4(int16_t(packU32TestValues.x), int16_t(packU32TestValues.y), 
                                            int16_t(packU32TestValues.z), int16_t(packU32TestValues.w));
     int16_t4 packS16TestValues = packU16TestValues;
-    uint8_t4_packed packU16Result = pack_u8(packU16TestValues);
-    int8_t4_packed packS16Result = pack_s8(packS16TestValues);
-
-    outputBuffer[index++] = uint(packU16Result);
-    outputBuffer[index++] = uint(packS16Result);
+    outputBuffer[index++] = __pack_u8(packU16TestValues);
+    outputBuffer[index++] = __pack_s8(packS16TestValues);
 #else
     outputBuffer[index++] = 0xD4236A3F;
     outputBuffer[index++] = 0xD4236A3F;
@@ -123,17 +238,15 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
 
     /*
      * Pack with unsigned clamping.
-     */ 
+     */
     int32_t4 packClampU32TestValues = packClampUTestBuffer[0];
-    uint8_t4_packed packClampU32Result = pack_clamp_u8(packClampU32TestValues);
     // 0xFEFFFF05
-    outputBuffer[index++] = uint(packClampU32Result);
+    outputBuffer[index++] = __pack_clamp_u8(packClampU32TestValues);
 
 #if !defined(WGSL)
     int16_t4 packClampU16TestValues = int16_t4(int16_t(packClampU32TestValues.x), int16_t(packClampU32TestValues.y),
                                                int16_t(packClampU32TestValues.z), int16_t(packClampU32TestValues.w));
-    uint8_t4_packed packClampU16Result = pack_clamp_u8(packClampU16TestValues);
-    outputBuffer[index++] = uint(packClampU16Result);
+    outputBuffer[index++] = __pack_clamp_u8(packClampU16TestValues);
 #else
     outputBuffer[index++] = 0xFEFFFF05;
 #endif
@@ -142,15 +255,13 @@ void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID)
      * Pack with signed clamping
      */
     int32_t4 packClampS32TestValues = packClampSTestBuffer[0];
-    int8_t4_packed packClampS32Result = pack_clamp_s8(packClampS32TestValues);
     // 0x81807FFF
-    outputBuffer[index++] = uint(packClampS32Result);
+    outputBuffer[index++] = __pack_clamp_s8(packClampS32TestValues);
 
 #if !defined(WGSL)
     int16_t4 packClampS16TestValues = int16_t4(int16_t(packClampS32TestValues.x), int16_t(packClampS32TestValues.y),
                                                int16_t(packClampS32TestValues.z), int16_t(packClampS32TestValues.w));
-    int8_t4_packed packClampS16Result = pack_clamp_s8(packClampS16TestValues);
-    outputBuffer[index++] = uint(packClampS16Result);
+    outputBuffer[index++] = __pack_clamp_s8(packClampS16TestValues);
 #else
     outputBuffer[index++] = 0x81807FFF;
 #endif
diff --git a/tests/hlsl-intrinsic/packed/packed-types-error.slang b/tests/hlsl-intrinsic/packed/packed-types-error.slang
deleted file mode 100644
index 7034f50a90..0000000000
--- a/tests/hlsl-intrinsic/packed/packed-types-error.slang
+++ /dev/null
@@ -1,21 +0,0 @@
-//TEST(compute):SIMPLE(filecheck=CHECK): -target spirv
-
-[numthreads(1, 1, 1)]
-void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) {
-    uint8_t4_packed packedU1 = 0x0U;
-    uint8_t4_packed packedU2 = 0xFU;
-    int8_t4_packed packedS1 = 0xFU;
-    int8_t4_packed packedS2 = 0xFU;
-
-    // Arithmetic and logical (bitwise) operations are not supported on packed types.
-    // An attempt to overload these operators will fail during compilation due to ambiguity caused by multiple possible overloads.
-
-    // CHECK: error 39999: ambiguous call to '-' with arguments of type
-    uint8_t4_packed val1 = packedU1 - packedU2;
-    // CHECK: error 39999: ambiguous call to '*' with arguments of type
-    int8_t4_packed val2 = packedS1 * packedS2;
-    // CHECK: error 39999: ambiguous call to '&' with arguments of type
-    uint8_t4_packed val3 = packedU1 & packedS1;
-    // CHECK: error 39999: ambiguous call to '|' with arguments of type
-    int8_t4_packed val4 = packedU1 | packedS1;
-}
diff --git a/tests/hlsl-intrinsic/packed/packed-types-warning.slang b/tests/hlsl-intrinsic/packed/packed-types-warning.slang
deleted file mode 100644
index 0b46925a40..0000000000
--- a/tests/hlsl-intrinsic/packed/packed-types-warning.slang
+++ /dev/null
@@ -1,72 +0,0 @@
-//TEST(compute):SIMPLE(filecheck=CHECK): -target spirv
-
-[numthreads(1, 1, 1)]
-void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) {
-    uint8_t4_packed packedU = 0U;
-    int8_t4_packed packedS = 0xFU;
-    uint val1 = 0xFU;
-
-    //
-    // Implicit conversions between the packed types are not OK, they must be explicit.
-    //
-
-    // CHECK: warning 30081: implicit conversion from 'int8_t4_packed' to 'uint8_t4_packed' is not recommended
-    packedU = packedS;
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int8_t4_packed' is not recommended
-    packedS = packedU;
-
-
-    // Implicit casting from 32 bit literals are OK.
-    packedU = 32U;
-    packedS = 32;
-
-    // Implicit casting from 64 bit literals are not OK.
-    // CHECK: warning 30081: implicit conversion from 'uint64_t' to 'uint8_t4_packed' is not recommended
-    packedU = 0xFFFFFFFFFFULL;
-    // CHECK: warning 30081: implicit conversion from 'int64_t' to 'uint8_t4_packed' is not recommended
-    packedU = 0xFFFFFFFFFFLL;
-
-    //
-    // Explicit casting from other builtin integer types are OK.
-    //
-    packedU = uint8_t4_packed(val1);
-    packedU = uint8_t4_packed(uint16_t(123));
-    val1 = uint(packedS);
-
-    //
-    // Implicit casting from other builtin integer types are not OK.
-    //
-
-    // CHECK: warning 30081: implicit conversion from 'uint' to 'uint8_t4_packed' is not recommended
-    packedU = val1;
-    // CHECK: warning 30081: implicit conversion from 'uint' to 'int8_t4_packed' is not recommended
-    packedS = val1;
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'uint' is not recommended
-    val1 = packedU;
-    // CHECK: warning 30081: implicit conversion from 'int8_t4_packed' to 'uint' is not recommended
-    val1 = packedS;
-
-    // CHECK: warning 30081: implicit conversion from 'uint8_t' to 'uint8_t4_packed' is not recommended
-    packedU = uint8_t(1);
-    // CHECK: warning 30081: implicit conversion from 'int64_t' to 'int8_t4_packed' is not recommended
-    packedS = int64_t(1);
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'uint64_t' is not recommended
-    uint64_t val2 = packedU;
-    // CHECK: warning 30081: implicit conversion from 'int8_t4_packed' to 'int16_t' is not recommended
-    int16_t val3 = packedS;
-
-    //
-    // Arithmetic and logical (bitwise) operations are not supported on packed types,
-    // but overload to integer types will be made and unrecommended conversion warnings
-    // should be thrown out.
-    //
-
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int' is not recommended
-    packedU = uint8_t4_packed(packedU + 32);
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int' is not recommended
-    packedU = uint8_t4_packed(packedU / 2);
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int' is not recommended
-    packedU = uint8_t4_packed(packedU | 0xF);
-    // CHECK: warning 30081: implicit conversion from 'uint8_t4_packed' to 'int' is not recommended
-    packedU = uint8_t4_packed(packedU & 0x3);
-}
diff --git a/tests/hlsl-intrinsic/packed/packed-types.slang b/tests/hlsl-intrinsic/packed/packed-types.slang
deleted file mode 100644
index 0bbc6f4049..0000000000
--- a/tests/hlsl-intrinsic/packed/packed-types.slang
+++ /dev/null
@@ -1,36 +0,0 @@
-//TEST(compute):COMPARE_COMPUTE_EX:-vk -compute -shaderobj
-//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -shaderobj -render-feature hardware-device
-//TEST(compute):COMPARE_COMPUTE_EX:-slang -compute -profile cs_6_6 -dx12 -use-dxil -shaderobj -render-feature hardware-device
-//TEST(compute):COMPARE_COMPUTE_EX:-metal -compute -shaderobj 
-//TEST(compute):COMPARE_COMPUTE_EX:-cpu -compute -shaderobj
-//TEST(compute):COMPARE_COMPUTE_EX:-wgpu -compute -shaderobj
-//TEST(compute):COMPARE_COMPUTE_EX:-cuda -compute -g0
-
-//TEST_INPUT:ubuffer(data=[0xD37A83FF], stride=4):name packedUArray
-StructuredBuffer<uint8_t4_packed> packedUArray;
-
-//TEST_INPUT:ubuffer(data=[0xDEADBEEF], stride=4):name packedSArray
-StructuredBuffer<int8_t4_packed> packedSArray;
-
-//TEST_INPUT:ubuffer(data=[0 0], stride=4):out,name outputBuffer
-RWStructuredBuffer<uint8_t4_packed> outputBuffer;
-
-// Test type layout works.
-struct Custom {
-    uint8_t4_packed packedU;
-    uint3 other1;
-    int8_t4_packed packedS;
-    float other2;
-}
-
-[numthreads(1, 1, 1)]
-void computeMain(uint3 dispatchThreadID : SV_DispatchThreadID) {
-    uint id = dispatchThreadID.x;
-
-    Custom val;
-    val.packedU = packedUArray[id];
-    val.packedS = packedSArray[id];
-
-    outputBuffer[id] = val.packedU;
-    outputBuffer[id + 1] = uint8_t4_packed(val.packedS);
-}
diff --git a/tests/hlsl-intrinsic/packed/packed-types.slang.expected.txt b/tests/hlsl-intrinsic/packed/packed-types.slang.expected.txt
deleted file mode 100644
index a348b3dfd1..0000000000
--- a/tests/hlsl-intrinsic/packed/packed-types.slang.expected.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-D37A83FF
-DEADBEEF