Rename the new quant type: quant_mode::INT4_ASYM_ZERO_NO_DEGRAD -> quant_mode::INT4_ASYM_FP_ZERO

zhewang1-intc · zhewang1-intc · commit 2017cb097d68 · 2024-06-20T18:39:36.000+08:00
diff --git a/include/common/core/common_types.hpp b/include/common/core/common_types.hpp
@@ -30,7 +30,7 @@ enum class mem_layout : uint8_t { row_major = 0, col_major = 1 };
 enum class quant_mode : uint8_t {
   S4_ASYM = 0,
   S4_FULLRANGE_NO_ZP = 1,
-  INT4_ASYM_ZERO_NO_DEGRAD = 2
+  INT4_ASYM_FP_ZERO = 2
 };
 
 struct quant_info {
diff --git a/include/experimental/group/gemm/impl/int4_dequantize_xe.hpp b/include/experimental/group/gemm/impl/int4_dequantize_xe.hpp
@@ -285,7 +285,7 @@ class gemm_t<
 
   // compress int4 along N dimensions
   using zero_pt_tile_desc_t = std::conditional_t<
-      quant_info_.quant_mode != quant_mode::INT4_ASYM_ZERO_NO_DEGRAD,
+      quant_info_.quant_mode != quant_mode::INT4_ASYM_FP_ZERO,
       subgroup::tile_desc_t<
           (tile_size_x_b + pack_ratio - 1) / pack_ratio,
           tile_size_y_zero_pt,
diff --git a/include/experimental/kernel/gemm/impl/int4_dequantize_kslicing_xe.hpp b/include/experimental/kernel/gemm/impl/int4_dequantize_kslicing_xe.hpp
@@ -618,7 +618,7 @@ class gemm_universal_t<
     int start_y_scale = start_k / dequant_s;
 
     int start_x_zero_pt = gemm_t::compute_policy::quant_mode ==
-            quant_mode::INT4_ASYM_ZERO_NO_DEGRAD
+            quant_mode::INT4_ASYM_FP_ZERO
         ? start_n
         : start_n / pack_ratio;
     int start_y_zero_pt = start_k / dequant_s;
@@ -691,7 +691,7 @@ class gemm_universal_t<
           mem_desc_zero_pt);
     } else if constexpr (
         gemm_t::compute_policy::quant_mode ==
-        quant_mode::INT4_ASYM_ZERO_NO_DEGRAD) {
+        quant_mode::INT4_ASYM_FP_ZERO) {
       mem_desc_zero_pt_t mem_desc_zero_pt(
           args.zero_pt_base,
           {args.matrix_n,
diff --git a/include/subgroup/tile/impl/tile_op_functor.hpp b/include/subgroup/tile/impl/tile_op_functor.hpp
@@ -151,15 +151,15 @@ struct dequant_int4_weight_t {
                   zero_pt_i8;
             } else if constexpr (
                 quant_mode == quant_mode::S4_FULLRANGE_NO_ZP ||
-                quant_mode == quant_mode::INT4_ASYM_ZERO_NO_DEGRAD) {
+                quant_mode == quant_mode::INT4_ASYM_FP_ZERO) {
               cvt_blk_i8.xetla_select<step, 1>(jj * block_size_y_b + ii) =
                   cvt_blk_i8.xetla_select<step, 1>(jj * block_size_y_b + ii) -
                   int8_t(8);
             }
             dst_blk.xetla_select<step, 1>(jj * block_size_y_b + ii) =
                 cvt_blk_i8.xetla_select<step, 1>(jj * block_size_y_b + ii) *
                 scale.reg[scale_idx];
-            if constexpr (quant_mode == quant_mode::INT4_ASYM_ZERO_NO_DEGRAD) {
+            if constexpr (quant_mode == quant_mode::INT4_ASYM_FP_ZERO) {
               uint32_t zero_pt_idx =
                   offset_y_in_tile / dequant_s * zero_pt_t::block_size_x +
                   offset_x_in_tile;
diff --git a/tests/integration/gemv/int4/main.cpp b/tests/integration/gemv/int4/main.cpp
@@ -41,7 +41,7 @@ class test_col_major_1 {
   static constexpr size_t dequant_s = 128;
   // static constexpr quant_mode quant_mode = quant_mode::S4_ASYM;
   // static constexpr quant_mode quant_mode = quant_mode::S4_FULLRANGE_NO_ZP;
-  static constexpr quant_mode quant_mode = quant_mode::INT4_ASYM_ZERO_NO_DEGRAD;
+  static constexpr quant_mode quant_mode = quant_mode::INT4_ASYM_FP_ZERO;
 
   static constexpr size_t local_kslicing = 1;
   static constexpr size_t global_kslicing = 1;
@@ -133,15 +133,15 @@ std::vector<fp16> convert_int4(
   std::vector<fp16> dequant_fp16(sizeof(data_type_b) * 2);
 
   int8_t zero_pt_i8;
-  if constexpr (quant_mode != quant_mode::INT4_ASYM_ZERO_NO_DEGRAD)
+  if constexpr (quant_mode != quant_mode::INT4_ASYM_FP_ZERO)
     zero_pt_i8 = zero_pt & 0xf;
   for (uint32_t i = 0; i < dequant_fp16.size(); i++) {
     int8_t dequant_8bit = data_b & 0xf;
     if constexpr (quant_mode == quant_mode::S4_FULLRANGE_NO_ZP) {
       dequant_fp16[i] = scale * (dequant_8bit - 8);
     } else if constexpr (quant_mode == quant_mode::S4_ASYM) {
       dequant_fp16[i] = scale * (dequant_8bit - zero_pt_i8);
-    } else if constexpr (quant_mode == quant_mode::INT4_ASYM_ZERO_NO_DEGRAD) {
+    } else if constexpr (quant_mode == quant_mode::INT4_ASYM_FP_ZERO) {
       dequant_fp16[i] = scale * (dequant_8bit - 8) + zero_pt;
     } else {
       assert(0);
@@ -176,13 +176,13 @@ std::vector<data_type_acc_in> dequantize_weight(
     for (uint32_t j = 0; j < width; j += step) {
       int start_b_in = i * width + j;
       int start_scale_in = start_b_in / step;
-      int start_zero_pt_in = quant_mode == quant_mode::INT4_ASYM_ZERO_NO_DEGRAD
+      int start_zero_pt_in = quant_mode == quant_mode::INT4_ASYM_FP_ZERO
           ? (j / step) * matrix_n + i
           : (j / step) * (matrix_n / pack_radio) + i / pack_radio;
       int start_out =
           layout_b == mem_layout::row_major ? 0 : i * matrix_k + j * pack_radio;
       data_type_zero_pt zp_value = zero_pt[start_zero_pt_in];
-      if constexpr (quant_mode != quant_mode::INT4_ASYM_ZERO_NO_DEGRAD)
+      if constexpr (quant_mode != quant_mode::INT4_ASYM_FP_ZERO)
         zp_value = zp_value >> (4 * (i % pack_radio));
       for (uint32_t jj = 0; jj < step; jj++) {
         std::vector<fp16> dequant_fp16 = convert_int4<quant_mode>(
@@ -225,7 +225,7 @@ void dequantize_gemv_run(int iter) {
   using data_type_b = typename Test::data_type_b;
   using data_type_c = typename Test::data_type_c;
   using data_type_zero_pt = std::conditional_t<
-      Test::quant_mode == quant_mode::INT4_ASYM_ZERO_NO_DEGRAD,
+      Test::quant_mode == quant_mode::INT4_ASYM_FP_ZERO,
       data_type_c,
       data_type_b>;
   using data_type_scale = fp16;
@@ -246,7 +246,7 @@ void dequantize_gemv_run(int iter) {
   constexpr size_t size_zero_pt_k = matrix_k / dequant_s;
   constexpr size_t size_zero_pt_n = matrix_n;
   constexpr size_t size_zero_pt =
-      Test::quant_mode != quant_mode::INT4_ASYM_ZERO_NO_DEGRAD
+      Test::quant_mode != quant_mode::INT4_ASYM_FP_ZERO
       ? size_zero_pt_k * size_zero_pt_n / 2
       : size_zero_pt_k * size_zero_pt_n;
 
@@ -509,7 +509,7 @@ void dequantize_gemv_run(int iter) {
             epilogue_args);
   } else if constexpr (
       compute_policy::quant_mode == quant_mode::S4_ASYM ||
-      compute_policy::quant_mode == quant_mode::INT4_ASYM_ZERO_NO_DEGRAD) {
+      compute_policy::quant_mode == quant_mode::INT4_ASYM_FP_ZERO) {
     gemm_arg =
         typename gemm_op_t::template arguments_t<compute_policy::quant_mode>(
             matrix_m,