@@ -1676,6 +1676,12 @@ bool get_reorder_dt(const deserialized_op &base_op_ref, dnnl_data_type_t &sdt,
         dnnl_data_type_t &ddt) {
     sdt = convert_dt(base_op_ref.in_lts_.front().get_data_type());
     ddt = convert_dt(base_op_ref.out_lts_.front().get_data_type());
+
+    const auto &op_kind = base_op_ref.kind_;
+    // The reference path always computes in f32, so dequantize ops must
+    // always be created with f32 output for their arguments to be linked
+    // correctly.
+    if (op_kind == "DynamicDequantize") { ddt = dnnl_f32; }
     return true;
 }
 
@@ -1704,10 +1710,16 @@ bool get_reorder_attrs(const deserialized_op &base_op_ref,
     // scale
     attr_t::policy_t scale_policy = attr_t::policy_t::COMMON;
     int64_t axis = 1;
+    std::vector<dnnl_dim_t> groups;
+    dnnl_data_type_t scale_dt, zp_dt;
+
+    const int ndims
+            = static_cast<int>(base_op_ref.in_lts_.front().shape_.size());
+    base_op_ref.get_attr_s64(axis, "axis");
+    if (axis < 0) axis += ndims;
+
+    // per dimension
     if (qtype == "per_channel") {
-        // per dimension
-        base_op_ref.get_attr_s64(axis, "axis");
-        const auto ndims = base_op_ref.in_lts_.front().shape_.size();
         if (axis < 0) axis += ndims;
         if (axis == 0) {
             scale_policy = attr_t::PER_DIM_0;
@@ -1720,6 +1732,14 @@ bool get_reorder_attrs(const deserialized_op &base_op_ref,
         } else {
             assert(!"unsupported axis");
         }
+    } else if (qtype == "per_group") {
+        scale_policy = attr_t::PER_TENSOR;
+
+        std::vector<int64_t> group_shape;
+        base_op_ref.get_attr_s64_vector(group_shape, "group_shape");
+        groups = {group_shape[ndims - 2], group_shape[ndims - 1]};
+        scale_dt = static_cast<dnnl_data_type_t>(
+                base_op_ref.in_lts_[1].get_data_type());
     }
 
     if (op_kind == "Dequantize" || op_kind == "Quantize") {
@@ -1734,18 +1754,29 @@ bool get_reorder_attrs(const deserialized_op &base_op_ref,
         if (has_zps && !zps.empty())
             zp.set(arg, attr_t::policy_t::COMMON, zps.front());
     } else if (op_kind == "DynamicDequantize" || op_kind == "DynamicQuantize") {
+        // The reference path always uses f32 for computation.
+        scale_dt = dnnl_f32;
+
         // TODO: benchdnn needs to alloc memory based on is_def() function.
         // so add tmp value for per_tensor scales && zps to make is_def()
         // return false to alloc memory.
         if (qtype == "per_tensor") {
             arg_scales.set(arg, {scale_policy, 2});
+        } else if (qtype == "per_group") {
+            arg_scales.set(arg, {scale_policy, 1.f, scale_dt, groups});
         } else {
             arg_scales.set(arg, {scale_policy});
         }
         // zps is optional for DynamicDequantize/DynamicQuantize, default is
         // symmetric quantization
         if (base_op_ref.in_lts_.size() == 3) {
-            zp.set(arg, attr_t::policy_t::COMMON, 1);
+            if (qtype == "per_group") {
+                zp_dt = static_cast<dnnl_data_type_t>(
+                        base_op_ref.in_lts_[2].get_data_type());
+                zp.set(arg, {scale_policy, 0, zp_dt, groups});
+            } else {
+                zp.set(arg, attr_t::policy_t::COMMON, 1);
+            }
         }
     }
     return true;
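
For readers unfamiliar with the per_group path, here is a small standalone sketch (not part of the patch, and not benchdnn API) of the mapping the new branch performs: the graph-level group_shape attribute carries one entry per tensor dimension, and only the two innermost entries are forwarded as the groups attached to the scale and zero-point attributes. The helper name innermost_groups and the example shapes below are hypothetical.

#include <cstdint>
#include <vector>

// Hypothetical helper mirroring the per_group branch above: keep only the two
// innermost entries of the graph-level group_shape (assumes ndims >= 2).
std::vector<int64_t> innermost_groups(const std::vector<int64_t> &group_shape) {
    const int ndims = static_cast<int>(group_shape.size());
    return {group_shape[ndims - 2], group_shape[ndims - 1]};
}

// Usage sketch: a 4-D weight tensor quantized with groups of 32 along the
// innermost dimension.
// std::vector<int64_t> gs = {1, 1, 1, 32};
// innermost_groups(gs) returns {1, 32}.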