@@ -46,10 +46,10 @@ template <impl::data_type_t type>
46
46
using data_t = typename prec_traits_t <type>::type;
47
47
48
48
// Alias template taking two impl::data_type_t enumerators (input and output)
// and naming the q10n quantization helper instantiated on the corresponding
// data_t<> types. This hunk only retargets the alias from q10n::qz_a1b0 to
// q10n::qz_a1b0_t; the alias name _qz_a1b0 and its template parameters are
// unchanged, so existing users of _qz_a1b0 are unaffected by the rename.
template <impl::data_type_t type_i, impl::data_type_t type_o>
49
- using _qz_a1b0 = q10n::qz_a1b0 <data_t <type_i>, data_t <type_o>>;
49
+ using _qz_a1b0 = q10n::qz_a1b0_t <data_t <type_i>, data_t <type_o>>;
50
50
51
51
// Alias template taking two impl::data_type_t enumerators (input and output)
// and naming the general q10n quantization helper instantiated on the
// corresponding data_t<> types. This hunk only retargets the alias from
// q10n::qz to q10n::qz_t; the alias name _qz and its template parameters are
// unchanged.
template <impl::data_type_t type_i, impl::data_type_t type_o>
52
- using _qz = q10n::qz <data_t <type_i>, data_t <type_o>>;
52
+ using _qz = q10n::qz_t <data_t <type_i>, data_t <type_o>>;
53
53
54
54
namespace fmt_order {
55
55
const bool keep = true ;
@@ -343,7 +343,7 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
343
343
const float s = src_scales[src_scales_mask == 0 ? 0 : os_off];
344
344
const float d = dst_scales[dst_scales_mask == 0 ? 0 : os_off];
345
345
346
- o = q10n::qz_b0 <data_t <type_i>, data_t <type_o>>()(
346
+ o = q10n::qz_b0_t <data_t <type_i>, data_t <type_o>>()(
347
347
i, s * adj_scale * d);
348
348
if (req_comp) cp[g * OC + oc] -= (int32_t )o;
349
349
if (has_asymmetric_comp) zp[g * OC + oc] -= (int32_t )o;
@@ -547,7 +547,7 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
547
547
const float src_scale = s[src_scales_mask == 0 ? 0 : os_off];
548
548
const float dst_scale = d[dst_scales_mask == 0 ? 0 : os_off];
549
549
out[index (oc, ic)]
550
- = q10n::qz_b0 <data_t <type_i>, data_t <type_o>>()(
550
+ = q10n::qz_b0_t <data_t <type_i>, data_t <type_o>>()(
551
551
inp[plain_off],
552
552
src_scale * adj_scale * dst_scale);
553
553
if (req_comp) c[oc] -= (128 * (int32_t )(out[index (oc, ic)]));
@@ -710,7 +710,7 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
710
710
for (dim_t oc = 0 ; oc < oc_block; ++oc) {
711
711
const auto plain_off
712
712
= oc * plain_d.blocking_desc ().strides [w_groups + 0 ];
713
- out[oc] = q10n::qz_b0 <data_t <type_i>, data_t <type_o>>()(
713
+ out[oc] = q10n::qz_b0_t <data_t <type_i>, data_t <type_o>>()(
714
714
inp[plain_off], s[oc] * adj_scale * d[oc]);
715
715
if (has_asymmetric_comp) zp[oc] -= (int32_t )(out[oc]);
716
716
}
@@ -904,7 +904,7 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
904
904
+ ic * plain_d.blocking_desc ().strides [w_groups + 1 ];
905
905
auto index = AB_or_BC_blk_off<tag_traits_t <tag_o>::inner_blks>(
906
906
oc, ic);
907
- out[index ] = q10n::qz_b0 <data_t <type_i>, data_t <type_o>>()(
907
+ out[index ] = q10n::qz_b0_t <data_t <type_i>, data_t <type_o>>()(
908
908
inp[plain_off], s[oc] * adj_scale * d[oc]);
909
909
910
910
if (has_asymmetric_comp) zp[oc] -= (int32_t )(out[index ]);
@@ -1077,8 +1077,9 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
1077
1077
auto index
1078
1078
= AB_or_BC_blk_off<tag_traits_t <tag_o>::inner_blks>(
1079
1079
d0, d1);
1080
- out[index ] = q10n::qz_b0<data_t <type_i>, data_t <type_o>>()(
1081
- inp[plain_off], s[0 ] * adj_scale * d[0 ]);
1080
+ out[index ]
1081
+ = q10n::qz_b0_t <data_t <type_i>, data_t <type_o>>()(
1082
+ inp[plain_off], s[0 ] * adj_scale * d[0 ]);
1082
1083
1083
1084
auto o = static_cast <int32_t >(out[index ]);
1084
1085
if (req_comp) cp[d1] -= (128 * o);
@@ -1088,16 +1089,17 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
1088
1089
auto index
1089
1090
= AB_or_BC_blk_off<tag_traits_t <tag_o>::inner_blks>(
1090
1091
d0, d1);
1091
- out[index ] = q10n::qz_b0<data_t <type_i>, data_t <type_o>>()(
1092
- 0 , s[0 ] * adj_scale * d[0 ]);
1092
+ out[index ]
1093
+ = q10n::qz_b0_t <data_t <type_i>, data_t <type_o>>()(
1094
+ 0 , s[0 ] * adj_scale * d[0 ]);
1093
1095
}
1094
1096
}
1095
1097
1096
1098
for_ (int d0 = d0_block; d0 < D0_blksize; ++d0)
1097
1099
for (int d1 = 0 ; d1 < D1_blksize; ++d1) {
1098
1100
auto index = AB_or_BC_blk_off<tag_traits_t <tag_o>::inner_blks>(
1099
1101
d0, d1);
1100
- out[index ] = q10n::qz_b0 <data_t <type_i>, data_t <type_o>>()(
1102
+ out[index ] = q10n::qz_b0_t <data_t <type_i>, data_t <type_o>>()(
1101
1103
0 , s[0 ] * adj_scale * d[0 ]);
1102
1104
}
1103
1105
};
@@ -1265,7 +1267,7 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
1265
1267
= src_scales[src_scales_mask == 0 ? 0 : g * OC];
1266
1268
const float dst_scale
1267
1269
= dst_scales[dst_scales_mask == 0 ? 0 : g * OC];
1268
- out[g] = q10n::qz_b0 <data_t <type_i>, data_t <type_o>>()(
1270
+ out[g] = q10n::qz_b0_t <data_t <type_i>, data_t <type_o>>()(
1269
1271
inp[i_off], src_scale * adj_scale * dst_scale);
1270
1272
}
1271
1273
};
@@ -2094,25 +2096,26 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
2094
2096
if (alpha == 1.0 && beta == 0.0 ) {
2095
2097
PRAGMA_OMP_SIMD ()
2096
2098
for (size_t e = start; e < end; ++e) {
2097
- output[e] = q10n::qz_a1b0<data_t <type_i>, data_t <type_o>>()(
2098
- input[e]);
2099
+ output[e]
2100
+ = q10n::qz_a1b0_t <data_t <type_i>, data_t <type_o>>()(
2101
+ input[e]);
2099
2102
}
2100
2103
} else if (alpha == 1.0 ) {
2101
2104
PRAGMA_OMP_SIMD ()
2102
2105
for (size_t e = start; e < end; ++e) {
2103
- output[e] = q10n::qz_a1 <data_t <type_i>, data_t <type_o>>()(
2106
+ output[e] = q10n::qz_a1_t <data_t <type_i>, data_t <type_o>>()(
2104
2107
input[e], output[e], beta);
2105
2108
}
2106
2109
} else if (beta == 0.0 ) {
2107
2110
PRAGMA_OMP_SIMD ()
2108
2111
for (size_t e = start; e < end; ++e) {
2109
- output[e] = q10n::qz_b0 <data_t <type_i>, data_t <type_o>>()(
2112
+ output[e] = q10n::qz_b0_t <data_t <type_i>, data_t <type_o>>()(
2110
2113
input[e], alpha);
2111
2114
}
2112
2115
} else {
2113
2116
PRAGMA_OMP_SIMD ()
2114
2117
for (size_t e = start; e < end; ++e) {
2115
- output[e] = q10n::qz <data_t <type_i>, data_t <type_o>>()(
2118
+ output[e] = q10n::qz_t <data_t <type_i>, data_t <type_o>>()(
2116
2119
input[e], output[e], alpha, beta);
2117
2120
}
2118
2121
}
@@ -2121,28 +2124,27 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
2121
2124
if (alpha == 1.0 && beta == 0.0 ) {
2122
2125
PRAGMA_OMP_SIMD ()
2123
2126
for (size_t e = nelems - rem_elems; e < nelems; ++e) {
2124
- output[e] = q10n::qz_a1b0 <data_t <type_i>,
2127
+ output[e] = q10n::qz_a1b0_t <data_t <type_i>,
2125
2128
data_t <type_o>>()(input[e]);
2126
2129
}
2127
2130
} else if (alpha == 1.0 ) {
2128
2131
PRAGMA_OMP_SIMD ()
2129
2132
for (size_t e = nelems - rem_elems; e < nelems; ++e) {
2130
- output[e]
2131
- = q10n::qz_a1<data_t <type_i>, data_t <type_o>>()(
2132
- input[e], output[e], beta);
2133
+ output[e] = q10n::qz_a1_t <data_t <type_i>,
2134
+ data_t <type_o>>()(input[e], output[e], beta);
2133
2135
}
2134
2136
} else if (beta == 0.0 ) {
2135
2137
PRAGMA_OMP_SIMD ()
2136
2138
for (size_t e = nelems - rem_elems; e < nelems; ++e) {
2137
- output[e]
2138
- = q10n::qz_b0<data_t <type_i>, data_t <type_o>>()(
2139
- input[e], alpha);
2139
+ output[e] = q10n::qz_b0_t <data_t <type_i>,
2140
+ data_t <type_o>>()(input[e], alpha);
2140
2141
}
2141
2142
} else {
2142
2143
PRAGMA_OMP_SIMD ()
2143
2144
for (size_t e = nelems - rem_elems; e < nelems; ++e) {
2144
- output[e] = q10n::qz<data_t <type_i>, data_t <type_o>>()(
2145
- input[e], output[e], alpha, beta);
2145
+ output[e]
2146
+ = q10n::qz_t <data_t <type_i>, data_t <type_o>>()(
2147
+ input[e], output[e], alpha, beta);
2146
2148
}
2147
2149
}
2148
2150
}
0 commit comments