Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cherry-pick new commits from rls-v3.6 #273

Merged
merged 10 commits into from
Feb 13, 2025
6 changes: 5 additions & 1 deletion include/oneapi/dnnl/dnnl.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2024 Intel Corporation
* Copyright 2016-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -1495,10 +1495,12 @@ struct memory : public handle<dnnl_memory_t> {
AB8a2b = dnnl_AB8a2b,
abDc16d = dnnl_abDc16d,
abDc32d = dnnl_abDc32d,
abDC16d4c = dnnl_abDC16d4c,
abDC32d4c = dnnl_abDC32d4c,
abCd32c = dnnl_abCd32c,
abdEc16e = dnnl_abdEc16e,
abdEc32e = dnnl_abdEc32e,
abdEC16e4c = dnnl_abdEC16e4c,
abdEC32e2c = dnnl_abdEC32e2c,
abdEC32e4c = dnnl_abdEC32e4c,
abdCe16c = dnnl_abdCe16c,
Expand Down Expand Up @@ -1994,8 +1996,10 @@ struct memory : public handle<dnnl_memory_t> {

ldOi16o = abDc16d,
ldOi32o = abDc32d,
ldOI16o4i = abDC16d4c,
ldOI32o4i = abDC32d4c,
ldgOi16o = abdEc16e,
ldgOI16o4i = abdEC16e4c,
ldgOi32o = abdEc32e,
ldgOI32o2i = abdEC32e2c,
ldgOI32o4i = abdEC32e4c,
Expand Down
6 changes: 5 additions & 1 deletion include/oneapi/dnnl/dnnl_types.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2024 Intel Corporation
* Copyright 2016-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -1043,6 +1043,8 @@ typedef enum {
dnnl_cabd,
dnnl_dabc,
dnnl_Ab32a,
dnnl_abdEC16e4c,
dnnl_abDC16d4c,

/// Just a sentinel, not a real memory format tag. Must be changed after a new
/// format tag is added.
Expand Down Expand Up @@ -1178,10 +1180,12 @@ typedef enum {
/// 5D LSTM projection tensor
dnnl_ldOi16o = dnnl_abDc16d,
dnnl_ldOi32o = dnnl_abDc32d,
dnnl_ldOI16o4i = dnnl_abDC16d4c,
dnnl_ldOI32o4i = dnnl_abDC32d4c,
dnnl_ldIo32i = dnnl_abCd32c,
/// 6D RNN weights tensor
dnnl_ldgOi16o = dnnl_abdEc16e,
dnnl_ldgOI16o4i = dnnl_abdEC16e4c,
dnnl_ldgOi32o = dnnl_abdEc32e,
dnnl_ldgOI32o2i = dnnl_abdEC32e2c,
dnnl_ldgOI32o4i = dnnl_abdEC32e4c,
Expand Down
6 changes: 5 additions & 1 deletion src/common/c_types_map.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2024 Intel Corporation
* Copyright 2016-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -710,6 +710,7 @@ const format_tag_t AB32a32b8a2b = dnnl_AB32a32b8a2b;
const format_tag_t AB8a2b = dnnl_AB8a2b;
const format_tag_t abDc16d = dnnl_abDc16d;
const format_tag_t abDc32d = dnnl_abDc32d;
const format_tag_t abDC16d4c = dnnl_abDC16d4c;
const format_tag_t abDC32d4c = dnnl_abDC32d4c;
const format_tag_t abCd4c = dnnl_abCd4c;
const format_tag_t abCde4c = dnnl_abCde4c;
Expand All @@ -719,6 +720,7 @@ const format_tag_t abCde32c = dnnl_abCde32c;
const format_tag_t abCdef32c = dnnl_abCdef32c;
const format_tag_t abdEc16e = dnnl_abdEc16e;
const format_tag_t abdEc32e = dnnl_abdEc32e;
const format_tag_t abdEC16e4c = dnnl_abdEC16e4c;
const format_tag_t abdEC32e2c = dnnl_abdEC32e2c;
const format_tag_t abdEC32e4c = dnnl_abdEC32e4c;
const format_tag_t abdEC64e2c = dnnl_abdEC64e2c;
Expand Down Expand Up @@ -1479,10 +1481,12 @@ const format_tag_t gOIhw4o8i2o = dnnl_gOIhw4o8i2o;
const format_tag_t gOIdhw4o8i2o = dnnl_gOIdhw4o8i2o;
const format_tag_t ldOi16o = dnnl_ldOi16o;
const format_tag_t ldOi32o = dnnl_ldOi32o;
const format_tag_t ldOI16o4i = dnnl_ldOI16o4i;
const format_tag_t ldOI32o4i = dnnl_ldOI32o4i;
const format_tag_t ldIo32i = dnnl_ldIo32i;
const format_tag_t ldgOi16o = dnnl_ldgOi16o;
const format_tag_t ldgOi32o = dnnl_ldgOi32o;
const format_tag_t ldgOI16o4i = dnnl_ldgOI16o4i;
const format_tag_t ldgOI32o2i = dnnl_ldgOI32o2i;
const format_tag_t ldgOI32o4i = dnnl_ldgOI32o4i;
const format_tag_t ldgOI64o2i = dnnl_ldgOI64o2i;
Expand Down
6 changes: 5 additions & 1 deletion src/common/dnnl_debug_autogenerated.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2018-2024 Intel Corporation
* Copyright 2018-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -381,8 +381,10 @@ const char *dnnl_fmt_tag2str(dnnl_format_tag_t v) {
if (v == dnnl_AB32a32b8a2b) return "AB32a32b8a2b";
if (v == dnnl_AB8a2b) return "AB8a2b";
if (v == dnnl_abDc32d) return "abDc32d";
if (v == dnnl_abDC16d4c) return "abDC16d4c";
if (v == dnnl_abDC32d4c) return "abDC32d4c";
if (v == dnnl_abdEc32e) return "abdEc32e";
if (v == dnnl_abdEC16e4c) return "abdEC16e4c";
if (v == dnnl_abdEC32e2c) return "abdEC32e2c";
if (v == dnnl_abdEC32e4c) return "abdEC32e4c";
if (v == dnnl_aBdefC16b4c) return "aBdefC16b4c";
Expand Down Expand Up @@ -1005,10 +1007,12 @@ const char *dnnl_fmt_tag2str(dnnl_format_tag_t v) {
if (v == dnnl_ldgo) return "ldgo";
if (v == dnnl_ldOi16o) return "ldOi16o";
if (v == dnnl_ldOi32o) return "ldOi32o";
if (v == dnnl_ldOI16o4i) return "ldOI16o4i";
if (v == dnnl_ldOI32o4i) return "ldOI32o4i";
if (v == dnnl_ldIo32i) return "ldIo32i";
if (v == dnnl_ldgOi16o) return "ldgOi16o";
if (v == dnnl_ldgOi32o) return "ldgOi32o";
if (v == dnnl_ldgOI16o4i) return "ldgOI16o4i";
if (v == dnnl_ldgOI32o2i) return "ldgOI32o2i";
if (v == dnnl_ldgOI32o4i) return "ldgOI32o4i";
if (v == dnnl_ldgOI64o2i) return "ldgOI64o2i";
Expand Down
4 changes: 3 additions & 1 deletion src/common/memory_desc_wrapper.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2024 Intel Corporation
* Copyright 2016-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -635,6 +635,7 @@ status_t process_tag(F f, format_tag_t tag, Args&&... args) {
C(AB8a2b, {0, 1}, {8, 2}, {0, 1});
C(abDc16d, {0, 1, 3, 2}, {16}, {3});
C(abDc32d, {0, 1, 3, 2}, {32}, {3});
C(abDC16d4c, {0, 1, 3, 2}, {16, 4}, {3, 2});
C(abDC32d4c, {0, 1, 3, 2}, {32, 4}, {3, 2});
C(abCd4c, {0, 1, 2, 3}, {4}, {2});
C(abCde4c, {0, 1, 2, 3, 4}, {4}, {2});
Expand All @@ -644,6 +645,7 @@ status_t process_tag(F f, format_tag_t tag, Args&&... args) {
C(abCdef32c, {0, 1, 2, 3, 4, 5}, {32}, {2});
C(abdEc16e, {0, 1, 3, 4, 2}, {16}, {4});
C(abdEc32e, {0, 1, 3, 4, 2}, {32}, {4});
C(abdEC16e4c, {0, 1, 3, 4, 2}, {16, 4}, {4, 2});
C(abdEC32e2c, {0, 1, 3, 4, 2}, {32, 2}, {4, 2});
C(abdEC32e4c, {0, 1, 3, 4, 2}, {32, 4}, {4, 2});
C(abdEC64e2c, {0, 1, 3, 4, 2}, {64, 2}, {4, 2});
Expand Down
6 changes: 5 additions & 1 deletion src/common/tag_traits.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2018-2024 Intel Corporation
* Copyright 2018-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -113,6 +113,7 @@ enum class inner_blk_t {
_16b4c,
_16c2b,
_16c4b,
_16e4c,
_24a2b,
_24a4b,
_24b2a,
Expand All @@ -121,6 +122,7 @@ enum class inner_blk_t {
_24b4c,
_24c2b,
_24c4b,
_16d4c,
_32d4c,
_32e2c,
_32e4c,
Expand Down Expand Up @@ -821,6 +823,7 @@ DECL_TRAITS(aBCde4c8b2c, _BC, _4c8b2c, 5);
DECL_TRAITS(aBCdef4c8b2c, _BC, _4c8b2c, 6);
DECL_TRAITS(abDc16d, _D, _16d, 4);
DECL_TRAITS(abDc32d, _D, _32d, 4);
DECL_TRAITS(abDC16d4c, _CD, _16d4c, 4);
DECL_TRAITS(abDC32d4c, _CD, _32d4c, 4);
DECL_TRAITS(abCd32c, _C, _32c, 4);
DECL_TRAITS(abCde32c, _C, _32c, 5);
Expand All @@ -830,6 +833,7 @@ DECL_TRAITS(abCde4c, _C, _4c, 5);
DECL_TRAITS(abCdef4c, _C, _4c, 6);
DECL_TRAITS(abdEc16e, _E, _16e, 5);
DECL_TRAITS(abdEc32e, _E, _32e, 5);
DECL_TRAITS(abdEC16e4c, _CE, _16e4c, 5);
DECL_TRAITS(abdEC32e2c, _CE, _32e2c, 5);
DECL_TRAITS(abdEC32e4c, _CE, _32e4c, 5);
DECL_TRAITS(abdEC64e2c, _CE, _64e2c, 5);
Expand Down
4 changes: 2 additions & 2 deletions src/cpu/rnn/ref_rnn.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2018-2024 Intel Corporation
* Copyright 2018-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -413,7 +413,7 @@ _ref_rnn_common_t<aprop, src_type, weights_type, acc_type>::pd_t::init_brgemm(
VDISPATCH_RNN(
!(rnn_.is_signed_int8_conf() && !is_superset(isa, avx512_core_amx)),
VERBOSE_ISA_DT_MISMATCH);
VDISPATCH_RNN(!(rnn_.is_int8_conf() && !is_superset(isa, avx512_core_vnni)),
VDISPATCH_RNN(!(rnn_.is_int8_conf() && !is_superset(isa, avx2)),
VERBOSE_ISA_DT_MISMATCH);
VDISPATCH_RNN(!(rnn_.is_f32_conf() && !is_superset(isa, avx2)),
VERBOSE_ISA_DT_MISMATCH);
Expand Down
11 changes: 5 additions & 6 deletions src/cpu/rnn/rnn_reorders.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2018-2024 Intel Corporation
* Copyright 2018-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -802,7 +802,8 @@ struct rnn_brgemm_weights_reorder_s8_t : public primitive_t {
format_tag_t otag, itag;

itag = id.matches_one_of_tag(ldigo, ldio);
otag = od.matches_one_of_tag(ldgOI64o4i, ldgOI32o4i, ldOI32o4i);
otag = od.matches_one_of_tag(
ldgOI64o4i, ldgOI32o4i, ldgOI16o4i, ldOI32o4i, ldOI16o4i);
if (itag != format_tag::undef && otag != format_tag::undef) {
_pd->itag_ = itag;
_pd->otag_ = otag;
Expand Down Expand Up @@ -860,15 +861,13 @@ struct rnn_brgemm_weights_reorder_s8_t : public primitive_t {
return status::success;
}

const auto &blocked_d = dst_d;
const auto &pdims = blocked_d.padded_dims();

const int o_block = pd()->otag_ == ldgOI64o4i ? 64 : 32;
const int o_block = dst_d.blocking_desc().inner_blks[0];
static constexpr int i_block = 4;

dim_t L, D, I, G, O;
init_dims(L, D, I, G, O, src_d);

const auto &pdims = dst_d.padded_dims();
const dim_t pI = pdims[2];
const dim_t pO = (src_d.ndims() == 5) ? pdims[4] : pdims[3];
const dim_t IB = pI / i_block;
Expand Down
16 changes: 9 additions & 7 deletions src/cpu/rnn/rnn_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2018-2023 Intel Corporation
* Copyright 2018-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -76,8 +76,8 @@ bool rnn_utils::is_ldoi(const memory_desc_wrapper &mdw) {
bool rnn_utils::is_ldigo_blocked(const memory_desc_wrapper &mdw) {
format_tag_t md_format_tag = mdw.matches_one_of_tag(format_tag::ldgOi32o,
format_tag::ldgOI32o2i, format_tag::ldgOI32o4i,
format_tag::ldgOI64o2i, format_tag::ldgOI64o4i,
format_tag::ldgOi16o);
format_tag::ldgOI16o4i, format_tag::ldgOI64o2i,
format_tag::ldgOI64o4i, format_tag::ldgOi16o);
return md_format_tag != format_tag::undef;
}

Expand All @@ -88,8 +88,8 @@ bool rnn_utils::is_ldgoi_blocked(const memory_desc_wrapper &mdw) {
}

bool rnn_utils::is_ldio_blocked(const memory_desc_wrapper &mdw) {
format_tag_t md_format_tag = mdw.matches_one_of_tag(
format_tag::ldOi32o, format_tag::ldOI32o4i, format_tag::ldOi16o);
format_tag_t md_format_tag = mdw.matches_one_of_tag(format_tag::ldOi32o,
format_tag::ldOI32o4i, ldOI16o4i, format_tag::ldOi16o);
return md_format_tag != format_tag::undef;
}

Expand Down Expand Up @@ -286,14 +286,16 @@ status_t rnn_utils::set_expected_desc(rnn_conf_t &rnn,

if (weights_type == weights_type_t::projection) {
if (rnn.is_int8_conf())
tag = format_tag::ldOI32o4i;
tag = utils::map(n_block, format_tag::undef, 32,
format_tag::ldOI32o4i, 16, format_tag::ldOI16o4i);
else
tag = utils::map(n_block, format_tag::undef, 32,
format_tag::ldOi32o, 16, format_tag::ldOi16o);
} else if (rnn.is_fwd) {
if (rnn.is_int8_conf())
tag = utils::map(n_block, format_tag::undef, 64,
format_tag::ldgOI64o4i, 32, ldgOI32o4i);
format_tag::ldgOI64o4i, 32, ldgOI32o4i, 16,
ldgOI16o4i);
else if (rnn.is_xf16_conf())
tag = utils::map(n_block, format_tag::undef, 64,
format_tag::ldgOI64o2i, 32, ldgOI32o2i);
Expand Down
17 changes: 11 additions & 6 deletions src/cpu/x64/brgemm/brgemm_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2022-2024 Intel Corporation
* Copyright 2022-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -152,10 +152,14 @@ void set_isa_impl(brgemm_desc_t *brg) {
is_isa_ok(avx512_core_fp16), avx512_core_fp16);
}
} else if (brg->is_int8) {
brg->isa_impl = utils::map(true, isa_undef, is_isa_ok(avx512_core_amx),
avx512_core_amx, is_isa_ok(avx512_core_vnni), avx512_core_vnni,
is_isa_ok(avx512_core), avx512_core, is_isa_ok(avx2_vnni_2),
avx2_vnni_2, is_isa_ok(avx2_vnni), avx2_vnni);
brg->isa_impl
= utils::map(true, isa_undef, is_isa_ok(avx512_core_amx_fp16),
avx512_core_amx_fp16, is_isa_ok(avx512_core_amx),
avx512_core_amx, is_isa_ok(avx512_core_fp16),
avx512_core_fp16, is_isa_ok(avx512_core_vnni),
avx512_core_vnni, is_isa_ok(avx512_core), avx512_core,
is_isa_ok(avx2_vnni_2), avx2_vnni_2,
is_isa_ok(avx2_vnni), avx2_vnni, is_isa_ok(avx2), avx2);
} else if (brg->is_fp8) {
brg->isa_impl = utils::map(true, isa_undef,
is_isa_ok(avx10_1_512_amx_fp16), avx10_1_512_amx_fp16);
Expand Down Expand Up @@ -872,7 +876,8 @@ void init_brgemm_conf(brgemm_desc_t *brg, cpu_isa_t isa,

brg->isa_user = isa;
set_isa_impl(brg);
brg->is_int8_tmm = brg->is_int8 && brg->isa_impl == avx512_core_amx;
brg->is_int8_tmm
= brg->is_int8 && is_superset(brg->isa_impl, avx512_core_amx);
brg->is_bf16_tmm = brg->is_bf16 && brg->isa_impl == avx512_core_amx;
brg->is_f16_tmm = brg->is_f16 && brg->isa_impl == avx512_core_amx_fp16;
brg->is_bf32 = is_bf32
Expand Down
11 changes: 9 additions & 2 deletions src/cpu/x64/brgemm/jit_brgemm_kernel.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2024 Intel Corporation
* Copyright 2020-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -3602,7 +3602,14 @@ void jit_brgemm_kernel_t<Wmm>::generate() {

if (brg.is_int8 && !brg.has_int8_vnni) {
mov(reg_tmp_gpr.cvt16(), 0x1);
vpbroadcastw(int8_ones_words(), reg_tmp_gpr.cvt16());

if (is_superset(brg.isa_impl, avx512_core))
vpbroadcastw(int8_ones_words(), reg_tmp_gpr.cvt16());
else if (is_superset(brg.isa_impl, avx2)) {
movq(Xmm(int8_ones_words().getIdx()), reg_tmp_gpr);
vpbroadcastw(int8_ones_words(), Xmm(int8_ones_words().getIdx()));
} else
assert(!"unsupported isa");
}

if (brg.is_f16_b_non_amx_vnni()) {
Expand Down
10 changes: 9 additions & 1 deletion src/cpu/x64/jit_avx512_common_conv_kernel.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2016-2024 Intel Corporation
* Copyright 2016-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,6 +20,7 @@
#include "common/type_helpers.hpp"
#include "common/utils.hpp"

#include "cpu/cpu_convolution_pd.hpp"
#include "cpu/platform.hpp"
#include "cpu/x64/cpu_barrier.hpp"
#include "cpu/x64/injectors/injector_utils.hpp"
Expand Down Expand Up @@ -846,6 +847,13 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(jit_conv_conf_t &jcp,
jcp.stride_h = (ndims == 3) ? 1 : cd.strides[ndims - 4];
jcp.stride_w = cd.strides[ndims - 3];

// Dilation values larger than INT_MAX are unsupported because the jcp
// fields they are stored in may overflow.
// TODO: change data type of jcp fields to size_t
VDISPATCH_CONV_IC(!((ndims == 5 && cd.dilates[ndims - 5] > INT_MAX)
|| (ndims >= 4 && cd.dilates[ndims - 4] > INT_MAX)
|| (cd.dilates[ndims - 3] > INT_MAX)),
VERBOSE_BAD_PARAM, "dilates");

jcp.dilate_d = (ndims == 5) ? cd.dilates[0] : 0;
jcp.dilate_h = (ndims == 3) ? 0 : cd.dilates[ndims - 4];
jcp.dilate_w = cd.dilates[ndims - 3];
Expand Down
2 changes: 1 addition & 1 deletion src/cpu/x64/matmul/brgemm_matmul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ status_t brgemm_matmul_t<isa>::pd_t::init(engine_t *engine) {

const bool is_f32 = everyone_is(f32, src_dt, wei_dt, dst_dt);
const bool is_int8 = one_of(src_dt, u8, s8) && wei_dt == s8
&& one_of(dst_dt, u8, s8, s32, f32, bf16);
&& one_of(dst_dt, u8, s8, s32, f32, f16, bf16);
const bool is_f8 = one_of(src_dt, f8_e5m2, f8_e4m3)
&& one_of(wei_dt, f8_e5m2, f8_e4m3)
&& one_of(dst_dt, f32, f16, bf16, f8_e5m2, f8_e4m3);
Expand Down
Loading
Loading