Skip to content

Commit ad7203c

Browse files
committed
cpu: x64: jit_direct_reorder: add identical extra buffers support
If the extra data is identical for both buffers, a direct copy can handle it. Those buffers are never large, so there is no need to parallelize the copy.
1 parent 8c492e5 commit ad7203c

File tree

2 files changed

+24
-4
lines changed

2 files changed

+24
-4
lines changed

src/cpu/reorder/cpu_reorder_comp_s8_s8.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2020-2024 Intel Corporation
2+
* Copyright 2020-2025 Intel Corporation
33
* Copyright 2023 FUJITSU LIMITED
44
*
55
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,6 +28,7 @@ const impl_list_map_t &comp_s8_s8_impl_list_map() {
2828
// s8 -> s8
2929
{{s8, s8, 2}, {
3030
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::brgemm_matmul_copy_reorder_t))
31+
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_direct_copy_t))
3132
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t))
3233
DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t))
3334
DNNL_NON_X64_ONLY(REG_SR(s8, oi, s8, OI4i16o4i, fmt_order::keep, spec::conv_req_comp))
@@ -50,6 +51,7 @@ const impl_list_map_t &comp_s8_s8_impl_list_map() {
5051
// s8 -> s8
5152
{{s8, s8, 3}, {
5253
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::brgemm_matmul_copy_reorder_t))
54+
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_direct_copy_t))
5355
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t))
5456
DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t))
5557
DNNL_NON_X64_ONLY(REG_SR(s8, any, s8, wio, fmt_order::keep, spec::conv_req_comp))
@@ -88,6 +90,7 @@ const impl_list_map_t &comp_s8_s8_impl_list_map() {
8890
nullptr,
8991
}},
9092
{{s8, s8, 4}, {
93+
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_direct_copy_t))
9194
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t))
9295
DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t))
9396
DNNL_NON_X64_ONLY(REG_SR(s8, any, s8, hwio, fmt_order::keep, spec::conv_req_comp))
@@ -137,6 +140,7 @@ const impl_list_map_t &comp_s8_s8_impl_list_map() {
137140
nullptr,
138141
}},
139142
{{s8, s8, 5}, {
143+
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_direct_copy_t))
140144
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t))
141145
DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t))
142146
DNNL_NON_X64_ONLY(REG_SR(s8, any, s8, hwigo, fmt_order::keep, spec::conv_req_comp))
@@ -183,6 +187,7 @@ const impl_list_map_t &comp_s8_s8_impl_list_map() {
183187
nullptr,
184188
}},
185189
{{s8, s8, 6}, {
190+
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_direct_copy_t))
186191
DNNL_X64_ONLY(CPU_REORDER_INSTANCE(x64::jit_uni_reorder_t))
187192
DNNL_AARCH64_ONLY(CPU_REORDER_INSTANCE(aarch64::jit_uni_reorder_t))
188193
DNNL_NON_X64_ONLY(REG_SR(s8, any, s8, dhwigo, fmt_order::keep, spec::conv_req_comp))

src/cpu/x64/jit_uni_reorder_direct_copy.cpp

+18-3
Original file line numberDiff line numberDiff line change
@@ -309,9 +309,13 @@ status_t jit_uni_reorder_direct_copy_t::pd_t::init(
309309
VDISPATCH_REORDER(src_d.similar_to(dst_d, true, false, 0),
310310
VERBOSE_TENSOR_FORMAT_MISMATCH, "src", "dst");
311311

312-
VDISPATCH_REORDER(
313-
utils::everyone_is(0UL, src_d.extra().flags, dst_d.extra().flags),
314-
VERBOSE_UNSUPPORTED_MD_FLAG);
312+
VDISPATCH_REORDER(src_d.extra().flags == dst_d.extra().flags,
313+
VERBOSE_UNSUPPORTED_MD_FLAG, "src or dst");
314+
315+
VDISPATCH_REORDER(IMPLICATION(src_d.extra().flags > 0UL,
316+
src_d.additional_buffer_size()
317+
== dst_d.additional_buffer_size()),
318+
VERBOSE_UNSUPPORTED_MD_FLAG, "src or dst");
315319

316320
VDISPATCH_REORDER(attr()->has_default_values(), VERBOSE_UNSUPPORTED_ATTR);
317321

@@ -374,6 +378,17 @@ status_t jit_uni_reorder_direct_copy_t::execute(const exec_ctx_t &ctx) const {
374378
out + (start + dst_d.offset0()) * dst_dt_size, end - start);
375379
});
376380

381+
if (src_d.is_additional_buffer()) {
382+
// Verified in pd_t::init();
383+
assert(src_d.extra().flags == dst_d.extra().flags);
384+
385+
const auto additional_size = src_d.additional_buffer_size();
386+
const auto data_size = src_d.size(/* index = */ 0,
387+
/* include_additional_size = */ false);
388+
std::memcpy(out + data_size * dst_dt_size, in + data_size * src_dt_size,
389+
additional_size);
390+
}
391+
377392
return status::success;
378393
}
379394

0 commit comments

Comments
 (0)