-
Notifications
You must be signed in to change notification settings - Fork 1k
/
Copy pathgpu_reorder_pd.cpp
101 lines (90 loc) · 4.42 KB
/
gpu_reorder_pd.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*******************************************************************************
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "gpu/gpu_reorder_pd.hpp"
#include "gpu/gpu_engine.hpp"
#include "gpu/gpu_stream.hpp"
#include "gpu/gpu_zero_points_conv.hpp"
namespace dnnl {
namespace impl {
namespace gpu {
// Decides whether this reorder needs a zero-point precompute convolution and,
// if it does, creates that convolution's primitive descriptor and books the
// scratchpad used to run it.
//
// The convolution is needed only when `dst_engine` is a GPU engine and the
// destination memory descriptor carries the
// `compensation_gpu_conv_asymmetric_src` extra flag. The decision is stored in
// `do_zp_precomp_conv_`, which the other maybe_*() methods consult.
//
// Returns status::success when no convolution is needed or when the setup
// completed; a failure reported by create_zp_precompute_conv_pd() is passed
// back to the caller through CHECK.
status_t gpu_reorder_pd_t::maybe_create_zp_precompute_conv_pd(
        impl::engine_t *dst_engine) {
    memory_desc_wrapper dst_mdw(dst_md());
    auto &extra = dst_mdw.extra();
    auto needs_conv = memory_extra_flags::compensation_gpu_conv_asymmetric_src;
    auto is_dst_gpu = (dst_engine->kind() == engine_kind::gpu);
    do_zp_precomp_conv_ = is_dst_gpu && (extra.flags & needs_conv);
    if (!do_zp_precomp_conv_) return status::success;

    using namespace memory_extra_flags;
    const auto out_type = data_type::f32;
    primitive_attr_t attr;
    // The "_bwd" flag selects a backward-data convolution; otherwise the
    // precompute runs as forward inference.
    const bool is_bwd_d
            = extra.flags & compensation_gpu_conv_asymmetric_src_bwd;
    auto prop = (is_bwd_d) ? prop_kind::backward_data
                           : prop_kind::forward_inference;
    CHECK(create_zp_precompute_conv_pd(zp_precomp_conv_pd_, dst_engine, attr,
            dst_md(), extra.idhw, extra.odhw, extra.pdhw, extra.ddhw, out_type,
            prop));

    using namespace memory_tracking::names;
    auto gpu_align = utils::downcast<gpu::engine_t *>(dst_engine)
                             ->get_buffer_alignment();
    auto scratchpad = scratchpad_registry().registrar();
    // Reference to the nested convolution's own scratchpad registry; its size
    // is what gets reserved under key_conv_tails below.
    const auto &registry = zp_precomp_conv_pd_->scratchpad_registry();
    // Input tensor of the nested convolution: diff_dst for backward data, src
    // otherwise. maybe_exec_zp_precompute_conv() wraps this buffer in a
    // memory object and fills it before running the convolution.
    memory_desc_wrapper wspace((is_bwd_d) ? zp_precomp_conv_pd_->diff_dst_md()
                                          : zp_precomp_conv_pd_->src_md());
    scratchpad.book(key_conv_tr_src, wspace.size(), 1, gpu_align);
    scratchpad.book(key_conv_tails, registry.size(), 1, gpu_align);
    return status::success;
}
// Instantiates the zero-point precompute convolution as a nested primitive of
// `primitive`, storing it in `zp_precomp_conv`. When no precompute
// convolution was requested (`do_zp_precomp_conv_` is false) nothing is
// created and status::success is returned.
status_t gpu_reorder_pd_t::maybe_create_zp_precompute_conv(
        std::shared_ptr<impl::primitive_t> &zp_precomp_conv,
        impl::engine_t *engine, gpu::primitive_t *primitive) const {
    if (do_zp_precomp_conv_) {
        return primitive->create_nested_primitive(
                zp_precomp_conv, zp_precomp_conv_pd_, engine);
    }
    return status::success;
}
// Runs the zero-point precompute convolution on the reorder output, if one
// was set up for this primitive descriptor.
//
// The convolution input is a scratchpad buffer (key_conv_tr_src) filled with
// the byte pattern 0x01; the reorder destination (DNNL_ARG_TO) is passed both
// as the convolution weights and as the convolution output. For a
// backward-data convolution the input/output slots are DIFF_DST/DIFF_SRC
// instead of SRC/DST.
//
// Returns status::success immediately when no precompute convolution is
// required; otherwise returns the status of the nested execute() call, or the
// first failure reported through CHECK.
status_t gpu_reorder_pd_t::maybe_exec_zp_precompute_conv(const exec_ctx_t &ctx,
        const std::shared_ptr<impl::primitive_t> &zp_precomp_conv) const {
    if (!do_zp_precomp_conv_) return status::success;

    using namespace memory_tracking::names;

    const bool bwd_data = zp_precomp_conv_pd_->get_prop_kind()
            == prop_kind::backward_data;
    auto input_md = bwd_data ? zp_precomp_conv_pd_->diff_dst_md()
                             : zp_precomp_conv_pd_->src_md();
    const int input_arg = bwd_data ? DNNL_ARG_DIFF_DST : DNNL_ARG_SRC;
    const int output_arg = bwd_data ? DNNL_ARG_DIFF_SRC : DNNL_ARG_DST;

    // Wrap the booked scratchpad storage in a memory object described by the
    // convolution's input descriptor.
    auto input_storage
            = ctx.get_scratchpad_grantor().get_memory_storage(key_conv_tr_src);
    std::unique_ptr<memory_t, memory_deleter_t> input_holder;
    CHECK(safe_ptr_assign(input_holder,
            new memory_t(ctx.stream()->engine(), input_md,
                    std::move(input_storage))));

    // Fill the whole input buffer with 0x01 bytes before the convolution
    // reads it.
    auto *gpu_stream = utils::downcast<gpu::stream_t *>(ctx.stream());
    const auto input_bytes = memory_desc_wrapper(input_md).size();
    CHECK(gpu_stream->fill(*input_holder->memory_storage(), 0x01, input_bytes,
            gpu_stream->ctx().get_deps(), gpu_stream->ctx().get_deps()));

    // The reorder destination is both the weights (read-only) and the output
    // (writable) of the nested convolution.
    memory_t *input_mem = input_holder.get();
    auto *reorder_dst = ctx.output(DNNL_ARG_TO);
    exec_args_t conv_args;
    conv_args[input_arg] = memory_arg_t {input_mem, true};
    conv_args[DNNL_ARG_WEIGHTS] = memory_arg_t {reorder_dst, true};
    conv_args[output_arg] = memory_arg_t {reorder_dst, false};

    // Give the nested primitive its own slice of the scratchpad.
    exec_ctx_t conv_ctx(ctx, std::move(conv_args));
    nested_scratchpad_t nested(ctx, key_conv_tails, zp_precomp_conv);
    conv_ctx.set_scratchpad_grantor(nested.grantor());

    return zp_precomp_conv->execute(conv_ctx);
}
} // namespace gpu
} // namespace impl
} // namespace dnnl