Skip to content

Commit 3b8f5cd

Browse files
taoye9avmanerikar
authored andcommitted
cpu: aarch64: hot fix for aux tensor management of stateless gemm-conv and winograd conv without lock.
Signed-off-by: Ye Tao <ye.tao@arm.com> Change-Id: Ifb30292a8bfc5219c44515eb4d29b277a0f0b24a Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/oncpuml/oneDNN/+/680780 Tested-by: svc_mongoosetron <svc_mongoosetron@arm.com> Reviewed-by: Hamza Butt <hamza.butt@arm.com> IP-Review: Hamza Butt <hamza.butt@arm.com>
1 parent c5c10ad commit 3b8f5cd

4 files changed

+60
-43
lines changed

src/cpu/aarch64/acl_gemm_convolution.cpp

+26-19
Original file line numberDiff line numberDiff line change
@@ -89,37 +89,44 @@ template <data_type_t src_t, data_type_t wei_t, data_type_t dst_t,
8989
data_type_t bia_t>
9090
status_t acl_gemm_convolution_fwd_t<src_t, wei_t, dst_t, bia_t>::init(
9191
engine_t *engine) {
92+
// commented due to hot fix solution for stateless API which should be replaced soon.
93+
// auto acp_ = pd()->acp_;
94+
// acl_obj_->conv.configure(&acp_.src_tensor_info, &acp_.wei_tensor_info,
95+
// acp_.with_bias ? &acp_.bia_tensor_info : nullptr,
96+
// &acp_.dst_tensor_info, acp_.padstride_info, acp_.weights_info,
97+
// acp_.dilation_info, acp_.act_info, acp_.fast_math);
98+
// acl_obj_->aux_mem_req = acl_obj_->conv.workspace();
99+
return status::success;
100+
}
101+
102+
template <data_type_t src_type, data_type_t wei_type, data_type_t dst_type,
103+
data_type_t bia_type>
104+
std::unique_ptr<acl_obj_t<typename acl_gemm_convolution_fwd_t<src_type,
105+
wei_type, dst_type, bia_type>::Op>>
106+
acl_gemm_convolution_fwd_t<src_type, wei_type, dst_type,
107+
bia_type>::reinitialize_acl_obj() const {
92108
auto acp_ = pd()->acp_;
93-
acl_obj_->conv.configure(&acp_.src_tensor_info, &acp_.wei_tensor_info,
109+
std::unique_ptr<acl_obj_t<Op>> acl_obj = std::make_unique<acl_obj_t<Op>>();
110+
acl_obj->conv.configure(&acp_.src_tensor_info, &acp_.wei_tensor_info,
94111
acp_.with_bias ? &acp_.bia_tensor_info : nullptr,
95112
&acp_.dst_tensor_info, acp_.padstride_info, acp_.weights_info,
96113
acp_.dilation_info, acp_.act_info, acp_.fast_math);
97-
acl_obj_->aux_mem_req = acl_obj_->conv.workspace();
98-
return status::success;
99-
}
100-
101-
template <data_type_t src_t, data_type_t wei_t, data_type_t dst_t,
102-
data_type_t bia_t>
103-
void acl_gemm_convolution_fwd_t<src_t, wei_t, dst_t,
104-
bia_t>::reinitialize_acl_obj() const {
105-
auto acp = pd()->acp_;
106-
std::lock_guard<std::mutex> _lock {this->mtx};
107-
acl_obj_ = std::make_unique<acl_obj_t<Op>>();
108-
acl_obj_->conv.configure(&acp.src_tensor_info, &acp.wei_tensor_info,
109-
acp.with_bias ? &acp.bia_tensor_info : nullptr,
110-
&acp.dst_tensor_info, acp.padstride_info, acp.weights_info,
111-
acp.dilation_info, acp.act_info, acp.fast_math);
112-
acl_obj_->aux_mem_req = acl_obj_->conv.workspace();
114+
acl_obj->aux_mem_req = acl_obj->conv.workspace();
115+
return acl_obj;
113116
}
114117

115118
template <data_type_t src_t, data_type_t wei_t, data_type_t dst_t,
116119
data_type_t bia_t>
117120
status_t
118121
acl_gemm_convolution_fwd_t<src_t, wei_t, dst_t, bia_t>::execute_forward(
119122
const exec_ctx_t &ctx) const {
120-
reinitialize_acl_obj();
123+
// Temporary hotfix: We're using a local acl_obj instance in this method
124+
// instead of the class member acl_obj_. This hotfix is to bypass persistent aux mem requirements but is not the ideal solution.
125+
// It should be refactored or removed in the future when a more permanent fix is implemented.
126+
auto acl_obj = reinitialize_acl_obj();
127+
121128
return execute_forward_conv_acl<acl_obj_t<Op>, pd_t, src_data_t, wei_data_t,
122-
dst_data_t, bia_data_t>(ctx, acl_obj_.get(), pd(), gemm_conv_keys);
129+
dst_data_t, bia_data_t>(ctx, acl_obj.get(), pd(), gemm_conv_keys);
123130
}
124131

125132
using namespace data_type;

src/cpu/aarch64/acl_gemm_convolution.hpp

+8-5
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,10 @@ struct acl_gemm_convolution_fwd_t : public primitive_t {
4949
acl_post_ops_t post_ops;
5050
};
5151

52-
acl_gemm_convolution_fwd_t(const pd_t *apd)
53-
: primitive_t(apd), acl_obj_(std::make_unique<acl_obj_t<Op>>()) {}
52+
// hot fix solution for stateless API which should be replaced soon.
53+
// acl_gemm_convolution_fwd_t(const pd_t *apd)
54+
// : primitive_t(apd), acl_obj_(std::make_unique<acl_obj_t<Op>>()) {}
55+
acl_gemm_convolution_fwd_t(const pd_t *apd) : primitive_t(apd) {}
5456

5557
status_t init(engine_t *engine) override;
5658

@@ -67,11 +69,12 @@ struct acl_gemm_convolution_fwd_t : public primitive_t {
6769
status_t execute_forward(const exec_ctx_t &ctx) const;
6870

6971
// hot fix solution for stateless API which should be replaced soon.
70-
mutable std::mutex mtx;
71-
void reinitialize_acl_obj() const;
72+
std::unique_ptr<acl_obj_t<Op>> reinitialize_acl_obj() const;
7273

7374
const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
74-
mutable std::unique_ptr<acl_obj_t<Op>> acl_obj_;
75+
76+
// commented due to hot fix solution for stateless API which should be replaced soon.
77+
// std::unique_ptr<acl_obj_t<Op>> acl_obj_;
7578

7679
}; // acl_gemm_convolution_fwd_t
7780

src/cpu/aarch64/acl_winograd_convolution.cpp

+19-14
Original file line numberDiff line numberDiff line change
@@ -79,13 +79,14 @@ status_t acl_wino_convolution_fwd_t::pd_t::init(engine_t *engine) {
7979
}
8080

8181
status_t acl_wino_convolution_fwd_t::init(engine_t *engine) {
82-
auto acp = pd()->acp_;
83-
acl_obj_->conv.configure(&acp.src_tensor_info, &acp.wei_tensor_info,
84-
acp.with_bias ? &acp.bia_tensor_info : nullptr,
85-
&acp.dst_tensor_info, acp.padstride_info, acp.act_info,
86-
true); // to support 5x5, 7x7 filter shapes in addition to 3x3
87-
88-
acl_obj_->aux_mem_req = acl_obj_->conv.workspace();
82+
// commented due to hot fix solution for stateless API which should be replaced soon.
83+
// auto acp = pd()->acp_;
84+
// acl_obj_->conv.configure(&acp.src_tensor_info, &acp.wei_tensor_info,
85+
// acp.with_bias ? &acp.bia_tensor_info : nullptr,
86+
// &acp.dst_tensor_info, acp.padstride_info, acp.act_info,
87+
// true); // to support 5x5, 7x7 filter shapes in addition to 3x3
88+
89+
// acl_obj_->aux_mem_req = acl_obj_->conv.workspace();
8990
return status::success;
9091
}
9192

@@ -129,23 +130,27 @@ status_t acl_wino_convolution_fwd_t::pd_t::init_conf() {
129130
return status::success;
130131
}
131132

132-
void acl_wino_convolution_fwd_t::reinitialize_acl_obj() const {
133+
std::unique_ptr<acl_obj_t<acl_wino_convolution_fwd_t::Op>>
134+
acl_wino_convolution_fwd_t::reinitialize_acl_obj() const {
133135
auto acp = pd()->acp_;
134-
std::lock_guard<std::mutex> _lock {this->mtx};
135-
acl_obj_ = std::make_unique<acl_obj_t<Op>>();
136-
acl_obj_->conv.configure(&acp.src_tensor_info, &acp.wei_tensor_info,
136+
std::unique_ptr<acl_obj_t<Op>> acl_obj = std::make_unique<acl_obj_t<Op>>();
137+
acl_obj->conv.configure(&acp.src_tensor_info, &acp.wei_tensor_info,
137138
acp.with_bias ? &acp.bia_tensor_info : nullptr,
138139
&acp.dst_tensor_info, acp.padstride_info, acp.act_info,
139140
true); // to support 5x5, 7x7 filter shapes in addition to 3x3
140141

141-
acl_obj_->aux_mem_req = acl_obj_->conv.workspace();
142+
acl_obj->aux_mem_req = acl_obj->conv.workspace();
143+
return acl_obj;
142144
}
143145

144146
status_t acl_wino_convolution_fwd_t::execute_forward(
145147
const exec_ctx_t &ctx) const {
146-
reinitialize_acl_obj();
148+
// Temporary hotfix: We're using a local acl_obj instance in this method
149+
// instead of the class member acl_obj_. This hotfix is to bypass persistent aux mem requirements but is not the ideal solution.
150+
// It should be refactored or removed in the future when a more permanent fix is implemented.
151+
const auto acl_obj = reinitialize_acl_obj();
147152
return execute_forward_conv_acl<acl_obj_t<Op>, pd_t, data_t>(
148-
ctx, acl_obj_.get(), pd(), wino_conv_keys);
153+
ctx, acl_obj.get(), pd(), wino_conv_keys);
149154
}
150155
} // namespace aarch64
151156
} // namespace cpu

src/cpu/aarch64/acl_winograd_convolution.hpp

+7-5
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,10 @@ struct acl_wino_convolution_fwd_t : public primitive_t {
4747
status_t init_conf();
4848
};
4949

50-
acl_wino_convolution_fwd_t(const pd_t *apd)
51-
: primitive_t(apd), acl_obj_(std::make_unique<acl_obj_t<Op>>()) {}
50+
// hot fix solution for stateless API which should be replaced soon.
51+
// acl_wino_convolution_fwd_t(const pd_t *apd)
52+
// : primitive_t(apd), acl_obj_(std::make_unique<acl_obj_t<Op>>()) {}
53+
acl_wino_convolution_fwd_t(const pd_t *apd) : primitive_t(apd) {}
5254

5355
status_t init(engine_t *engine) override;
5456

@@ -60,11 +62,11 @@ struct acl_wino_convolution_fwd_t : public primitive_t {
6062
status_t execute_forward(const exec_ctx_t &ctx) const;
6163

6264
// hot fix solution for stateless API which should be replaced soon.
63-
mutable std::mutex mtx;
64-
void reinitialize_acl_obj() const;
65+
std::unique_ptr<acl_obj_t<Op>> reinitialize_acl_obj() const;
6566

6667
const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
67-
mutable std::unique_ptr<acl_obj_t<Op>> acl_obj_;
68+
// commented due to hot fix solution for stateless API which should be replaced soon.
69+
// std::unique_ptr<acl_obj_t<Op>> acl_obj_;
6870
}; // acl_wino_convolution_fwd_t
6971

7072
} // namespace aarch64

0 commit comments

Comments
 (0)