Skip to content

Commit 35b88ed

Browse files
committed
cpu: aarch64: Enable stateless ACL LayerNorm
1 parent 1fa6bd2 commit 35b88ed

File tree

2 files changed

+30
-69
lines changed

2 files changed

+30
-69
lines changed

src/cpu/aarch64/acl_layer_normalization.cpp

+20 -17
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2023 Arm Ltd. and affiliates
2+
* Copyright 2023, 2025 Arm Ltd. and affiliates
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -21,29 +21,32 @@ namespace impl {
2121
namespace cpu {
2222
namespace aarch64 {
2323

24+
status_t acl_layer_normalization_fwd_t::init(engine_t *engine) {
25+
auto aep = pd()->anp;
26+
acl_obj.get()->msdNorm.configure(
27+
&aep.data_info, &aep.data_info, pd()->desc()->layer_norm_epsilon);
28+
return status::success;
29+
}
30+
2431
status_t acl_layer_normalization_fwd_t::execute_forward(
2532
const exec_ctx_t &ctx) const {
2633

27-
// Lock here is needed because resource_mapper does not support
28-
// concurrent access.
29-
std::lock_guard<std::mutex> _lock {this->mtx};
30-
31-
// Retrieve primitive resource and configured Compute Library objects
32-
auto *acl_resource
33-
= ctx.get_resource_mapper()
34-
->get<acl_layer_normalization_resource_t>(this);
35-
acl_msdnorm_obj_t &acl_obj = acl_resource->get_acl_obj();
36-
3734
auto src = CTX_IN_MEM(const float *, DNNL_ARG_SRC);
38-
acl_obj.src_tensor.allocator()->import_memory(const_cast<float *>(src));
39-
4035
auto dst = CTX_OUT_MEM(float *, DNNL_ARG_DST);
41-
acl_obj.dst_tensor.allocator()->import_memory(dst);
4236

43-
acl_obj.msdNorm.run();
37+
auto aep = pd()->anp;
38+
arm_compute::Tensor src_tensor;
39+
arm_compute::Tensor dst_tensor;
40+
41+
src_tensor.allocator()->init(aep.data_info);
42+
src_tensor.allocator()->import_memory(const_cast<float *>(src));
43+
dst_tensor.allocator()->init(aep.data_info);
44+
dst_tensor.allocator()->import_memory(dst);
4445

45-
acl_obj.src_tensor.allocator()->free();
46-
acl_obj.dst_tensor.allocator()->free();
46+
arm_compute::ITensorPack act_pack;
47+
act_pack.add_tensor(arm_compute::TensorType::ACL_SRC, &src_tensor);
48+
act_pack.add_tensor(arm_compute::TensorType::ACL_DST, &dst_tensor);
49+
acl_obj.get()->msdNorm.run(act_pack);
4750

4851
return status::success;
4952
}

src/cpu/aarch64/acl_layer_normalization.hpp

+10 -52
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*******************************************************************************
2-
* Copyright 2023-2024 Arm Ltd. and affiliates
2+
* Copyright 2023-2025 Arm Ltd. and affiliates
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
1717
#ifndef CPU_AARCH64_ACL_LAYER_NORMALIZATION_HPP
1818
#define CPU_AARCH64_ACL_LAYER_NORMALIZATION_HPP
1919

20+
#include "arm_compute/runtime/experimental/operators/CpuMeanStdDevNormalization.h"
2021
#include "cpu/aarch64/acl_utils.hpp"
2122
#include "cpu/cpu_layer_normalization_pd.hpp"
2223

@@ -26,45 +27,15 @@ namespace cpu {
2627
namespace aarch64 {
2728

2829
struct acl_msdnorm_obj_t {
29-
arm_compute::NEMeanStdDevNormalizationLayer msdNorm;
30-
arm_compute::Tensor src_tensor;
31-
arm_compute::Tensor dst_tensor;
30+
arm_compute::experimental::op::CpuMeanStdDevNormalization msdNorm;
3231
};
3332

3433
struct acl_msdnorm_conf_t {
3534
arm_compute::TensorInfo data_info; // src and dst tensors
3635
};
3736

38-
struct acl_layer_normalization_resource_t : public resource_t {
39-
acl_layer_normalization_resource_t()
40-
: acl_obj(utils::make_unique<acl_msdnorm_obj_t>()) {}
41-
42-
status_t configure(
43-
const acl_msdnorm_conf_t &anp, const layer_normalization_pd_t *pd) {
44-
if (!acl_obj) return status::out_of_memory;
45-
46-
acl_obj->src_tensor.allocator()->init(anp.data_info);
47-
acl_obj->dst_tensor.allocator()->init(anp.data_info);
48-
49-
// clang-format off
50-
acl_obj->msdNorm.configure(
51-
&acl_obj->src_tensor,
52-
&acl_obj->dst_tensor,
53-
pd->desc()->layer_norm_epsilon);
54-
// clang-format on
55-
56-
return status::success;
57-
}
58-
59-
acl_msdnorm_obj_t &get_acl_obj() const { return *acl_obj; }
60-
61-
DNNL_DISALLOW_COPY_AND_ASSIGN(acl_layer_normalization_resource_t);
62-
63-
private:
64-
std::unique_ptr<acl_msdnorm_obj_t> acl_obj;
65-
}; // acl_layer_normalization_resource_t
66-
6737
struct acl_layer_normalization_fwd_t : public primitive_t {
38+
using Op = arm_compute::experimental::op::CpuMeanStdDevNormalization;
6839
struct pd_t : public cpu_layer_normalization_fwd_pd_t {
6940
using cpu_layer_normalization_fwd_pd_t::
7041
cpu_layer_normalization_fwd_pd_t;
@@ -219,36 +190,23 @@ struct acl_layer_normalization_fwd_t : public primitive_t {
219190

220191
}; // pd_t
221192

222-
acl_layer_normalization_fwd_t(const pd_t *apd) : primitive_t(apd) {}
223-
224-
status_t create_resource(
225-
engine_t *engine, resource_mapper_t &mapper) const override {
226-
if (mapper.has_resource(this)) return status::success;
227-
228-
auto r = utils::make_unique<acl_layer_normalization_resource_t>();
229-
if (!r) return status::out_of_memory;
230-
231-
// Configure the resource based on information from primitive descriptor
232-
CHECK(r->configure(pd()->anp, pd()));
233-
mapper.add(this, std::move(r));
234-
235-
return status::success;
236-
}
193+
acl_layer_normalization_fwd_t(const pd_t *apd)
194+
: primitive_t(apd), acl_obj(std::make_unique<acl_msdnorm_obj_t>()) {}
237195

238196
status_t execute(const exec_ctx_t &ctx) const override {
239197
return execute_forward(ctx);
240198
}
199+
status_t init(engine_t *engine) override;
241200

242201
private:
243-
// To guard the const execute_forward, the mutex must be 'mutable'
244-
mutable std::mutex mtx;
245202
status_t execute_forward(const exec_ctx_t &ctx) const;
246203
const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
247-
}; // acl_layer_normalization_fwd_t
204+
std::unique_ptr<acl_msdnorm_obj_t> acl_obj;
205+
};
248206

249207
} // namespace aarch64
250208
} // namespace cpu
251209
} // namespace impl
252210
} // namespace dnnl
253211

254-
#endif // CPU_AARCH64_ACL_LAYER_NORMALIZATION_HPP
212+
#endif // CPU_AARCH64_ACL_LAYER_NORMALIZATION_HPP

0 commit comments

Comments
 (0)