[ARM] Revert uxlfoundation#2235 "fix, build, docs: aarch64: mutex lock if the ACL kernel is not stateless"

alvoron · xczhai · commit 8ac4a198ed41 · 2024-12-20T13:46:06.000+08:00
diff --git a/.github/automation/build_acl.sh b/.github/automation/build_acl.sh
@@ -28,7 +28,7 @@ source ${SCRIPT_DIR}/common_aarch64.sh
 
 ACL_CONFIG=${ACL_CONFIG:-"Release"}
 ACL_ROOT_DIR=${ACL_ROOT_DIR:-"${PWD}/ComputeLibrary"}
-ACL_VERSION=${ACL_VERSION:-v24.11.1}
+ACL_VERSION=${ACL_VERSION:-v24.09}
 ACL_ARCH=${ACL_ARCH:-"armv8.2-a"}
 ACL_REPO="https://github.com/ARM-software/ComputeLibrary.git"
 
diff --git a/README.md b/README.md
@@ -173,7 +173,7 @@ On a CPU based on Arm AArch64 architecture, oneDNN CPU engine can be built with
 machine learning applications and provides AArch64 optimized implementations
 of core functions. This functionality currently requires that ACL is downloaded
 and built separately. See [Build from Source] section of the Developer Guide for
-details. oneDNN only supports Compute Library versions 24.11.1 or later.
+details. oneDNN only supports Compute Library versions 24.09 or later.
 
 [Arm Compute Library (ACL)]: https://github.com/arm-software/ComputeLibrary
 
diff --git a/cmake/ACL.cmake b/cmake/ACL.cmake
@@ -31,7 +31,7 @@ endif()
 
 find_package(ACL REQUIRED)
 
-set(ACL_MINIMUM_VERSION "24.11.1")
+set(ACL_MINIMUM_VERSION "24.09")
 
 if(ACL_FOUND)
     file(GLOB_RECURSE ACL_VERSION_FILE ${ACL_INCLUDE_DIR}/*/arm_compute_version.embed)
diff --git a/src/cpu/acl/matmul/acl_matmul.cpp b/src/cpu/acl/matmul/acl_matmul.cpp
@@ -15,7 +15,6 @@
 *******************************************************************************/
 
 #include "cpu/acl/matmul/acl_matmul.hpp"
-#include <mutex>
 
 namespace dnnl {
 namespace impl {
@@ -176,16 +175,7 @@ status_t acl_matmul_t::execute_forward(const exec_ctx_t &ctx) const {
     auto src_base = CTX_IN_MEM(const data_t *, DNNL_ARG_SRC);
     auto wei_base = CTX_IN_MEM(const data_t *, DNNL_ARG_WEIGHTS);
 
-    const auto &amp = pd()->amp_;
-
-    std::unique_lock<std::mutex> locker {mtx_, std::defer_lock};
-
-    // Some of the underlying kernels used by ACL still require some state and
-    // are not safe to be called in parallel with different execution contexts.
-    // Eventually when all kernels are truly stateless, this guard can be
-    // removed.
-    if (!acl_obj_->asm_gemm.has_stateless_impl()) { locker.lock(); }
-
+    const auto &amp = pd()->amp_;
     bool is_transA = amp.is_transA;
     bool is_transB = amp.is_transB;
     bool do_transC = amp.do_transC;
diff --git a/src/cpu/acl/matmul/acl_matmul.hpp b/src/cpu/acl/matmul/acl_matmul.hpp
@@ -17,7 +17,6 @@
 #ifndef ACL_MATMUL_HPP
 #define ACL_MATMUL_HPP
 
-#include <mutex>
 #include "common/utils.hpp"
 #include "cpu/acl/acl_post_ops.hpp"
 #include "cpu/acl/matmul/acl_matmul_utils.hpp"
@@ -72,7 +71,6 @@ struct acl_matmul_t : public primitive_t {
     const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
 
     std::unique_ptr<acl_matmul_obj_t> acl_obj_;
-    mutable std::mutex mtx_;
 }; // acl_matmul_t
 
 } // namespace matmul