uxlfoundation
diff --git a/‎cmake/options.cmake
+52-48 b/‎cmake/options.cmake
+52-48
diff --git a/‎include/oneapi/dnnl/dnnl.hpp
+3 b/‎include/oneapi/dnnl/dnnl.hpp
+3
diff --git a/‎include/oneapi/dnnl/dnnl_types.h
+3 b/‎include/oneapi/dnnl/dnnl_types.h
+3
diff --git a/‎src/CMakeLists.txt
+16-12 b/‎src/CMakeLists.txt
+16-12
diff --git a/‎src/common/c_types_map.hpp
+3 b/‎src/common/c_types_map.hpp
+3
diff --git a/‎src/common/memory_desc_wrapper.cpp
+3 b/‎src/common/memory_desc_wrapper.cpp
+3
diff --git a/‎src/common/primitive_attr_quant.hpp
+3 b/‎src/common/primitive_attr_quant.hpp
+3
@@ -1,23 +1,23 @@
-#===============================================================================
-# Copyright 2018-2025 Intel Corporation
+#===============================================================================
+# Copyright 2018-2025 Intel Corporation
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#===============================================================================
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
 
-# Manage different library options
-#===============================================================================
+# Manage different library options
+#===============================================================================
 
-if(options_cmake_included)
+if (options_cmake_included)
     return()
 endif()
 set(options_cmake_included true)
@@ -26,14 +26,18 @@ if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
     set(DNNL_IS_MAIN_PROJECT TRUE)
 endif()
 
-# ========
-# Features
-# ========
+# ========
+# Features
+# ========
 
 option(DNNL_VERBOSE
     "allows oneDNN be verbose whenever ONEDNN_VERBOSE
     environment variable set to 1" ON) # enabled by default
 
+option(DNNL_AARCH64_MATMUL_SRC_QUANT
+    "allows oneDNN to use dynamic quantisation for source(A) matrix when
+    environment variable set to 1" OFF) # disabled by default. NOTE(review): the help text does not name the environment variable it refers to - confirm and name it
+
 option(DNNL_ENABLE_CONCURRENT_EXEC
     "disables sharing a common scratchpad between primitives.
     This option must be turned ON if there is a possibility of executing
@@ -42,7 +46,7 @@ option(DNNL_ENABLE_CONCURRENT_EXEC
     OFF) # disabled by default
 
 option(DNNL_ENABLE_PRIMITIVE_CACHE "enables primitive cache." ON)
-    # enabled by default
+    # enabled by default
 
 option(DNNL_ENABLE_MAX_CPU_ISA
     "enables control of CPU ISA detected by oneDNN via DNNL_MAX_CPU_ISA
@@ -58,9 +62,9 @@ option(ONEDNN_ENABLE_GRAPH_DUMP "enables control of dumping graph artifacts via
     ONEDNN_GRAPH_DUMP environment variable. The option and feature are valid only
     when ONEDNN_BUILD_GRAPH is ON" OFF)
 
-# =============================
-# Building properties and scope
-# =============================
+# =============================
+# Building properties and scope
+# =============================
 
 set(DNNL_LIBRARY_TYPE "SHARED" CACHE STRING
     "specifies whether oneDNN library should be SHARED or STATIC")
@@ -169,9 +173,9 @@ set(DNNL_AMD_SYCL_KERNELS_TARGET_ARCH "" CACHE STRING
     stops to require specifying the target architecture. After removing the option
     the generic SYCL kernels will always be enabled for AMD vendor.")
 
-# =============
-# Optimizations
-# =============
+# =============
+# Optimizations
+# =============
 
 set(DNNL_ARCH_OPT_FLAGS "HostOpts" CACHE STRING
     "specifies compiler optimization flags (see below for more information).
@@ -230,13 +234,13 @@ set(ONEDNN_EXPERIMENTAL_GRAPH_COMPILER_CPU_LLVM_CONFIG "AUTO" CACHE STRING
 set(ONEDNN_EXPERIMENTAL_GRAPH_COMPILER_CPU_JIT "builtin" CACHE STRING
     "the optional JIT backends for graph-compiler: llvm;c;builtin")
 
-# ======================
-# Profiling capabilities
-# ======================
+# ======================
+# Profiling capabilities
+# ======================
 
-# TODO: restore default to ON after the issue with linking C files by 
-# Intel oneAPI DPC++ Compiler is fixed. Currently this compiler issues a warning
-# when linking object files built from C and C++ sources.
+# TODO: restore default to ON after the issue with linking C files by
+# Intel oneAPI DPC++ Compiler is fixed. Currently this compiler issues a warning
+# when linking object files built from C and C++ sources.
 option(DNNL_ENABLE_JIT_PROFILING
     "Enable registration of oneDNN kernels that are generated at
     runtime with VTune Profiler (on by default). Without the
@@ -250,9 +254,9 @@ option(DNNL_ENABLE_ITT_TASKS
     on those ITT tasks and show corresponding timeline information."
     ON)
 
-# ===================
-# Engine capabilities
-# ===================
+# ===================
+# Engine capabilities
+# ===================
 
 set(DNNL_CPU_RUNTIME "OMP" CACHE STRING
     "specifies the threading runtime for CPU engines;
@@ -305,8 +309,8 @@ set(OPENCLROOT "" CACHE STRING
     "path to Intel SDK for OpenCL applications.
     Use this option to specify custom location for OpenCL.")
 
-# TODO: move logic to other cmake files?
-# Shortcuts for SYCL/DPC++
+# TODO: move logic to other cmake files?
+# Shortcuts for SYCL/DPC++
 if(DNNL_CPU_RUNTIME STREQUAL "DPCPP" OR DNNL_CPU_RUNTIME STREQUAL "SYCL")
     set(DNNL_CPU_SYCL true)
 else()
@@ -346,18 +350,18 @@ if(DNNL_SYCL_HIP AND NOT "${DNNL_AMD_SYCL_KERNELS_TARGET_ARCH}" STREQUAL "")
     set(DNNL_AMD_ENABLE_SYCL_KERNELS TRUE)
 endif()
 
-# =============
-# Miscellaneous
-# =============
+# =============
+# Miscellaneous
+# =============
 
 option(BENCHDNN_USE_RDPMC
     "enables rdpms counter to report precise cpu frequency in benchdnn.
     CAUTION: may not work on all cpus (hence disabled by default)"
     OFF) # disabled by default
 
-# =========================
-# Developer and debug flags
-# =========================
+# =========================
+# Developer and debug flags
+# =========================
 
 set(DNNL_USE_CLANG_SANITIZER "" CACHE STRING
     "instructs build system to use a Clang sanitizer. Possible values:
@@ -398,9 +402,9 @@ option(DNNL_DISABLE_GPU_REF_KERNELS
         "builds oneDNN with only optimized kernels for GPU compute
         primitives" OFF)
 
-# =============================
-# External BLAS library options
-# =============================
+# =============================
+# External BLAS library options
+# =============================
 
 set(DNNL_BLAS_VENDOR "NONE" CACHE STRING
     "Use an external BLAS library. Valid values:
@@ -416,9 +420,9 @@ set(DNNL_BLAS_VENDOR "NONE" CACHE STRING
         installation. This vendor is supported for performance analysis
         purposes only.")
 
-# ==============================================
-# AArch64 optimizations with Arm Compute Library
-# ==============================================
+# ==============================================
+# AArch64 optimizations with Arm Compute Library
+# ==============================================
 
 option(DNNL_AARCH64_USE_ACL "Enables use of AArch64 optimised functions
     from Arm Compute Library.
 
@@ -1615,6 +1615,9 @@ struct memory : public handle<dnnl_memory_t> {
         BA16a32b4a = dnnl_BA16a32b4a,
         BA16a48b4a = dnnl_BA16a48b4a,
         BA16a64b4a = dnnl_BA16a64b4a,
+        BA24b8a = dnnl_BA24b8a,
+        aCB24c8b = dnnl_aCB24c8b,
+        abDC24d8c = dnnl_abDC24d8c,
         decbA16a = dnnl_decbA16a,
         decbA8a = dnnl_decbA8a,
         defcbA16a = dnnl_defcbA16a,
 
@@ -715,6 +715,9 @@ typedef enum {
     dnnl_aBC16b32c,
     dnnl_AB16a16b,
     dnnl_AB16a32b,
+    dnnl_BA24b8a,
+    dnnl_aCB24c8b,
+    dnnl_abDC24d8c,
     dnnl_ABcde16a16b2a,
     dnnl_aBCdef16b16c2b,
     dnnl_Acedb16a,
 
@@ -1,18 +1,18 @@
-#===============================================================================
-# Copyright 2016-2025 Intel Corporation
+#===============================================================================
+# Copyright 2016-2025 Intel Corporation
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#===============================================================================
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
 
 file(GLOB HEADERS_ROOT
     ${CMAKE_CURRENT_SOURCE_DIR}/../include/*.h
@@ -80,6 +80,10 @@ if(DNNL_EXPERIMENTAL_SPARSE)
     message(STATUS "Experimental functionality for sparse domain is enabled")
 endif()
 
+if(DNNL_AARCH64_MATMUL_SRC_QUANT) # report the opt-in feature at configure time
+    message(STATUS "Functionality for dynamic quantisation for source(A) matrix in matmuls is enabled")
+endif()
+
 if(DNNL_EXPERIMENTAL_UKERNEL)
     if(DNNL_TARGET_ARCH STREQUAL "ARCH_GENERIC")
         message(FATAL_ERROR "ukernel API does not support generic architecture.")
 
@@ -372,6 +372,9 @@ const format_tag_t aCB16b16c = dnnl_aCB16b16c;
 const format_tag_t aCB16b32c = dnnl_aCB16b32c;
 const format_tag_t aCB16b48c = dnnl_aCB16b48c;
 const format_tag_t aCB16b64c = dnnl_aCB16b64c;
+const format_tag_t BA24b8a = dnnl_BA24b8a;
+const format_tag_t aCB24c8b = dnnl_aCB24c8b;
+const format_tag_t abDC24d8c = dnnl_abDC24d8c;
 const format_tag_t aCB16b16c2b = dnnl_aCB16b16c2b;
 const format_tag_t aCB16b32c2b = dnnl_aCB16b32c2b;
 const format_tag_t aCB16b48c2b = dnnl_aCB16b48c2b;
 
@@ -202,6 +202,9 @@ status_t memory_desc_wrapper::compute_blocking(
         C(BA16a32b, {1, 0}, {16, 32}, {0, 1});
         C(BA16a48b, {1, 0}, {16, 48}, {0, 1});
         C(BA16a64b, {1, 0}, {16, 64}, {0, 1});
+        C(BA24b8a, {1, 0}, {24, 8}, {1, 0});
+        C(aCB24c8b, {0, 2, 1}, {24, 8}, {2, 1});
+        C(abDC24d8c, {0, 1, 3, 2}, {24, 8}, {3, 2});
         C(BA16a16b2a, {1, 0}, {16, 16, 2}, {0, 1, 0});
         C(BA16a32b2a, {1, 0}, {16, 32, 2}, {0, 1, 0});
         C(BA16a48b2a, {1, 0}, {16, 48, 2}, {0, 1, 0});
 
@@ -277,6 +277,9 @@ struct zero_points_t : public c_compatible {
 
     // arg-specific checks
     bool common(int arg) const { return get_mask(arg) == 0; }
+    bool per_ocic(int arg, int ndims) const { // true iff mask has exactly bits (ndims - 2) and (ndims - 1) set
+        return ndims >= 2 && get_mask(arg) == (3 << (ndims - 2)); // guard: a negative shift count is undefined behaviour
+    }
     bool per_dim_1(int arg) const { return get_mask(arg) == 2; }
     bool has_default_values(int arg) const {
         return is_set(arg) == false && has_default_data_type(arg);