Skip to content

Commit b9d98cb

Browse files
authored
[CPU] Weights caching: hash computation fix (openvinotoolkit#25625)
### Details: - *Modify the hash computation logic: take into account not only the dnnl desc format, but all of the desc info. The previous logic was not fully correct, since the hash could be equal for 2 descs with different `compute_compensations` flags -- this led to accuracy issues* - *The weights repacking hash computation logic is moved into one helper, which is reused across the CPU plugin code* ### Tickets: - *CVS-139671*
1 parent bb7f8d3 commit b9d98cb

File tree

7 files changed

+130
-50
lines changed

7 files changed

+130
-50
lines changed

src/plugins/intel_cpu/src/dnnl_extension_utils.cpp

+12-4
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,17 @@
33
//
44

55
#include "dnnl_extension_utils.h"
6-
#include "memory_desc/dnnl_blocked_memory_desc.h"
7-
#include "onednn/iml_type_mapper.h"
8-
#include "utils/general_utils.h"
6+
97
#include <common/primitive_desc.hpp>
108
#include <common/primitive_desc_iface.hpp>
119
#include <oneapi/dnnl/dnnl.hpp>
12-
1310
#include <vector>
1411

12+
#include "cpu_memory.h"
13+
#include "memory_desc/dnnl_blocked_memory_desc.h"
14+
#include "onednn/iml_type_mapper.h"
15+
#include "utils/general_utils.h"
16+
1517
using namespace dnnl;
1618

1719
namespace ov {
@@ -254,5 +256,11 @@ bool DnnlExtensionUtils::isUnarySupportedAsPostOp(Algorithm alg) {
254256
#endif
255257
}
256258

259+
std::string DnnlExtensionUtils::computeWeightsStringHash(const std::shared_ptr<const IMemory> memory,
                                                         const std::shared_ptr<DnnlMemoryDesc> dstDesc) {
    // Hash the complete oneDNN memory descriptor (not just its serialized format string),
    // so that two descriptors differing only in extra fields (e.g. the
    // compute_compensations flag) never collide in the weights cache.
    const auto md_hash = dnnl::impl::primitive_hashing::get_md_hash(*dstDesc->getDnnlDesc().get());
    // Combine with the source-data address to distinguish identical layouts of different weights.
    const auto data_addr = reinterpret_cast<uint64_t>(memory->getData());
    return std::to_string(md_hash) + "_" + std::to_string(data_addr);
}
264+
257265
} // namespace intel_cpu
258266
} // namespace ov

src/plugins/intel_cpu/src/dnnl_extension_utils.h

+8
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ namespace intel_cpu {
2222
class DnnlMemoryDesc;
2323
class DnnlBlockedMemoryDesc;
2424
class Shape;
25+
class IMemory;
2526

2627
class DnnlExtensionUtils {
2728
public:
@@ -101,6 +102,13 @@ class DnnlExtensionUtils {
101102
static dnnl_memory_desc_t clone_desc(const_dnnl_memory_desc_t cdesc);
102103
static const char* query_pd_info(const_dnnl_primitive_desc_t pd);
103104
static bool isUnarySupportedAsPostOp(Algorithm alg);
105+
/**
106+
* @brief Computes weights string hash based on weights memory and requested descriptor
107+
* @param memory Weights memory pointer
108+
* @param dstDesc descriptor defining weights representation after repacking
109+
* @return string hash
110+
*/
111+
static std::string computeWeightsStringHash(const std::shared_ptr<const IMemory> memory, const std::shared_ptr<DnnlMemoryDesc> dstDesc);
104112
};
105113

106114
} // namespace intel_cpu

src/plugins/intel_cpu/src/node.cpp

+3-14
Original file line numberDiff line numberDiff line change
@@ -831,16 +831,8 @@ void Node::prepareMemory(const DnnlMemoryDescPtr& intDesc, size_t indx) {
831831
MemoryPtr ptr;
832832
auto weightCache = context->getWeightsCache();
833833
if (weightCache != nullptr && memory::format_kind::blocked == intDesc->getDnnlDesc().get_format_kind()) {
834-
const auto& format = intDesc->serializeFormat();
835-
const uint64_t data_hash =
836-
weightCache->GetHashFunc().hash(static_cast<const unsigned char*>(internalBlob->getData()),
837-
internalBlob->getSize());
838-
839-
const std::string string_hash = name + "_" + std::to_string(indx)
840-
+ "_" + format
841-
+ "_" + std::to_string(internalBlob->getSize())
842-
+ "_" + std::to_string(data_hash);
843-
834+
const auto string_hash =
835+
name + "_" + std::to_string(indx) + "_" + DnnlExtensionUtils::computeWeightsStringHash(internalBlob, intDesc);
844836
ptr = *weightCache->findOrCreate(string_hash, create);
845837
} else {
846838
ptr = create();
@@ -905,10 +897,7 @@ MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr dstWeightDesc, DnnlMemoryD
905897

906898
auto weightCache = context->getWeightsCache();
907899
if (weightCache != nullptr) {
908-
const std::string string_hash = getName() + "_" + format
909-
+ "_" + std::to_string(edgeMem->getSize())
910-
+ "_" + std::to_string(*edgeMem->getDataAs<uint64_t>());
911-
900+
const auto string_hash = DnnlExtensionUtils::computeWeightsStringHash(edgeMem, dstWeightDesc);
912901
ptr = *weightCache->findOrCreate(string_hash, create);
913902
} else {
914903
ptr = create();

src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_utils.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44

55
#include "nodes/executors/dnnl/dnnl_utils.hpp"
66

7+
#include <common/primitive_desc_iface.hpp>
78
#include <oneapi/dnnl/dnnl.hpp>
89

910
#include "cpu_memory.h"
1011
#include "memory_desc/dnnl_memory_desc.h"
12+
#include "memory_desc/cpu_memory_desc_utils.h"
1113
#include "nodes/executors/executor.hpp"
1214
#include "nodes/reorder.h"
15+
#include "utils/cpu_utils.hpp"
1316

1417
namespace ov {
1518
namespace intel_cpu {
@@ -86,8 +89,7 @@ MemoryPtr prepareWeightsMemory(const DnnlMemoryDescPtr srcWeightDesc,
8689
MemoryPtr ptr;
8790
if (globalWeightCache &&
8891
dnnl::memory::format_kind::blocked == dstWeightDesc->getDnnlDesc().get_format_kind()) {
89-
const std::string string_hash = format + "_" + std::to_string(weightsMem->getSize()) + "_" +
90-
std::to_string(reinterpret_cast<uint64_t>(weightsMem->getData()));
92+
const auto string_hash = DnnlExtensionUtils::computeWeightsStringHash(weightsMem, dstWeightDesc);
9193
ptr = *globalWeightCache->findOrCreate(string_hash, create);
9294
} else {
9395
ptr = create();

src/plugins/intel_cpu/src/weights_cache.cpp

-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
namespace ov {
1111
namespace intel_cpu {
1212

13-
const SimpleDataHash WeightsSharing::simpleCRC;
14-
1513
WeightsSharing::SharedMemory::SharedMemory(
1614
std::unique_lock<std::mutex> && lock,
1715
const MemoryInfo::Ptr & memory,

src/plugins/intel_cpu/src/weights_cache.hpp

-28
Original file line numberDiff line numberDiff line change
@@ -22,31 +22,6 @@
2222

2323
namespace ov {
2424
namespace intel_cpu {
25-
26-
class SimpleDataHash {
27-
public:
28-
SimpleDataHash() {
29-
for (int i = 0; i < kTableSize; i++) {
30-
uint64_t c = i;
31-
for (int j = 0; j < 8; j++)
32-
c = ((c & 1) ? 0xc96c5795d7870f42 : 0) ^ (c >> 1);
33-
table[i] = c;
34-
}
35-
}
36-
// Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
37-
uint64_t hash(const unsigned char* data, size_t size) const {
38-
uint64_t crc = 0;
39-
for (size_t idx = 0; idx < size; idx++)
40-
crc = table[(unsigned char)crc ^ data[idx]] ^ (crc >> 8);
41-
42-
return ~crc;
43-
}
44-
45-
protected:
46-
static constexpr int kTableSize = 256;
47-
uint64_t table[kTableSize];
48-
};
49-
5025
/**
5126
* Caching store of Memory objects
5227
* Will return a cached object or create new one
@@ -94,12 +69,9 @@ class WeightsSharing {
9469

9570
SharedMemory::Ptr get(const std::string& key) const;
9671

97-
static const SimpleDataHash& GetHashFunc () { return simpleCRC; }
98-
9972
protected:
10073
mutable std::mutex guard;
10174
std::unordered_map<std::string, MemoryInfo::Ptr> sharedWeights;
102-
static const SimpleDataHash simpleCRC;
10375
};
10476

10577
/**
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// Copyright (C) 2018-2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#include <regex>
6+
7+
#include "common_test_utils/node_builders/constant.hpp"
8+
#include "common_test_utils/node_builders/fake_quantize.hpp"
9+
#include "common_test_utils/node_builders/reshape.hpp"
10+
#include "openvino/openvino.hpp"
11+
#include "shared_test_classes/base/ov_subgraph.hpp"
12+
13+
namespace ov {
14+
namespace test {
15+
16+
enum class FQInterval { U8, I8 };
17+
inline std::ostream& operator<<(std::ostream& os, FQInterval interval) {
18+
switch (interval) {
19+
case FQInterval::U8:
20+
os << "U8";
21+
break;
22+
case FQInterval::I8:
23+
os << "I8";
24+
break;
25+
default:
26+
OPENVINO_THROW("Unknown FQInterval");
27+
}
28+
return os;
29+
}
30+
31+
typedef std::tuple<InputShape, InputShape, FQInterval, FQInterval> QuantizedMatMulsWithSharedWeightsParans;
32+
33+
/* This test verifies the correctness of the hash function computation for the shared weights.
34+
Specifically, it checks that when one op requires compensations computation and second one does not,
35+
the resulting hashes are not identical, and the weights are repacked for each op separately
36+
*/
37+
class QuantizedMatMulsWithSharedWeightsTest
38+
: public testing::WithParamInterface<QuantizedMatMulsWithSharedWeightsParans>,
39+
virtual public SubgraphBaseTest {
40+
public:
41+
static std::string getTestCaseName(const testing::TestParamInfo<QuantizedMatMulsWithSharedWeightsParans>& obj) {
42+
InputShape shape1;
43+
InputShape shape2;
44+
FQInterval interval1;
45+
FQInterval interval2;
46+
std::tie(shape1, shape2, interval1, interval2) = obj.param;
47+
std::ostringstream result;
48+
result << "IS1=" << shape1 << "IS2=" << shape2 << "FQInterval1=" << interval1 << "FQInterval2=" << interval2;
49+
return result.str();
50+
}
51+
52+
void SetUp() override {
53+
targetDevice = ov::test::utils::DEVICE_CPU;
54+
abs_threshold = 1e-4;
55+
56+
InputShape shape1;
57+
InputShape shape2;
58+
FQInterval interval1;
59+
FQInterval interval2;
60+
std::tie(shape1, shape2, interval1, interval2) = this->GetParam();
61+
init_input_shapes({shape1, shape2});
62+
63+
const auto weights = ov::test::utils::make_constant(ov::element::i8, {16, 16});
64+
const auto convert = std::make_shared<ov::op::v0::Convert>(weights, ov::element::f32);
65+
const auto scale = ov::test::utils::make_constant(ov::element::f32, {16, 1}, ov::test::utils::InputGenerateData(0, 1, 5));
66+
const auto mul = std::make_shared<ov::op::v1::Multiply>(convert, scale);
67+
68+
auto build_fq = [](const ov::Output<ov::Node>& parent, FQInterval interval_type) {
69+
const auto low = interval_type == FQInterval::I8 ? std::vector<float>{-12.8f} : std::vector<float>{0.f};
70+
const auto high = interval_type == FQInterval::I8 ? std::vector<float>{12.7f} : std::vector<float>{25.5f};
71+
return ov::test::utils::make_fake_quantize(parent, ov::element::f32, 256, {1, 1, 1, 1}, low, high, low, high);
72+
};
73+
74+
const auto param1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, inputDynamicShapes[0]);
75+
const auto fq1 = build_fq(param1, interval1);
76+
const auto mm1 = std::make_shared<ov::op::v0::MatMul>(fq1, mul, false, true);
77+
78+
const auto param2 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, inputDynamicShapes[1]);
79+
const auto fq2 = build_fq(param2, interval2);
80+
const auto mm2 = std::make_shared<ov::op::v0::MatMul>(fq2, mul, false, true);
81+
82+
function = std::make_shared<ov::Model>(ov::OutputVector{mm1, mm2}, ov::ParameterVector{param1, param2});
83+
}
84+
};
85+
86+
TEST_P(QuantizedMatMulsWithSharedWeightsTest, CompareWithRefs) {
87+
run();
88+
}
89+
90+
namespace {
91+
92+
std::vector<InputShape> shapes1{{{-1, -1, -1, 16}, {{1, 1, 15, 16}, {1, 1, 12, 16}, {1, 1, 15, 16}}}};
93+
std::vector<InputShape> shapes2{{{-1, -1, -1, 16}, {{1, 1, 12, 16}, {1, 1, 15, 16}, {1, 1, 12, 16}}}};
94+
INSTANTIATE_TEST_SUITE_P(smoke_CustomTest, QuantizedMatMulsWithSharedWeightsTest,
95+
::testing::Combine(
96+
::testing::ValuesIn(shapes1),
97+
::testing::ValuesIn(shapes2),
98+
::testing::Values(FQInterval::U8, FQInterval::I8),
99+
::testing::Values(FQInterval::U8, FQInterval::I8)),
100+
QuantizedMatMulsWithSharedWeightsTest::getTestCaseName);
101+
} // namespace
102+
} // namespace test
103+
} // namespace ov

0 commit comments

Comments
 (0)