[NPUW] Serialization fixes #28442

Merged
merged 17 commits on Jan 15, 2025
Changes from 13 commits
29 changes: 28 additions & 1 deletion src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -133,9 +133,18 @@ std::shared_ptr<ov::npuw::ICompiledModel> ov::npuw::ICompiledModel::create(
auto use_llm_key = ov::intel_npu::npuw::llm::enabled.name();
if (properties.count(use_llm_key) && properties.at(use_llm_key).as<bool>() == true) {
LOG_INFO("ov::npuw::LLMCompiledModel will be created.");
compiled_model = std::make_shared<ov::npuw::LLMCompiledModel>(model, plugin, properties);
// Drop CACHE_DIR from the config
// If it's present we will be utilizing LLMCompiledModel's import
// and not the underlying models and submodels
auto config = properties;
config.erase(ov::cache_dir.name());
compiled_model = std::make_shared<ov::npuw::LLMCompiledModel>(model, plugin, config);
dmatveev marked this conversation as resolved.
} else {
LOG_INFO("ov::npuw::CompiledModel will be created.");
// CACHE_DIR isn't supported with NPU_USE_NPUW
if (properties.count(ov::cache_dir.name())) {
OPENVINO_THROW("Option 'CACHE_DIR' is not supported with NPU_USE_NPUW!");
AsyaPronina marked this conversation as resolved.
}
Contributor: As we do have the necessary tools in place now, let's plan it for 25.1.
Contributor Author: Yeah. This PR just adds support for LLMCompiledModel for now. We will figure out the rest later.
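As an illustration (not part of this PR), the branch above would be hit by user code along these lines. "NPU_USE_NPUW" appears in the throw message below; "NPUW_LLM" is assumed here to be the string form of the ov::intel_npu::npuw::llm::enabled key:

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");

    // With NPUW's LLM pipeline enabled, CACHE_DIR is erased from the config
    // before LLMCompiledModel is constructed, so caching is served by
    // LLMCompiledModel's own import/export rather than by the submodels.
    auto compiled = core.compile_model(model, "NPU",
                                       ov::AnyMap{{"NPU_USE_NPUW", "YES"},
                                                  {"NPUW_LLM", "YES"},
                                                  {"CACHE_DIR", "./cache"}});
    return 0;
}
```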

pre_load_transform(model, properties);
compiled_model = std::make_shared<ov::npuw::CompiledModel>(model, plugin, properties);
}
@@ -611,6 +620,12 @@ void ov::npuw::CompiledModel::serialize(std::ostream& stream) const {

// Write config
write(stream, m_cfg);
// FIXME: utilize overload instead
write(stream, m_non_npuw_props.size());
for (const auto& p : m_non_npuw_props) {
write(stream, p.first);
write_any(stream, p.second);
}

// Serialize compiled submodels
write(stream, m_compiled_submodels.size());
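Both FIXMEs ("utilize overload instead") here and in the deserialize hunk below hint at folding these loops into dedicated s11n overloads. A hypothetical sketch, assuming the property map is a std::map<std::string, ov::Any> as the loops suggest:

```cpp
// Hypothetical s11n overloads for a string -> ov::Any map, length-prefixed
// the same way the inline loops in serialize()/deserialize() are.
void write(std::ostream& stream, const std::map<std::string, ov::Any>& var) {
    write(stream, var.size());
    for (const auto& p : var) {
        write(stream, p.first);
        write_any(stream, p.second);
    }
}

void read(std::istream& stream, std::map<std::string, ov::Any>& var) {
    std::size_t size = 0;
    read(stream, size);
    for (std::size_t i = 0; i < size; ++i) {
        std::string key;
        read(stream, key);
        ov::Any val;
        read_any(stream, val);
        var[key] = val;
    }
}
```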
@@ -671,6 +686,18 @@ std::shared_ptr<ov::npuw::CompiledModel> ov::npuw::CompiledModel::deserialize(

// Deserialize config
read(stream, compiled->m_cfg);
compiled->m_cfg.parseEnvVars();
// FIXME: utilize overload instead
std::size_t props_size;
read(stream, props_size);
for (std::size_t i = 0; i < props_size; ++i) {
std::string key;
read(stream, key);
ov::Any val;
read_any(stream, val);
compiled->m_non_npuw_props[key] = val;
}
compiled->implement_properties();

// Deserialize compiled submodels
std::size_t subm_size = 0;
src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -611,21 +611,21 @@ std::shared_ptr<ov::npuw::LLMCompiledModel> ov::npuw::LLMCompiledModel::deserial
if (vmajor != OPENVINO_VERSION_MAJOR || vminor != OPENVINO_VERSION_MINOR || vpatch != OPENVINO_VERSION_PATCH ||
s11n_version != std::string(NPUW_SERIALIZATION_VERSION)) {
OPENVINO_THROW("This blob was serialized with a different OV version!",
" Serialized by OV ",
"\nSerialized by OV ",
vmajor,
'.',
vminor,
'.',
vpatch,
" Current OV version ",
"\nCurrent OV version ",
OPENVINO_VERSION_MAJOR,
'.',
OPENVINO_VERSION_MINOR,
'.',
OPENVINO_VERSION_PATCH,
" NPUW serialized by version ",
"\nNPUW serialized by version ",
s11n_version,
" NPUW current serialization version ",
"\nNPUW current serialization version ",
NPUW_SERIALIZATION_VERSION);
}
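After this change the exception message breaks across lines instead of running together; with purely illustrative version values it would render roughly as:

```text
This blob was serialized with a different OV version!
Serialized by OV 2024.6.0
Current OV version 2025.0.0
NPUW serialized by version 0.0.1
NPUW current serialization version 0.0.2
```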

@@ -653,6 +653,7 @@ std::shared_ptr<ov::npuw::LLMCompiledModel> ov::npuw::LLMCompiledModel::deserial

// Deserialize config
read(stream, compiled->m_cfg);
compiled->implement_properties();

// Deserialize CompiledModels
compiled->m_kvcache_compiled = ov::npuw::CompiledModel::deserialize(stream, plugin);
90 changes: 90 additions & 0 deletions src/plugins/intel_npu/src/plugin/npuw/serialization.cpp
@@ -23,6 +23,10 @@ void ov::npuw::s11n::write(std::ostream& stream, const bool& var) {
stream.write(reinterpret_cast<const char*>(&var), sizeof var);
}

void ov::npuw::s11n::write(std::ostream& stream, const float& var) {
stream.write(reinterpret_cast<const char*>(&var), sizeof var);
}

void ov::npuw::s11n::write(std::ostream& stream, const ov::npuw::compiled::Spatial& var) {
using ov::npuw::s11n::write;

@@ -74,6 +78,42 @@ void ov::npuw::s11n::write(std::ostream& stream, const ov::Output<const ov::Node
write(stream, var.get_names());
}

enum class AnyType : int { STRING = 0, CHARS, INT, UINT32, INT64, UINT64, SIZET, FLOAT, BOOL };

void ov::npuw::s11n::write_any(std::ostream& stream, const ov::Any& var) {
// FIXME: figure out a proper way to serialize Any (for config)
AsyaPronina marked this conversation as resolved.
if (var.is<std::string>()) {
write(stream, int(AnyType::STRING));
write(stream, var.as<std::string>());
} else if (var.is<const char*>()) {
write(stream, int(AnyType::CHARS));
write(stream, std::string(var.as<const char*>()));
} else if (var.is<std::size_t>()) {
write(stream, int(AnyType::SIZET));
write(stream, var.as<std::size_t>());
} else if (var.is<int>()) {
write(stream, int(AnyType::INT));
write(stream, var.as<int>());
Contributor: this may not be portable, but let's keep it aside as we don't claim any blob portability at the moment.
} else if (var.is<int64_t>()) {
write(stream, int(AnyType::INT64));
write(stream, var.as<int64_t>());
} else if (var.is<uint32_t>()) {
write(stream, int(AnyType::UINT32));
write(stream, var.as<uint32_t>());
} else if (var.is<uint64_t>()) {
write(stream, int(AnyType::UINT64));
write(stream, var.as<uint64_t>());
} else if (var.is<float>()) {
write(stream, int(AnyType::FLOAT));
write(stream, var.as<float>());
} else if (var.is<bool>()) {
write(stream, int(AnyType::BOOL));
write(stream, var.as<bool>());
} else {
NPUW_ASSERT(false && "Unsupported type");
}
}

void ov::npuw::s11n::read(std::istream& stream, std::streampos& var) {
stream.read(reinterpret_cast<char*>(&var), sizeof var);
}
@@ -89,6 +129,10 @@ void ov::npuw::s11n::read(std::istream& stream, bool& var) {
stream.read(reinterpret_cast<char*>(&var), sizeof var);
}

void ov::npuw::s11n::read(std::istream& stream, float& var) {
stream.read(reinterpret_cast<char*>(&var), sizeof var);
}

void ov::npuw::s11n::read(std::istream& stream, ov::npuw::compiled::Spatial& var) {
using ov::npuw::s11n::read;

@@ -169,3 +213,49 @@ void ov::npuw::s11n::read(std::istream& stream, std::shared_ptr<ov::Node>& var)
var->output(0).set_tensor_ptr(tensor_dummy);
var->set_friendly_name(*names.begin()); // any_name ?
}

void ov::npuw::s11n::read_any(std::istream& stream, ov::Any& var) {
// FIXME: ugly, but cannot use .read(stream) here due to its usage of operator>>()
int type_int;
read(stream, type_int);
AnyType type = static_cast<AnyType>(type_int);
if (type == AnyType::STRING) {
std::string val;
read(stream, val);
var = std::move(val);
} else if (type == AnyType::CHARS) {
std::string val;
AsyaPronina marked this conversation as resolved.
read(stream, val);
var = std::move(val);
Contributor: but didn't CHARS stand for const char* in Any when you serialized it?

Contributor Author: I serialize it as std::string to keep it simple.
} else if (type == AnyType::SIZET) {
std::size_t val;
read(stream, val);
var = val;
} else if (type == AnyType::INT) {
int val;
read(stream, val);
var = val;
} else if (type == AnyType::INT64) {
int64_t val;
read(stream, val);
var = val;
} else if (type == AnyType::UINT32) {
uint32_t val;
read(stream, val);
var = val;
} else if (type == AnyType::UINT64) {
uint64_t val;
read(stream, val);
var = val;
} else if (type == AnyType::FLOAT) {
float val;
read(stream, val);
var = val;
} else if (type == AnyType::BOOL) {
bool val;
read(stream, val);
var = val;
} else {
NPUW_ASSERT(false && "Unsupported type");
}
}
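A usage sketch (illustrative, not from the PR) of the write_any/read_any pair above; note that write_any tests var.is<std::size_t>() before the narrower integer types, so the tag written depends on that ordering:

```cpp
#include <sstream>
// ov::npuw::s11n::write_any / read_any from serialization.hpp (include path assumed)

void roundtrip_example() {
    std::stringstream ss;
    ov::npuw::s11n::write_any(ss, ov::Any{42});  // writes the AnyType::INT tag, then the value

    ov::Any restored;
    ov::npuw::s11n::read_any(ss, restored);      // reads the tag, then the typed value
    NPUW_ASSERT(restored.as<int>() == 42);
}
```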
5 changes: 5 additions & 0 deletions src/plugins/intel_npu/src/plugin/npuw/serialization.hpp
@@ -27,6 +27,7 @@ class Config;
namespace ov {

// Forward declaration
class Any;
class Node;
class Tensor;
template <class>
@@ -52,19 +53,23 @@ namespace s11n {
void write(std::ostream& stream, const std::streampos& var);
void write(std::ostream& stream, const std::string& var);
void write(std::ostream& stream, const bool& var);
void write(std::ostream& stream, const float& var);
void write(std::ostream& stream, const ov::npuw::compiled::Spatial& var);
void write(std::ostream& stream, const ov::Tensor& var);
void write(std::ostream& stream, const ::intel_npu::Config& var);
void write(std::ostream& stream, const ov::Output<const ov::Node>& var);
void write_any(std::ostream& stream, const ov::Any& var);

void read(std::istream& stream, std::streampos& var);
void read(std::istream& stream, std::string& var);
void read(std::istream& stream, bool& var);
void read(std::istream& stream, float& var);
void read(std::istream& stream, ov::npuw::compiled::Spatial& var);
void read(std::istream& stream, ov::Tensor& var);
void read(std::istream& stream, ::intel_npu::Config& var);
void read(std::istream& stream, std::shared_ptr<ov::op::v0::Parameter>& var);
void read(std::istream& stream, std::shared_ptr<ov::Node>& var);
void read_any(std::istream& stream, ov::Any& var);

// Forward declaration
template <typename T1, typename T2>
7 changes: 6 additions & 1 deletion src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
@@ -219,7 +219,8 @@ void Bank::read_and_add_tensor(std::istream& stream, int64_t uid, const std::str
auto iter_device = device_bank.storage.find(uid);

if (iter_device != device_bank.storage.end()) {
// Already allocated
// Shouldn't be possible
NPUW_ASSERT(false);
return;
}
Comment on lines 221 to 225

Contributor: It's a change in the contract here: what was OK previously is now not OK. Can you please explain more?

Contributor Author: It occurs during bank deserialization. Our storage is an unordered_map with unique UIDs, which have all been evaluated and serialized at this point. Thus it shouldn't be possible to encounter duplicate UIDs during deserialization. It's just a sanity check; this if can be removed altogether with no effect.
@@ -234,6 +235,10 @@ void Bank::read_and_add_tensor(std::istream& stream, int64_t uid, const std::str
ov::Tensor allocated_tensor;

// FIXME: reading not via a dedicated function
bool is_initialized = false;
read(stream, is_initialized);
NPUW_ASSERT(is_initialized);
Contributor: this too. Haven't you written empty tensors before? If is_initialized must always be true, why write it to the stream at all?

Contributor Author: I changed Tensor serialization in one of the previous PRs. It occurred when I was serializing empty scales/zerops. This change is just to align with that previous change.

Contributor: I don't get it. Do you serialize empty tensors now or not? If you don't, you don't need this bool in the stream either. If you do, this code breaks.

Contributor Author: I do, but not for the bank.

Contributor Author: So this code doesn't break.

Contributor: Did I read this right: you have a common routine to write a tensor to disk, but you read it here in some specific way? It doesn't sound good tbh. You'd need to use the same function to read tensors as you write them. If that routine checks for false and does nothing, but you still need to raise an assert, you can do it in the caller code (= HERE) if the resulting tensor is still empty after the read.

Contributor: Discussed on call; this change is caused by an optimization (reading from the stream directly into the L0 buffer).

Contributor Author: Discussed locally.

std::string type_str;
read(stream, type_str);
ov::element::Type type(type_str);
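For context on the thread above, a rough sketch of the direct-to-device read being described; the allocation helper and the storage value type are hypothetical, as the actual path is outside this diff:

```cpp
// Hypothetical sketch: allocate the destination tensor on the device up
// front, then stream the serialized bytes straight into its memory,
// skipping a host-side staging tensor.
ov::Tensor allocated_tensor = allocate_on_device(type, shape, device);  // hypothetical helper
stream.read(reinterpret_cast<char*>(allocated_tensor.data()),
            allocated_tensor.get_byte_size());
device_bank.storage[uid] = allocated_tensor;  // assumes storage maps uid -> tensor
```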
4 changes: 0 additions & 4 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -622,10 +622,6 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
ov::AnyMap localProperties = properties;
if (localProperties.count(useNpuwKey)) {
if (localProperties.at(useNpuwKey).as<bool>() == true) {
// CACHE_DIR isn't supported with NPU_USE_NPUW
if (localProperties.count(ov::cache_dir.name()) || !_globalConfig.get<CACHE_DIR>().empty()) {
OPENVINO_THROW("Option 'CACHE_DIR' is not supported with NPU_USE_NPUW!");
}
return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties);
} else {
// NPUW is disabled, remove the key from the properties