Skip to content

Commit 7764042

Browse files
authored
[Core][IR FE] Support (de)serialization of string type Constant (openvinotoolkit#22578)
**Details:** Support (de)serialization of string type Constant **Ticket:** 126525 --------- Signed-off-by: Kazantsev, Roman <roman.kazantsev@intel.com>
1 parent a8debd0 commit 7764042

File tree

12 files changed

+421
-18
lines changed

12 files changed

+421
-18
lines changed

src/core/dev_api/openvino/runtime/string_aligned_buffer.hpp

+54-1
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,16 @@ namespace ov {
1010

1111
/// \brief StringAlignedBuffer class to store pointer to pre-allocated buffer with std::string objects
1212
/// it is responsible for deallocation of std::string objects that will be stored in the buffer
13-
class StringAlignedBuffer : public ov::AlignedBuffer {
13+
class OPENVINO_API StringAlignedBuffer : public ov::AlignedBuffer {
1414
public:
1515
StringAlignedBuffer() = default;
16+
1617
StringAlignedBuffer(size_t num_elements, size_t byte_size, size_t alignment, bool initialize);
1718

19+
virtual size_t get_num_elements() const {
20+
return m_num_elements;
21+
}
22+
1823
virtual ~StringAlignedBuffer();
1924

2025
private:
@@ -25,4 +30,52 @@ class StringAlignedBuffer : public ov::AlignedBuffer {
2530
size_t m_num_elements;
2631
};
2732

33+
/// \brief SharedStringAlignedBuffer class to store pointer to shared pre-allocated buffer with std::string objects
34+
/// it must not be responsible for deallocation of std::string objects
35+
class OPENVINO_API SharedStringAlignedBuffer : public ov::StringAlignedBuffer {
36+
public:
37+
SharedStringAlignedBuffer(char* ptr, size_t size);
38+
39+
virtual ~SharedStringAlignedBuffer() {
40+
m_allocated_buffer = nullptr;
41+
m_aligned_buffer = nullptr;
42+
m_byte_size = 0;
43+
m_num_elements = 0;
44+
}
45+
};
46+
47+
template <>
48+
class OPENVINO_API AttributeAdapter<std::shared_ptr<ov::StringAlignedBuffer>>
49+
: public DirectValueAccessor<std::shared_ptr<ov::StringAlignedBuffer>> {
50+
public:
51+
AttributeAdapter(std::shared_ptr<ov::StringAlignedBuffer>& value);
52+
53+
OPENVINO_RTTI("AttributeAdapter<std::shared_ptr<ov::StringAlignedBuffer>");
54+
55+
static std::shared_ptr<ov::StringAlignedBuffer> unpack_string_tensor(const char* packed_string_tensor_ptr,
56+
size_t packed_string_tensor_size);
57+
void get_header(std::shared_ptr<uint8_t>& header, size_t& header_size);
58+
void get_raw_string_by_index(const char*& raw_string_ptr, size_t& raw_string_size, size_t string_ind);
59+
60+
protected:
61+
std::shared_ptr<uint8_t> m_header;
62+
size_t m_header_size;
63+
};
64+
65+
template <>
66+
class OPENVINO_API AttributeAdapter<std::shared_ptr<ov::SharedStringAlignedBuffer>>
67+
: public DirectValueAccessor<std::shared_ptr<ov::SharedStringAlignedBuffer>> {
68+
public:
69+
AttributeAdapter(std::shared_ptr<ov::SharedStringAlignedBuffer>& value);
70+
71+
OPENVINO_RTTI("AttributeAdapter<std::shared_ptr<ov::SharedStringAlignedBuffer>");
72+
73+
void get_header(std::shared_ptr<uint8_t>& header, size_t& header_size);
74+
void get_raw_string_by_index(const char*& raw_string_ptr, size_t& raw_string_size, size_t string_ind);
75+
76+
protected:
77+
std::shared_ptr<uint8_t> m_header;
78+
size_t m_header_size;
79+
};
80+
2881
} // namespace ov

src/core/src/op/constant.cpp

+25-3
Original file line numberDiff line numberDiff line change
@@ -368,10 +368,32 @@ bool Constant::visit_attributes(AttributeVisitor& visitor) {
368368

369369
const auto need_to_reallocate = (m_shape != prev_shape) || (prev_type != m_element_type);
370370
if (m_alloc_buffer_on_visit_attributes && need_to_reallocate) {
371-
// Filling in a fresh constant
372-
allocate_buffer(false);
371+
if (m_element_type == ov::element::string) {
372+
// string objects initialization is required
373+
allocate_buffer(true);
374+
} else {
375+
// Filling in a fresh constant
376+
allocate_buffer(false);
377+
}
378+
}
379+
380+
if (m_element_type == ov::element::string) {
381+
if (auto string_aligned_buffer = std::dynamic_pointer_cast<ov::StringAlignedBuffer>(m_data)) {
382+
visitor.on_attribute("value", string_aligned_buffer);
383+
} else if (auto shared_string_tensor = std::dynamic_pointer_cast<ov::SharedBuffer<ov::Tensor>>(m_data)) {
384+
auto shared_string_buffer =
385+
std::make_shared<ov::SharedStringAlignedBuffer>(static_cast<char*>(shared_string_tensor->get_ptr()),
386+
shared_string_tensor->size());
387+
visitor.on_attribute("value", shared_string_buffer);
388+
} else {
389+
// deserialization case when buffer does not exist yet
390+
std::shared_ptr<ov::StringAlignedBuffer> string_aligned_buffer;
391+
visitor.on_attribute("value", string_aligned_buffer);
392+
m_data = string_aligned_buffer;
393+
}
394+
} else {
395+
visitor.on_attribute("value", m_data);
373396
}
374-
visitor.on_attribute("value", m_data);
375397
update_identical_flags(false, false);
376398
return true;
377399
}

src/core/src/pass/serialize.cpp

+50
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "openvino/pass/constant_folding.hpp"
2323
#include "openvino/reference/convert.hpp"
2424
#include "openvino/runtime/aligned_buffer.hpp"
25+
#include "openvino/runtime/string_aligned_buffer.hpp"
2526
#include "openvino/util/file_util.hpp"
2627
#include "pugixml.hpp"
2728
#include "transformations/hash.hpp"
@@ -511,6 +512,55 @@ class XmlSerializer : public ov::AttributeVisitor {
511512
} else if (const auto& a =
512513
ov::as_type<ov::AttributeAdapter<std::shared_ptr<ov::op::util::Variable>>>(&adapter)) {
513514
m_xml_node.append_attribute(name.c_str()).set_value(a->get()->get_info().variable_id.c_str());
515+
} else if (ov::is_type<ov::AttributeAdapter<std::shared_ptr<ov::StringAlignedBuffer>>>(&adapter) ||
516+
ov::is_type<ov::AttributeAdapter<std::shared_ptr<ov::SharedStringAlignedBuffer>>>(&adapter)) {
517+
if (name == "value" && translate_type_name(m_node_type_name) == "Const") {
518+
auto a1 = ov::as_type<ov::AttributeAdapter<std::shared_ptr<ov::StringAlignedBuffer>>>(&adapter);
519+
auto a2 = ov::as_type<ov::AttributeAdapter<std::shared_ptr<ov::SharedStringAlignedBuffer>>>(&adapter);
520+
size_t new_size = 0;
521+
size_t inter_size = 0;
522+
// write a header of packed string tensor
523+
std::shared_ptr<uint8_t> header_ptr = nullptr;
524+
size_t header_size = 0;
525+
if (a1) {
526+
a1->get_header(header_ptr, header_size);
527+
} else {
528+
a2->get_header(header_ptr, header_size);
529+
}
530+
531+
int64_t offset = m_constant_write_handler.write(reinterpret_cast<const char*>(header_ptr.get()),
532+
header_size,
533+
&inter_size,
534+
m_compress_to_fp16,
535+
m_output_element_type);
536+
new_size += inter_size;
537+
538+
// write raw strings part
539+
size_t num_elements = 0;
540+
if (a1) {
541+
num_elements = a1->get()->get_num_elements();
542+
} else {
543+
num_elements = a2->get()->get_num_elements();
544+
}
545+
for (size_t ind = 0; ind < num_elements; ++ind) {
546+
const char* raw_string_ptr;
547+
size_t raw_string_size;
548+
if (a1) {
549+
a1->get_raw_string_by_index(raw_string_ptr, raw_string_size, ind);
550+
} else {
551+
a2->get_raw_string_by_index(raw_string_ptr, raw_string_size, ind);
552+
}
553+
554+
m_constant_write_handler.write(raw_string_ptr,
555+
raw_string_size,
556+
&inter_size,
557+
m_compress_to_fp16,
558+
m_output_element_type);
559+
new_size += inter_size;
560+
}
561+
m_xml_node.append_attribute("offset").set_value(static_cast<unsigned long long>(offset));
562+
m_xml_node.append_attribute("size").set_value(static_cast<unsigned long long>(new_size));
563+
}
514564
} else if (const auto& a = ov::as_type<ov::AttributeAdapter<std::shared_ptr<ov::AlignedBuffer>>>(&adapter)) {
515565
if (name == "value" && translate_type_name(m_node_type_name) == "Const") {
516566
const int64_t size = a->get()->size();

src/core/src/runtime/string_aligned_buffer.cpp

+130
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,80 @@
44

55
#include "openvino/runtime/string_aligned_buffer.hpp"
66

7+
#include <numeric>
8+
9+
#include "openvino/core/type/element_type.hpp"
710
#include "openvino/runtime/aligned_buffer.hpp"
811

12+
namespace {
13+
void aux_unpack_string_tensor(const char* data, size_t size, std::shared_ptr<ov::StringAlignedBuffer>& string_buffer) {
14+
// unpack string tensor
15+
// packed format is the following:
16+
// <num_string>, <1st string offset>,..., <nth string offset>, <1st string raw format>,..., <nth string raw format>
17+
// check the format of the input bitstream representing the string tensor
18+
OPENVINO_ASSERT(size >= 4, "Incorrect packed string tensor format: no batch size in the packed string tensor");
19+
const int32_t* pindices = reinterpret_cast<const int32_t*>(data);
20+
int32_t num_strings = pindices[0];
21+
OPENVINO_ASSERT(int32_t(size) >= 4 + 4 + 4 * num_strings,
22+
"Incorrect packed string tensor format: the packed string tensor must contain first "
23+
"string offset and end indices");
24+
const int32_t* begin_ids = pindices + 1;
25+
const int32_t* end_ids = pindices + 2;
26+
const char* symbols = reinterpret_cast<const char*>(pindices + 2 + num_strings);
27+
28+
// allocate StringAlignedBuffer to store unpacked strings in std::string objects
29+
// SharedBuffer to read byte stream is not applicable because we need unpacked format for strings
30+
string_buffer = std::make_shared<ov::StringAlignedBuffer>(
31+
num_strings,
32+
ov::element::string.size() * num_strings,
33+
64, // host alignment used the same as in creation of buffer for Constant
34+
true);
35+
std::string* src_strings = static_cast<std::string*>(string_buffer->get_ptr());
36+
for (int32_t idx = 0; idx < num_strings; ++idx) {
37+
src_strings[idx] = std::string(symbols + begin_ids[idx], symbols + end_ids[idx]);
38+
}
39+
}
40+
41+
/// \brief Builds the header of the packed representation of a string buffer.
///
/// Packed layout, where every integer is an int32_t:
///   <num_strings>, <1st string offset>, ..., <nth string offset>, <end offset of nth string>,
///   <1st string raw bytes>, ..., <nth string raw bytes>
/// Only the integer part is produced here; the raw bytes are not copied.
///
/// The packed format stores the count and the offsets as int32_t, so OPENVINO_ASSERT fails if the
/// number of strings or their accumulated length does not fit. The output parameters are written
/// only after the whole header has been built successfully.
///
/// \param string_aligned_buffer_ptr  Buffer of std::string objects to describe; must not be nullptr
/// \param header                     Receives the newly allocated header
/// \param header_size                Receives the header size in bytes
void aux_get_header(const std::shared_ptr<ov::StringAlignedBuffer>& string_aligned_buffer_ptr,
                    std::shared_ptr<uint8_t>& header,
                    size_t& header_size) {
    OPENVINO_ASSERT(string_aligned_buffer_ptr, "StringAlignedBuffer pointer is nullptr");
    const size_t num_elements = string_aligned_buffer_ptr->get_num_elements();
    OPENVINO_ASSERT(num_elements <= static_cast<size_t>(INT32_MAX),
                    "Cannot pack string tensor: number of strings does not fit into the packed format");
    const std::string* strings = reinterpret_cast<const std::string*>(string_aligned_buffer_ptr->get_ptr());

    // one slot for the string count, one for the leading zero offset and one end offset per string
    const size_t new_header_size = sizeof(int32_t) * (1 + 1 + num_elements);
    std::shared_ptr<uint8_t> new_header(new uint8_t[new_header_size], std::default_delete<uint8_t[]>());

    int32_t* pindices = reinterpret_cast<int32_t*>(new_header.get());
    pindices[0] = static_cast<int32_t>(num_elements);
    pindices[1] = 0;

    // running end offset of each string inside the raw symbols area
    size_t current_symbols_pos = 0;
    for (size_t ind = 0; ind < num_elements; ++ind) {
        // only the length is needed, so the string itself is not copied
        current_symbols_pos += strings[ind].size();
        OPENVINO_ASSERT(current_symbols_pos <= static_cast<size_t>(INT32_MAX),
                        "Cannot pack string tensor: total size of strings does not fit into the packed format");
        pindices[ind + 2] = static_cast<int32_t>(current_symbols_pos);
    }

    header = new_header;
    header_size = new_header_size;
}
67+
68+
/// \brief Gives access to the characters of one string stored in a string buffer.
///
/// No data is copied: the returned pointer refers to the storage of the std::string object kept
/// inside the buffer.
///
/// \param string_aligned_buffer_ptr  Buffer of std::string objects; must not be nullptr
/// \param raw_string_ptr             Receives the pointer to the characters of the requested string
/// \param raw_string_size            Receives the length of the requested string in bytes
/// \param string_ind                 Index of the requested string; must be less than the number of
///                                   elements in the buffer
void aux_get_raw_string_by_index(const std::shared_ptr<ov::StringAlignedBuffer>& string_aligned_buffer_ptr,
                                 const char*& raw_string_ptr,
                                 size_t& raw_string_size,
                                 size_t string_ind) {
    OPENVINO_ASSERT(string_aligned_buffer_ptr, "StringAlignedBuffer pointer is nullptr");
    const size_t num_elements = string_aligned_buffer_ptr->get_num_elements();
    OPENVINO_ASSERT(string_ind < num_elements,
                    "Incorrect string index: index ",
                    string_ind,
                    " is out of range of the string tensor with ",
                    num_elements,
                    " elements");
    const std::string* strings = reinterpret_cast<const std::string*>(string_aligned_buffer_ptr->get_ptr());
    const std::string& requested = strings[string_ind];
    raw_string_ptr = requested.data();
    raw_string_size = requested.size();
}
79+
} // namespace
80+
981
namespace ov {
1082
StringAlignedBuffer::StringAlignedBuffer(size_t num_elements, size_t byte_size, size_t alignment, bool initialize)
1183
: AlignedBuffer(byte_size, alignment),
@@ -29,4 +101,62 @@ StringAlignedBuffer::~StringAlignedBuffer() {
29101
}
30102
}
31103

104+
// Wraps an existing buffer: both buffer pointers refer to the caller's memory and the element count
// is derived from the byte size and the size of the string element type.
SharedStringAlignedBuffer::SharedStringAlignedBuffer(char* ptr, size_t size) {
    m_byte_size = size;
    m_num_elements = size / ov::element::string.size();
    m_aligned_buffer = ptr;
    m_allocated_buffer = ptr;
}
110+
111+
AttributeAdapter<std::shared_ptr<ov::StringAlignedBuffer>>::AttributeAdapter(
112+
std::shared_ptr<ov::StringAlignedBuffer>& value)
113+
: DirectValueAccessor<std::shared_ptr<ov::StringAlignedBuffer>>(value),
114+
m_header(nullptr),
115+
m_header_size(0) {}
116+
117+
std::shared_ptr<ov::StringAlignedBuffer>
118+
AttributeAdapter<std::shared_ptr<ov::StringAlignedBuffer>>::unpack_string_tensor(const char* packed_string_tensor_ptr,
119+
size_t packed_string_tensor_size) {
120+
std::shared_ptr<ov::StringAlignedBuffer> string_aligned_buffer;
121+
aux_unpack_string_tensor(packed_string_tensor_ptr, packed_string_tensor_size, string_aligned_buffer);
122+
return string_aligned_buffer;
123+
}
124+
125+
// Hands out the packed header of the adapted buffer.
// The header is built on the first request only; later calls return the cached copy.
void AttributeAdapter<std::shared_ptr<ov::StringAlignedBuffer>>::get_header(std::shared_ptr<uint8_t>& header,
                                                                            size_t& header_size) {
    if (m_header == nullptr) {
        aux_get_header(m_ref, m_header, m_header_size);
    }
    header_size = m_header_size;
    header = m_header;
}
133+
134+
// Looks up the characters and the length of the string at string_ind in the adapted buffer
// by delegating to aux_get_raw_string_by_index.
void AttributeAdapter<std::shared_ptr<ov::StringAlignedBuffer>>::get_raw_string_by_index(const char*& raw_string_ptr,
                                                                                         size_t& raw_string_size,
                                                                                         size_t string_ind) {
    const auto& adapted_buffer = m_ref;
    aux_get_raw_string_by_index(adapted_buffer, raw_string_ptr, raw_string_size, string_ind);
}
139+
140+
AttributeAdapter<std::shared_ptr<ov::SharedStringAlignedBuffer>>::AttributeAdapter(
141+
std::shared_ptr<ov::SharedStringAlignedBuffer>& value)
142+
: DirectValueAccessor<std::shared_ptr<ov::SharedStringAlignedBuffer>>(value),
143+
m_header(nullptr),
144+
m_header_size(0) {}
145+
146+
// Hands out the packed header of the adapted shared buffer.
// The header is built on the first request only; later calls return the cached copy.
void AttributeAdapter<std::shared_ptr<ov::SharedStringAlignedBuffer>>::get_header(std::shared_ptr<uint8_t>& header,
                                                                                  size_t& header_size) {
    if (m_header == nullptr) {
        aux_get_header(m_ref, m_header, m_header_size);
    }
    header_size = m_header_size;
    header = m_header;
}
154+
155+
// Looks up the characters and the length of the string at string_ind in the adapted shared buffer
// by delegating to aux_get_raw_string_by_index.
void AttributeAdapter<std::shared_ptr<ov::SharedStringAlignedBuffer>>::get_raw_string_by_index(
    const char*& raw_string_ptr,
    size_t& raw_string_size,
    size_t string_ind) {
    // the shared buffer pointer converts to a pointer to its StringAlignedBuffer base for the helper
    const std::shared_ptr<ov::StringAlignedBuffer> adapted_buffer = m_ref;
    aux_get_raw_string_by_index(adapted_buffer, raw_string_ptr, raw_string_size, string_ind);
}
161+
32162
} // namespace ov
60 Bytes
Binary file not shown.
+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<?xml version="1.0"?>
2+
<net name="Model0" version="11">
3+
<layers>
4+
<layer id="0" name="Constant_1" type="Const" version="opset1">
5+
<data element_type="string" shape="2, 3" offset="0" size="60" />
6+
<output>
7+
<port id="0" precision="STRING">
8+
<dim>2</dim>
9+
<dim>3</dim>
10+
</port>
11+
</output>
12+
</layer>
13+
<layer id="1" name="Result_2" type="Result" version="opset1">
14+
<input>
15+
<port id="0" precision="STRING">
16+
<dim>2</dim>
17+
<dim>3</dim>
18+
</port>
19+
</input>
20+
</layer>
21+
</layers>
22+
<edges>
23+
<edge from-layer="0" from-port="0" to-layer="1" to-port="0" />
24+
</edges>
25+
<rt_info />
26+
</net>

src/core/tests/pass/serialization/serialize.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ INSTANTIATE_TEST_SUITE_P(
9999
std::make_tuple("nms5_dynamism.xml", "nms5_dynamism.bin"),
100100
std::make_tuple("if_diff_case.xml", "if_diff_case.bin"),
101101
std::make_tuple("if_body_without_parameters.xml", "if_body_without_parameters.bin"),
102-
std::make_tuple("string_parameter.xml", "string_parameter.bin")));
102+
std::make_tuple("string_parameter.xml", "string_parameter.bin"),
103+
std::make_tuple("const_string.xml", "const_string.bin")));
103104

104105
#ifdef ENABLE_OV_ONNX_FRONTEND
105106

src/core/tests/visitors/op/constant.cpp

+4-8
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,7 @@ TEST(attributes, constant_op_from_host_tensor_identical_elements) {
8686
ASSERT_TRUE(g_k->get_all_data_elements_bitwise_identical());
8787
}
8888

89-
// TODO: implement (de)serialization string constants
90-
TEST(attributes, DISABLED_constant_op_string) {
89+
TEST(attributes, constant_op_string) {
9190
vector<std::string> data{"abc", "de fc qq", "", "123 abc", "0112 3 ", "&&&"};
9291
auto k = make_shared<op::v0::Constant>(element::string, Shape{2, 3}, data);
9392
NodeBuilder builder(k);
@@ -101,8 +100,7 @@ TEST(attributes, DISABLED_constant_op_string) {
101100
ASSERT_FALSE(g_k->get_all_data_elements_bitwise_identical());
102101
}
103102

104-
// TODO: implement (de)serialization string constants
105-
TEST(attributes, DISABLED_constant_op_identical_elements_string) {
103+
TEST(attributes, constant_op_identical_elements_string) {
106104
vector<std::string> data{"abc edfg", "abc edfg", "abc edfg", "abc edfg", "abc edfg", "abc edfg"};
107105
auto k = make_shared<op::v0::Constant>(element::string, Shape{2, 3}, data);
108106
NodeBuilder builder(k);
@@ -116,8 +114,7 @@ TEST(attributes, DISABLED_constant_op_identical_elements_string) {
116114
ASSERT_TRUE(g_k->get_all_data_elements_bitwise_identical());
117115
}
118116

119-
// TODO: implement (de)serialization string constants
120-
TEST(attributes, DISABLED_constant_op_from_host_tensor_different_elements_string) {
117+
TEST(attributes, constant_op_from_host_tensor_different_elements_string) {
121118
vector<std::string> data{"abc", "de fc qq", "", "123 abc", "0112 3 ", "&&&"};
122119
auto tensor = ov::Tensor(element::string, Shape{2, 3}, &data[0]);
123120
auto k = make_shared<op::v0::Constant>(tensor);
@@ -133,8 +130,7 @@ TEST(attributes, DISABLED_constant_op_from_host_tensor_different_elements_string
133130
ASSERT_FALSE(g_k->get_all_data_elements_bitwise_identical());
134131
}
135132

136-
// TODO: implement (de)serialization string constants
137-
TEST(attributes, DISABLED_constant_op_from_host_tensor_identical_elements_string) {
133+
TEST(attributes, constant_op_from_host_tensor_identical_elements_string) {
138134
vector<std::string> data{"abc edfg", "abc edfg", "abc edfg", "abc edfg", "abc edfg", "abc edfg"};
139135
auto tensor = ov::Tensor(element::string, Shape{2, 3}, &data[0]);
140136
auto k = make_shared<op::v0::Constant>(tensor);

0 commit comments

Comments
 (0)