Commit 8b6eac6

[TF FE] Optimize TensorList ops decompositions (openvinotoolkit#25003)
**Details:** Simplify the representation of the TensorList* ops and preserve the tensor list rank, which helps the subsequent fusion of Loop with tensor list ops into RNN sequence operations. Currently the decomposition always flattens tensor list elements to 1D, which blocks that fusion.

**Ticket:** TBD

---------

Signed-off-by: Kazantsev, Roman <roman.kazantsev@intel.com>
1 parent 20d91fd commit 8b6eac6
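
To see what preserving the tensor list rank means in practice, the updated reference graphs in convert_tricky_models.cpp below give the picture: the old decomposition flattened each pushed tensor to 1D and later recovered its shape with Reshape/Slice, while the new one only adds the list dimension with Unsqueeze + Concat. A minimal standalone sketch of the new reference graph, assuming a single pushed tensor of shape {2, 3, 5} as in the test:

```cpp
// Sketch of the new (rank-preserving) reference graph from the updated test below.
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/unsqueeze.hpp"

std::shared_ptr<ov::Model> make_rank_preserving_reference() {
    using namespace ov;
    using namespace ov::op;

    auto x = std::make_shared<v0::Parameter>(element::f32, Shape{2, 3, 5});
    // the pushed tensor only gains a leading "list" dimension: [2, 3, 5] -> [1, 2, 3, 5]
    auto axes = std::make_shared<v0::Constant>(element::i32, Shape{1}, 0);
    auto x_unsqueeze = std::make_shared<v0::Unsqueeze>(x, axes);
    auto list_push_back = std::make_shared<v0::Concat>(OutputVector{x_unsqueeze}, 0);
    return std::make_shared<Model>(OutputVector{list_push_back}, ParameterVector{x});
}
```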

File tree

8 files changed: +537 -101 lines changed

src/frontends/tensorflow/src/frontend.cpp (+2)

```diff
@@ -13,6 +13,7 @@
 #include "helper_transforms/embedding_segments_feature_fusing.hpp"
 #include "helper_transforms/saved_model_unused_remover.hpp"
 #include "helper_transforms/tensor_array_v3_replacer.hpp"
+#include "helper_transforms/tensor_list_ops_resolver.hpp"
 #include "input_model.hpp"
 #include "op_table.hpp"
 #include "openvino/core/so_extension.hpp"
@@ -567,6 +568,7 @@ void FrontEnd::normalize(const std::shared_ptr<ov::Model>& model) const {
     manager.register_pass<pass::TensorArrayV3Replacer>();
     manager.register_pass<pass::ConstToResultRemover>();
     manager.register_pass<pass::SwitchMergeResolver>();
+    manager.register_pass<pass::TensorListOperationsResolver>();
     manager.register_pass<ov::pass::UnrollIf>();
     manager.register_pass<ov::pass::RemoveConcatZeroDimInput>();
     manager.register_pass<ov::pass::TransposeSinkingGeneral>();
```

src/frontends/tensorflow/src/op_table.cpp (+1 -1)

```diff
@@ -247,7 +247,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
         {"DynamicPartition", CreatorFunction(translate_dynamic_partition_op)},
         {"Einsum", CreatorFunction(translate_einsum_op)},
         {"Elu", CreatorFunction(translate_elu_op)},
-        {"EmptyTensorList", CreatorFunction(translate_tensor_list_reserve_op)},
+        {"EmptyTensorList", CreatorFunction(translate_empty_tensor_list_op)},
         {"EnsureShape", CreatorFunction(translate_identity_op)},
         {"ExpandDims", CreatorFunction(translate_expand_dims_op)},
         {"ExtractImagePatches", CreatorFunction(translate_extract_image_patches_op)},
```

src/frontends/tensorflow/tests/convert_tricky_models.cpp (+4 -14)

```diff
@@ -420,20 +420,10 @@ TEST_F(FrontEndConversionWithReferenceTestsF, ModelWithEmptyTensorListAndPushBac
     { model = convert_model("empty_tensor_list/empty_tensor_list.pb"); }
     {
         auto x = make_shared<v0::Parameter>(f32, Shape{2, 3, 5});
-        auto minus_one_const = make_shared<v0::Constant>(i32, Shape{1}, -1);
-        auto x_flatten = make_shared<v1::Reshape>(x, minus_one_const, false);
-        auto zero_const = make_shared<v0::Constant>(i32, Shape{1}, 0);
-        auto x_unsqueeze_flatten = make_shared<v0::Unsqueeze>(x_flatten, zero_const);
-        auto list_push_back = make_shared<v0::Concat>(OutputVector{x_unsqueeze_flatten}, 0);
-        auto list_push_back_shape = make_shared<v3::ShapeOf>(list_push_back, element::i32);
-        auto start = make_shared<v0::Constant>(i32, Shape{1}, 0);
-        auto stop = make_shared<v0::Constant>(i32, Shape{1}, 1);
-        auto step = make_shared<v0::Constant>(i32, Shape{1}, 1);
-        auto batch = make_shared<v8::Slice>(list_push_back_shape, start, stop, step);
-        auto shape_without_batch = make_shared<v0::Constant>(i32, Shape{3}, vector<int32_t>{2, 3, 5});
-        auto recover_item_shape = make_shared<v0::Concat>(OutputVector{batch, shape_without_batch}, 0);
-        auto recover_item = make_shared<v1::Reshape>(list_push_back, recover_item_shape, false);
-        model_ref = make_shared<Model>(OutputVector{recover_item}, ParameterVector{x});
+        auto axes = make_shared<v0::Constant>(i32, Shape{1}, 0);
+        auto x_unsqueeze = make_shared<v0::Unsqueeze>(x, axes);
+        auto list_push_back = make_shared<v0::Concat>(OutputVector{x_unsqueeze}, 0);
+        model_ref = make_shared<Model>(OutputVector{list_push_back}, ParameterVector{x});
     }
     comparator.disable(FunctionsComparator::CmpValues::ATTRIBUTES);
 }
```

src/frontends/tensorflow_common/include/common_op_table.hpp (+1)

```diff
@@ -160,6 +160,7 @@ OP_CONVERTER(translate_square_op);
 OP_CONVERTER(translate_squeeze_op);
 OP_CONVERTER(translate_strided_slice_op);
 OP_CONVERTER(translate_sqrt_op);
+OP_CONVERTER(translate_empty_tensor_list_op);
 OP_CONVERTER(translate_tensor_list_from_tensor_op);
 OP_CONVERTER(translate_tensor_list_get_item_op);
 OP_CONVERTER(translate_tensor_list_length_op);
```
New file (+185 lines): internal operations TensorList, TensorListGetItem and TensorListSetItem

```cpp
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <vector>

#include "internal_operation.hpp"
#include "openvino/op/constant.hpp"

namespace ov {
namespace frontend {
namespace tensorflow {

// Internal operation for TensorList that represents an initial state of the tensor list container
class TensorList : public InternalOperation {
public:
    OPENVINO_OP("TensorList", "ov::frontend::tensorflow", InternalOperation);

    TensorList(const ov::Output<ov::Node>& num_elements,
               const ov::Rank& element_rank,
               const element::Type& element_dtype,
               const std::shared_ptr<DecoderBase>& decoder = std::make_shared<DecoderFake>())
        : InternalOperation(decoder, OutputVector{num_elements}, 1, "TensorList"),
          m_num_elements(num_elements),
          m_element_rank(element_rank),
          m_element_dtype(element_dtype) {
        validate_and_infer_types();
    }

    void validate_and_infer_types() override {
        if (m_element_rank.is_static()) {
            auto element_rank = m_element_rank.get_length();
            auto output_shape = ov::PartialShape::dynamic(element_rank + 1);
            set_output_type(0, m_element_dtype, output_shape);
        }

        set_output_type(0, m_element_dtype, ov::PartialShape::dynamic());
    }

    ov::element::Type get_element_type() const {
        return m_element_dtype;
    }

    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
        FRONT_END_OP_CONVERSION_CHECK(inputs.size() == 1,
                                      "[TensorFlow Frontend] internal error: TensorList expects no inputs");
        auto tensor_list_node = std::make_shared<TensorList>(inputs[0], m_element_rank, m_element_dtype, m_decoder);
        tensor_list_node->set_attrs(get_attrs());
        return tensor_list_node;
    }

    ov::Rank get_element_rank() const {
        return m_element_rank;
    }

    void set_element_rank(const ov::Rank& element_rank) {
        m_element_rank = element_rank;
    }

    ov::Output<ov::Node> get_num_elements() const {
        return m_num_elements;
    }

private:
    ov::Output<ov::Node> m_num_elements;
    ov::Rank m_element_rank;
    ov::element::Type m_element_dtype;
};

// Internal operation for TensorListGetItem
// it gets an element (Tensor) in tensor list by index
class TensorListGetItem : public InternalOperation {
public:
    OPENVINO_OP("TensorListGetItem", "ov::frontend::tensorflow", InternalOperation);

    TensorListGetItem(const Output<Node>& input_handle,
                      const Output<Node>& index,
                      const Output<Node>& element_shape,
                      const ov::element::Type& element_type,
                      const std::shared_ptr<DecoderBase>& decoder = std::make_shared<DecoderFake>())
        : InternalOperation(decoder, OutputVector{input_handle, index, element_shape}, 1, "TensorListGetItem"),
          m_element_type(element_type) {
        validate_and_infer_types();
    }

    void validate_and_infer_types() override {
        // deduce an element (Tensor) shape
        ov::PartialShape comp_element_shape = ov::PartialShape::dynamic();
        if (const auto& const_element_shape =
                ov::as_type_ptr<ov::op::v0::Constant>(input_value(2).get_node_shared_ptr())) {
            auto element_shape_value = const_element_shape->get_vector<int32_t>();
            comp_element_shape = ov::PartialShape::dynamic(static_cast<int64_t>(element_shape_value.size()));
            for (size_t idx = 0; idx < element_shape_value.size(); ++idx) {
                comp_element_shape[idx] = (element_shape_value[idx] >= 0)
                                              ? static_cast<int64_t>(element_shape_value[idx])
                                              : ov::Dimension::dynamic();
            }
        } else if (input_value(0).get_partial_shape().rank().is_static()) {
            // the second try to deduce element shape if it is still of dynamic rank
            auto tensor_list_rank = input_value(0).get_partial_shape().rank().get_length();
            OPENVINO_ASSERT(
                tensor_list_rank > 0,
                "[TensorFlow Frontend] internal error or inconsistent model: tensor list rank must be greater than 0");
            // exclude tensor dimension (or batch)
            comp_element_shape = ov::PartialShape::dynamic(tensor_list_rank - 1);
            for (int64_t idx = 1; idx < tensor_list_rank; ++idx) {
                comp_element_shape[idx - 1] = input_value(0).get_partial_shape()[idx];
            }
        }

        // deduce an element (Tensor) type
        if (m_element_type.is_dynamic() && input_value(0).get_element_type().is_static()) {
            m_element_type = input_value(0).get_element_type();
        }

        set_output_type(0, m_element_type, comp_element_shape);
    }

    ov::element::Type get_element_type() const {
        return m_element_type;
    }

    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
        FRONT_END_OP_CONVERSION_CHECK(inputs.size() == 3,
                                      "[TensorFlow Frontend] internal error: TensorListGetItem expects three inputs");
        auto tensor_list_get_item =
            std::make_shared<TensorListGetItem>(inputs[0], inputs[1], inputs[2], m_element_type, m_decoder);
        tensor_list_get_item->set_attrs(get_attrs());
        return tensor_list_get_item;
    }

private:
    ov::element::Type m_element_type;
};

// Internal operation for TensorListSetItem
// it inserts a tensor into the tensor list by index
class TensorListSetItem : public InternalOperation {
public:
    OPENVINO_OP("TensorListSetItem", "ov::frontend::tensorflow", InternalOperation);

    TensorListSetItem(const Output<Node>& input_handle,
                      const Output<Node>& index,
                      const Output<Node>& item,
                      const std::shared_ptr<DecoderBase>& decoder = std::make_shared<DecoderFake>())
        : InternalOperation(decoder, OutputVector{input_handle, index, item}, 1, "TensorListSetItem") {
        validate_and_infer_types();
    }

    void validate_and_infer_types() override {
        // deduce a type of elements in tensor list
        ov::element::Type element_type = ov::element::dynamic;
        if (input_value(0).get_element_type().is_static()) {
            element_type = input_value(0).get_element_type();
        } else if (input_value(2).get_element_type().is_static()) {
            element_type = input_value(2).get_element_type();
        }

        // deduce a shape of tensor list [num_tensors, <tensor shape>]
        ov::PartialShape tensor_list_shape = ov::PartialShape::dynamic();
        if (input_value(2).get_partial_shape().rank().is_static()) {
            auto element_rank = input_value(2).get_partial_shape().rank().get_length();
            tensor_list_shape = ov::PartialShape::dynamic(element_rank + 1);
            for (int64_t idx = 0; idx < element_rank; ++idx) {
                tensor_list_shape[idx + 1] = input_value(2).get_partial_shape()[idx];
            }
        }

        set_output_type(0, element_type, tensor_list_shape);
    }

    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
        FRONT_END_OP_CONVERSION_CHECK(inputs.size() == 3,
                                      "[TensorFlow Frontend] internal error: TensorListSetItem expects three inputs");
        auto tensor_list_set_item = std::make_shared<TensorListSetItem>(inputs[0], inputs[1], inputs[2], m_decoder);
        tensor_list_set_item->set_attrs(get_attrs());
        return tensor_list_set_item;
    }
};

}  // namespace tensorflow
}  // namespace frontend
}  // namespace ov
```
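
As a quick orientation for how these internal nodes fit together, here is a small, hypothetical usage sketch (not taken from the commit); it builds a list, sets one element, and reads it back, relying on the shape and type deduction implemented in `validate_and_infer_types` above:

```cpp
// Hypothetical usage of the internal ops above; not part of the commit.
#include <memory>
#include <vector>

#include "openvino/op/constant.hpp"
#include "openvino/op/parameter.hpp"
// plus the new TensorList helper-ops header added by this commit (path not shown in this excerpt)

void build_tensor_list_subgraph() {
    namespace tf = ov::frontend::tensorflow;
    using ov::element::f32;
    using ov::element::i32;

    // a tensor list meant to hold ten 2x3 elements of f32
    auto num_elements = std::make_shared<ov::op::v0::Constant>(i32, ov::Shape{1}, 10);
    auto list = std::make_shared<tf::TensorList>(num_elements, ov::Rank(2), f32);

    // insert an element at index 0
    auto item = std::make_shared<ov::op::v0::Parameter>(f32, ov::Shape{2, 3});
    auto index = std::make_shared<ov::op::v0::Constant>(i32, ov::Shape{}, 0);
    auto set_item = std::make_shared<tf::TensorListSetItem>(list, index, item);

    // read the element back; element_shape mirrors the TensorListGetItem input in TF
    auto element_shape = std::make_shared<ov::op::v0::Constant>(i32, ov::Shape{2}, std::vector<int32_t>{2, 3});
    auto get_item = std::make_shared<tf::TensorListGetItem>(set_item, index, element_shape, f32);
    // get_item->output(0) is deduced as f32 with static shape {2, 3}
}
```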

New file (+54 lines): declarations of the TensorList replacement passes (the helper_transforms/tensor_list_ops_resolver.hpp header included in frontend.cpp above)

```cpp
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <utility>

#include "openvino/pass/graph_rewrite.hpp"
#include "openvino/pass/pass.hpp"

namespace ov {
namespace frontend {
namespace tensorflow {
namespace pass {

// Replace internal operation TensorListReserve with a sub-graph producing initial container
class TensorListReplacer : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("ov::frontend::tensorflow::pass::TensorListReplacer");
    TensorListReplacer();
};

// Replace internal operation TensorListSetItem with a sub-graph that inserts a new tensor into container
class TensorListSetItemReplacer : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("ov::frontend::tensorflow::pass::TensorListSetItemReplacer");
    TensorListSetItemReplacer();
};

// Replace internal operation TensorListGetItem with a sub-graph that gets a tensor from container by index
class TensorListGetItemReplacer : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("ov::frontend::tensorflow::pass::TensorListGetItemReplacer");
    TensorListGetItemReplacer();
};

// Replace and optimize sub-graphs with TensorList operations such as TensorListReserve,
// TensorListSetItem, TensorListGetItem
class TensorListOperationsResolver : public ov::pass::GraphRewrite {
public:
    OPENVINO_RTTI("TensorListOperationsResolver", "0");
    TensorListOperationsResolver() {
        add_matcher<TensorListReplacer>();
        add_matcher<TensorListSetItemReplacer>();
        add_matcher<TensorListGetItemReplacer>();
    }
};

}  // namespace pass
}  // namespace tensorflow
}  // namespace frontend
}  // namespace ov
```
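
The resolver itself is registered in `FrontEnd::normalize()` (first diff above). For completeness, a hedged sketch of how this `GraphRewrite` would typically be applied to a converted `ov::Model` through `ov::pass::Manager`; the header path is the one included by frontend.cpp:

```cpp
// Sketch: applying the new resolver with ov::pass::Manager,
// mirroring the registration added to FrontEnd::normalize() above.
#include <memory>

#include "helper_transforms/tensor_list_ops_resolver.hpp"  // frontend-internal header, as in frontend.cpp
#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"

void resolve_tensor_list_ops(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    // the GraphRewrite applies the TensorListReplacer, TensorListSetItemReplacer and
    // TensorListGetItemReplacer matchers across the graph in a single registration
    manager.register_pass<ov::frontend::tensorflow::pass::TensorListOperationsResolver>();
    manager.run_passes(model);
}
```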
