|
| 1 | +// Copyright (C) 2023 Intel Corporation |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | +// |
| 4 | + |
| 5 | +#pragma once |
| 6 | + |
| 7 | +#include "pass.hpp" |
| 8 | + |
| 9 | +#include "allocate_buffers.hpp" |
| 10 | + |
| 11 | +namespace ov { |
| 12 | +namespace snippets { |
| 13 | +namespace lowered { |
| 14 | +namespace pass { |
| 15 | + |
| 16 | +/** |
| 17 | + * @interface DefineBufferClusters |
| 18 | + * @brief The pass defines buffer clusters. The buffers from one cluster share the |
| 19 | + * same memory (has the same offset relative to the data pointer of buffer scratchpad). |
| 20 | + * - If MemoryAccess op or Loop can read and write to the same (inplace behavior), the Buffers should be in the one cluster. |
| 21 | + * - If Buffer is in the Loop which read or write from/to the other Buffers, this Buffer can emulate `window` slidings. |
| 22 | + * It means that Buffer inside can reuse memory of Buffers outside in bounds of full Loop work. |
| 23 | + * Demonstration: |
| 24 | + * |-----------------------------------------------------| |
| 25 | + * | |------------| |------------| | InnerLoops have work amount 128 |
| 26 | + * Buffer0 [3x128]-> | | InnerLoop0 | -> Buffer1 [3x128] -> | InnerLoop1 | | -> Buffer2 [3x128] OuterLoop has work amount 3 |
| 27 | + * | |------------| OuterLoop |------------| | |
| 28 | + * |-----------------------------------------------------| |
| 29 | + * Buffer1 can reuse memory [128] of Buffer0 or Buffer2 in each iteration of OuterLoop |
| 30 | + * Note: The pass requires expression enumeration and buffer identification (for nested Buffers inplace). |
| 31 | + * These passes should be executed separately before this pass! |
| 32 | + * @ingroup snippets |
| 33 | + */ |
| 34 | +class DefineBufferClusters : public Pass { |
| 35 | +public: |
| 36 | + OPENVINO_RTTI("DefineBufferClusters", "Pass") |
| 37 | + |
| 38 | + DefineBufferClusters(AllocateBuffers::BufferClusters& clusters) : m_clusters(clusters) {} |
| 39 | + |
| 40 | + /** |
| 41 | + * @brief Apply the pass to the Linear IR |
| 42 | + * @param linear_ir the target Linear IR |
| 43 | + * @return status of the pass |
| 44 | + */ |
| 45 | + bool run(lowered::LinearIR& linear_ir) override; |
| 46 | + |
| 47 | +private: |
| 48 | + using BufferPorts = std::unordered_map<ExpressionPtr, std::set<size_t>>; |
| 49 | + /** |
| 50 | + * @brief Finds Buffer cluster in set of clusters which contains the target expression with Buffer |
| 51 | + * @param target target expression with Buffer op |
| 52 | + * @return vector iterator which refers to the found cluster |
| 53 | + */ |
| 54 | + AllocateBuffers::BufferClusters::iterator find_cluster_by_expr(const ExpressionPtr& target); |
| 55 | + /** |
| 56 | + * @brief Returns True if Buffer is direct source for the target expr (there aren't other loop between the Buffer and target expr) |
| 57 | + * @param buffer_expr expression with assumed Buffer op |
| 58 | + * @param target_expr expression with target op - LoopEnd or MemoryAccess op |
| 59 | + * @return boolean value |
| 60 | + */ |
| 61 | + bool is_direct_buffer(const ExpressionPtr& buffer_expr, const ExpressionPtr& target_expr) const; |
| 62 | + /** |
| 63 | + * @brief Creates new buffer cluster if buffer_exprs is missed in clusters. If buffer_exprs is already in clusters, do nothing |
| 64 | + * @param buffer_expr expression with Buffer op |
| 65 | + */ |
| 66 | + void create_new_cluster(const ExpressionPtr& buffer_expr); |
| 67 | + /** |
| 68 | + * @brief Returns common ID of cluster if all buffer inside have the same Buffer ID. Otherwise returns the default value SIZE_MAX |
| 69 | + * that means that Buffers in cluster have different IDs. |
| 70 | + * @param cluster set of Buffer expressions - cluster |
| 71 | + * @return common buffer ID or SIZE_MAX - size value |
| 72 | + */ |
| 73 | + size_t get_cluster_buffer_id(const AllocateBuffers::BufferCluster& cluster) const; |
| 74 | + |
| 75 | + /** |
| 76 | + * @brief Analyzes Loop: if Loop has Buffer ops on inputs and outputs, Loop can read and write from/to the same memory. |
| 77 | + * @param expr_it iterator of Linear IR which refers to the expression with LoopEnd |
| 78 | + */ |
| 79 | + void parse_loop(const LinearIR::constExprIt& expr_it); |
| 80 | + /** |
| 81 | + * @brief Analyzes full MemoryAccess op: if the op has Buffer ops on I/O, the op can read and write from/to the same memory. |
| 82 | + * @param expr expression with full MemoryAccess op |
| 83 | + */ |
| 84 | + void parse_memory_access_op(const ExpressionPtr& expr); |
| 85 | + /** |
| 86 | + * @brief Gets input outputs buffers of Loop |
| 87 | + * @param loop_expr expression with LoopEnd op |
| 88 | + * @return unordered map [Expression -> set of input ports] which represents input Buffers of Loop |
| 89 | + */ |
| 90 | + BufferPorts get_input_buffers(const ExpressionPtr& loop_expr) const; |
| 91 | + /** |
| 92 | + * @brief Gets output buffers of Loop |
| 93 | + * @param loop_expr expression with LoopEnd op |
| 94 | + * @return unordered map [Expression -> set of input ports] which represents output Buffers of Loop |
| 95 | + */ |
| 96 | + BufferPorts get_output_buffers(const ExpressionPtr& loop_expr) const; |
| 97 | + /** |
| 98 | + * @brief Analyzes nested Loops: unite nested buffer clusters if they can reproduce `window` sliding |
| 99 | + * @param input_buffers unordered map [Expression -> set of input ports] which represents input Buffers of Loop |
| 100 | + * @param output_buffers unordered map [Expression -> set of output ports (one)] which represents output Buffers of Loop |
| 101 | + * @param outer_loop_end_expr_it iterator of Linear IR which refers to the expression with outer LoopEnd |
| 102 | + */ |
| 103 | + void parse_nested_loops(const BufferPorts& input_buffers, const BufferPorts& output_buffers, const LinearIR::constExprIt& outer_loop_end_expr_it); |
| 104 | + /** |
| 105 | + * @brief Finds the last connected Loop to the target Buffer and returns the corresponding finalization offset |
| 106 | + * @param buffer_expr expression with Buffer op |
| 107 | + * @return finalization offset - int64_t value |
| 108 | + */ |
| 109 | + int64_t get_buffer_finalization_offset(const ExpressionPtr& buffer_expr) const; |
| 110 | + /** |
| 111 | + * @brief Check if two Buffer expressions are connected to the same Loop. Set common LoopEnd as `loop` parameter and |
| 112 | + * indexes of Loop ports `up_idx` and `down_idx` if Buffers are really neighbours |
| 113 | + * @param up expression with upper Buffer op |
| 114 | + * @param down expression with lower Buffer op |
| 115 | + * @param loop expression with common LoopEnd op |
| 116 | + * @param up_idx the reference to port index of upper Buffer op to the Loop |
| 117 | + * @param down_idx the reference to port index of lower Buffer op to the Loop |
| 118 | + * @return Return True if the Buffers are connected to the same Loop |
| 119 | + */ |
| 120 | + static bool are_buffer_neighbours(const ExpressionPtr& up, const ExpressionPtr& down, ExpressionPtr& loop, size_t& up_idx, size_t& down_idx); |
| 121 | + /** |
| 122 | + * @brief Unite clusters |
| 123 | + * @param inner_cluster_it iterator to inner cluster - buffer cluster is in the loop |
| 124 | + * @param outer_cluster buffer clusters with buffers outside the Loop |
| 125 | + * @param outer_buffer target Buffer from outer_cluster |
| 126 | + * @param is_outer_up true if outer buffer is upper in Linear IR than inner Buffers |
| 127 | + * @return Return True if clusters have been united |
| 128 | + */ |
| 129 | + bool unite_nested_clusters(const AllocateBuffers::BufferClusters::iterator& inner_cluster_it, AllocateBuffers::BufferCluster& outer_cluster, |
| 130 | + const ExpressionPtr& outer_buffer, bool is_outer_up); |
| 131 | + |
| 132 | + AllocateBuffers::BufferClusters& m_clusters; |
| 133 | +}; |
| 134 | + |
| 135 | +} // namespace pass |
| 136 | +} // namespace lowered |
| 137 | +} // namespace snippets |
| 138 | +} // namespace ov |
0 commit comments