1
1
// Copyright (C) 2018-2022 Intel Corporation
2
2
// SPDX-License-Identifier: Apache-2.0
3
3
//
4
-
4
+ #include < common/blocked_desc_creator.h>
5
+ #include < cpu_types.h>
6
+ #include < edge.h>
5
7
#include < gtest/gtest.h>
6
8
#include < ie_common.h>
7
-
8
- #include < nodes/reorder.h>
9
- #include " nodes/input.h"
10
- #include < edge.h>
9
+ #include < memory_desc/cpu_memory_desc_utils.h>
10
+ #include < memory_desc/dnnl_memory_desc.h>
11
11
#include < node.h>
12
+ #include < nodes/reorder.h>
13
+
14
+ #include < common/memory_desc_wrapper.hpp>
15
+ #include < dnnl.hpp>
16
+ #include < utility>
17
+
18
+ #include " ../../../ie_test_utils/common_test_utils/common_utils.hpp"
12
19
#include " cache/multi_cache.h"
20
+ #include " nodes/input.h"
21
+
22
+ using namespace InferenceEngine ;
23
+ using namespace ov ::intel_cpu;
24
+ namespace ReorderCPUTest {
25
+ void checkReorder (const ov::intel_cpu::Memory& inputMemory,
26
+ const ov::intel_cpu::Memory& outputMemory,
27
+ const InferenceEngine::Precision& prescision) {
28
+ auto srcData = inputMemory.GetData ();
29
+ auto dstData = outputMemory.GetData ();
30
+ auto mdInput = inputMemory.GetDescWithType <DnnlMemoryDesc>()->getDnnlDesc ();
31
+ auto mdOutput = outputMemory.GetDescWithType <DnnlMemoryDesc>()->getDnnlDesc ();
32
+
33
+ const dnnl::impl::memory_desc_wrapper mdwInput (mdInput.data );
34
+ const dnnl::impl::memory_desc_wrapper mdwOutput (mdOutput.data );
35
+ auto nelems = mdwInput.nelems ();
36
+
37
+ for (size_t i = 0 ; i < nelems; ++i) {
38
+ auto srcOffset = mdwInput.off_l (i, false );
39
+ auto dstOffset = mdwOutput.off_l (i, false );
40
+ switch (prescision) {
41
+ case InferenceEngine::Precision::FP32: {
42
+ auto s = *(static_cast <float *>(srcData) + srcOffset);
43
+ auto d = *(static_cast <float *>(dstData) + dstOffset);
44
+ ASSERT_EQ (s, d) << " mismatch at position " << i;
45
+ break ;
46
+ }
47
+ case InferenceEngine::Precision::I8: {
48
+ auto s = *(static_cast <int8_t *>(srcData) + srcOffset);
49
+ auto d = *(static_cast <int8_t *>(dstData) + dstOffset);
50
+ ASSERT_EQ (s, d) << " mismatch at position " << i;
51
+ break ;
52
+ }
53
+ default :
54
+ FAIL () << " Unsupported data precision in the test" << prescision.name ();
55
+ }
56
+ }
57
+ }
58
+
59
+ std::string layoutName (const LayoutType& layout) {
60
+ if (layout == LayoutType::nspc)
61
+ return " nspc" ;
62
+ if (layout == LayoutType::ncsp)
63
+ return " ncsp" ;
64
+ if (layout == LayoutType::nCsp8c)
65
+ return " nCsp8c" ;
66
+ if (layout == LayoutType::nCsp16c)
67
+ return " nCsp16c" ;
68
+ return " Unsupported layout type" ;
69
+ }
70
+
71
+ void fillData (const ov::intel_cpu::Memory& inputMemory, const InferenceEngine::Precision& prec) {
72
+ ov::intel_cpu::DnnlMemoryDescPtr dnnlMdInput = inputMemory.GetDescWithType <DnnlMemoryDesc>();
73
+ const dnnl::impl::memory_desc_wrapper mdInput{dnnlMdInput->getDnnlDesc ().data };
74
+ auto elemNum = mdInput.nelems ();
75
+ auto inputReorderData = inputMemory.GetData ();
76
+ switch (prec) {
77
+ case InferenceEngine::Precision::FP32:
78
+ for (size_t i = 0 ; i < elemNum; ++i)
79
+ *(static_cast <float *>(inputReorderData) + mdInput.off_l (i, false )) = static_cast <float >(i);
80
+ break ;
81
+ case InferenceEngine::Precision::I8:
82
+ for (size_t i = 0 ; i < elemNum; ++i)
83
+ *(static_cast <int8_t *>(inputReorderData) + mdInput.off_l (i, false )) = static_cast <int8_t >(i);
84
+ break ;
85
+ default :
86
+ FAIL () << " Unsupported data precision in the test" << prec.name ();
87
+ }
88
+ }
89
+ struct ReorderCustomImplTestParamSet {
90
+ // logical dimension of input
91
+ std::vector<size_t > srcDims;
92
+ bool isNspc2Ncsp;
93
+ uint32_t strideFactor;
94
+ InferenceEngine::Precision prec;
95
+ size_t stridedAxis;
96
+ };
97
+
98
+ struct ReorderCPUTestParamSet {
99
+ ngraph::PartialShape inputPartialShape;
100
+ // logical dimension vector of input
101
+ std::vector<std::vector<size_t >> inputShapes;
102
+ LayoutType srcLayout;
103
+ LayoutType dstLayout;
104
+ InferenceEngine::Precision prec;
105
+ };
106
+
107
+ class ReorderCPUTestGraph {
108
+ public:
109
+ void buildReorderGraph (const ov::intel_cpu::CpuBlockedMemoryDesc& inputDesc,
110
+ const ov::intel_cpu::CpuBlockedMemoryDesc& outputDesc) {
111
+ const dnnl::engine cpuEngine = {dnnl::engine::kind::cpu, 0 };
112
+ ov::intel_cpu::WeightsSharing::Ptr weightsCache;
113
+
114
+ inputNode = std::make_shared<ov::intel_cpu::node::Input>(inputDesc.clone (),
115
+ " Reorder_Input" ,
116
+ " Parameter" ,
117
+ cpuEngine,
118
+ weightsCache);
119
+ reorderNode = std::make_shared<ov::intel_cpu::node::Reorder>(" Reorder" , cpuEngine, weightsCache);
120
+ outputNode = std::make_shared<ov::intel_cpu::node::Input>(outputDesc.clone (),
121
+ " Reorder_Output" ,
122
+ " Result" ,
123
+ cpuEngine,
124
+ weightsCache);
125
+
126
+ parentEdge = std::make_shared<ov::intel_cpu::Edge>(inputNode, reorderNode, 0 , 0 );
127
+ childEdge = std::make_shared<ov::intel_cpu::Edge>(reorderNode, outputNode, 0 , 0 );
128
+ parentEdge->changeStatus (ov::intel_cpu::Edge::Status::NeedAllocation);
129
+ childEdge->changeStatus (ov::intel_cpu::Edge::Status::NeedAllocation);
130
+ reorderNode->addEdge (parentEdge);
131
+ reorderNode->addEdge (childEdge);
132
+
133
+ auto rtParamsCache = std::make_shared<ov::intel_cpu::MultiCache>(100 );
134
+
135
+ auto parentMemory = std::make_shared<ov::intel_cpu::Memory>(cpuEngine);
136
+ auto childMemory = std::make_shared<ov::intel_cpu::Memory>(cpuEngine);
137
+ parentMemory->Create (inputDesc, nullptr );
138
+ childMemory->Create (outputDesc, nullptr );
139
+
140
+ parentEdge->reuse (parentMemory);
141
+ childEdge->reuse (childMemory);
142
+
143
+ reorderNode->setDescs (inputDesc, outputDesc);
144
+ reorderNode->setRuntimeCache (rtParamsCache);
145
+ std::array<std::shared_ptr<ov::intel_cpu::Node>, 3 > nodes{inputNode, reorderNode, outputNode};
146
+ for (auto & n : nodes) {
147
+ n->init ();
148
+ n->getSupportedDescriptors ();
149
+ n->initSupportedPrimitiveDescriptors ();
150
+ n->selectPrimitiveDescriptorByIndex (0 );
151
+ }
152
+ stream = dnnl::stream{cpuEngine};
153
+ }
154
+
155
+ protected:
156
+ dnnl::stream stream;
157
+ std::shared_ptr<ov::intel_cpu::node::Input> inputNode;
158
+ std::shared_ptr<ov::intel_cpu::node::Reorder> reorderNode;
159
+ std::shared_ptr<ov::intel_cpu::node::Input> outputNode;
160
+ std::shared_ptr<ov::intel_cpu::Edge> parentEdge;
161
+ std::shared_ptr<ov::intel_cpu::Edge> childEdge;
162
+ InferenceEngine::Precision prec;
163
+ };
164
+
165
+ }// namespace ReorderCPUTest
166
+
167
+ using namespace ReorderCPUTest ;
13
168
14
169
/*
15
170
* Test Reorder::optimizedNcsp2Nspc() and Reorder::optimizedNspc2Ncsp() for
16
171
* inPlace and non-inPlace cases. Specifically, the test checks that dst batch strides are
17
172
* correctly taken into account by the custom impls (the case when the reorder is followed by an inplace concat).
18
173
*/
19
- typedef std::tuple<
20
- std::vector<size_t >, // srcDims
21
- bool > // forceInplace;
22
- ReorderCustomImplTestParamSet;
23
-
24
- class ReorderCustomImplTestBase : public ::testing::Test {
174
+ class ReorderCustomizedStrideTest : public ::testing::Test,
175
+ public ::testing::WithParamInterface<ReorderCustomImplTestParamSet>,
176
+ public ::ReorderCPUTest::ReorderCPUTestGraph {
25
177
public:
26
- static std::string getTestCaseName (const testing::TestParamInfo<ReorderCustomImplTestParamSet> &obj) {
27
- std::vector<size_t > srcDims;
28
- bool inPlace;
29
- std::tie (srcDims, inPlace) = obj.param ;
178
+ static std::string getTestCaseName (const testing::TestParamInfo<ReorderCustomImplTestParamSet>& obj) {
179
+ ReorderCustomImplTestParamSet p = obj.param ;
30
180
std::ostringstream result;
31
- result << " IS=(" ;
32
- for (const auto s : srcDims)
33
- result << s << " ." ;
34
- result.seekp (-1 , result.cur );
181
+ result << " IS:(" ;
182
+ result << CommonTestUtils::vec2str (p.srcDims );
183
+ result << (p.isNspc2Ncsp ? " _NSPC2NCSP" : " _NCSP2NSPC" );
184
+ result << " _InputDataType:" << p.prec .name ();
185
+ result << " _OutputDataType:" << p.prec .name ();
186
+ result << " _StrideFactor:" << p.strideFactor ;
187
+ result << " _StridedLogicChannelIndice:" << p.stridedAxis ;
35
188
result << " )" ;
36
- result << " _InPlace=" << inPlace;
37
189
return result.str ();
38
190
}
39
191
192
+ void Run () {
193
+ buildCustomizedReorderGraph ();
194
+ infer ();
195
+ validate ();
196
+ }
197
+
40
198
protected:
41
- void executeReorderNode (const void * srcData, void * dstData) {
42
- auto getBlockedDims = [](const std::vector<size_t >& dims, const std::vector<size_t >& order){
199
+ void SetUp () override {
200
+ ReorderCustomImplTestParamSet p = ::testing::TestWithParam<ReorderCustomImplTestParamSet>::GetParam ();
201
+ srcDims = p.srcDims ;
202
+
203
+ if (p.isNspc2Ncsp ) {
204
+ // The custom NSPC2NCSP impl is used only if an input shape complies with:
205
+ ASSERT_TRUE (srcDims[1 ] <= 64 && srcDims[1 ] >= 16 && (getNumElems (srcDims) / srcDims[1 ]) >= 128 );
206
+ // The custom NSPC2NCSP impl is used only for FP32
207
+ prec = InferenceEngine::Precision::FP32;
208
+ srcOrder = std::vector<size_t >{0 , 2 , 3 , 1 };
209
+ dstOrder = std::vector<size_t >{0 , 1 , 2 , 3 };
210
+ } else {
211
+ ASSERT_LE (getNumElems (srcDims), 256 );
212
+ srcOrder = std::vector<size_t >{0 , 1 , 2 , 3 };
213
+ dstOrder = std::vector<size_t >{0 , 2 , 3 , 1 };
214
+ // The custom NSPC2NCSP impl is used only for U8
215
+ prec = InferenceEngine::Precision::I8;
216
+ }
217
+ dstDims = srcDims;
218
+ // Create strided dst layout for the inPlace case,
219
+ // For example: If need channel axis stride changes, need to set the height axis dimension.
220
+ dstDims[p.stridedAxis + 1 ] *= p.strideFactor ;
221
+ }
222
+
223
+ void buildCustomizedReorderGraph () {
224
+ auto getBlockedDims = [](const std::vector<size_t >& dims, const std::vector<size_t >& order) {
43
225
std::vector<size_t > result;
44
226
result.reserve (order.size ());
45
227
for (auto i : order)
46
228
result.push_back (dims[i]);
47
229
return result;
48
230
};
49
- auto getStrides = [](const std::vector<size_t >& dims){
231
+ auto getStrides = [](const std::vector<size_t >& dims) {
50
232
std::vector<size_t > result (dims.size ());
51
233
result[dims.size () - 1 ] = 1 ;
52
234
for (int i = dims.size () - 2 ; i >= 0 ; --i) {
53
- result[i] = result[i+ 1 ] * dims[i+ 1 ];
235
+ result[i] = result[i + 1 ] * dims[i + 1 ];
54
236
}
55
237
return result;
56
238
};
57
- const dnnl::engine cpuEngine (dnnl::engine::kind::cpu, 0 );
58
- ov::intel_cpu::WeightsSharing::Ptr weightsCache;
59
-
60
- auto inputNode = std::make_shared<ov::intel_cpu::node::Input>(ov::intel_cpu::Shape (srcDims),
61
- prec,
62
- " Reorder_Input" , " Input" ,
63
- cpuEngine, weightsCache);
64
- auto reorderNode = std::make_shared<ov::intel_cpu::node::Reorder>(" Reorder" , cpuEngine, weightsCache);
65
- auto outputNode = std::make_shared<ov::intel_cpu::node::Input>(ov::intel_cpu::Shape (dstDims),
66
- prec,
67
- " Reorder_Output" , " Output" ,
68
- cpuEngine, weightsCache);
69
-
70
- auto parentEdge = std::make_shared<ov::intel_cpu::Edge>(inputNode, reorderNode, 0 , 0 );
71
- auto childEdge = std::make_shared<ov::intel_cpu::Edge>(reorderNode, outputNode, 0 , 0 );
72
- parentEdge->changeStatus (ov::intel_cpu::Edge::Status::NeedAllocation);
73
- childEdge->changeStatus (ov::intel_cpu::Edge::Status::NeedAllocation);
74
- reorderNode->addEdge (parentEdge);
75
- reorderNode->addEdge (childEdge);
76
- auto rtParamsCache = std::make_shared<ov::intel_cpu::MultiCache>(100 );
77
-
78
239
const std::vector<size_t > srcBlockedDims = getBlockedDims (srcDims, srcOrder);
79
240
const std::vector<size_t > srcStrides = getStrides (srcBlockedDims);
80
241
const std::vector<size_t > offsetPaddingToData (srcDims.size (), 0 );
81
-
82
242
const std::vector<size_t > dstBlockedDims = getBlockedDims (dstDims, dstOrder);
83
243
const std::vector<size_t > dstStrides = getStrides (dstBlockedDims);
84
244
85
- const ov::intel_cpu::CpuBlockedMemoryDesc inputDesc (prec, ov::intel_cpu::Shape (srcDims),
86
- srcBlockedDims, srcOrder ,
87
- 0 , offsetPaddingToData, srcStrides);
88
-
89
- const ov::intel_cpu::CpuBlockedMemoryDesc outputDesc (prec, ov::intel_cpu::Shape (srcDims) ,
90
- getBlockedDims (srcDims, dstOrder), dstOrder ,
91
- 0 , offsetPaddingToData, dstStrides );
245
+ const ov::intel_cpu::CpuBlockedMemoryDesc inputDesc (prec,
246
+ ov::intel_cpu::Shape (srcDims) ,
247
+ srcBlockedDims,
248
+ srcOrder,
249
+ 0 ,
250
+ offsetPaddingToData ,
251
+ srcStrides );
92
252
93
- auto parentMemory = std::make_shared<ov::intel_cpu::Memory>(cpuEngine);
94
- auto childMemory = std::make_shared<ov::intel_cpu::Memory>(cpuEngine);
95
- parentMemory->Create (inputDesc, srcData);
96
- childMemory->Create (outputDesc, dstData);
97
- parentEdge->reuse (parentMemory);
98
- childEdge->reuse (childMemory);
253
+ const ov::intel_cpu::CpuBlockedMemoryDesc outputDesc (prec,
254
+ ov::intel_cpu::Shape (srcDims),
255
+ getBlockedDims (srcDims, dstOrder),
256
+ dstOrder,
257
+ 0 ,
258
+ offsetPaddingToData,
259
+ dstStrides);
260
+ buildReorderGraph (inputDesc, outputDesc);
261
+ }
99
262
100
- reorderNode->setDescs (inputDesc, outputDesc);
101
- reorderNode->setRuntimeCache (rtParamsCache);
102
- std::vector<std::shared_ptr<ov::intel_cpu::Node>> nodes {inputNode, reorderNode, outputNode};
103
- for (auto &n : nodes) {
104
- n->init ();
105
- n->getSupportedDescriptors ();
106
- n->initSupportedPrimitiveDescriptors ();
107
- n->selectPrimitiveDescriptorByIndex (0 );
108
- }
109
- auto config = outputNode->getSelectedPrimitiveDescriptor ()->getConfig ();
110
- config.inConfs .resize (1 );
111
- config.inConfs [0 ].inPlace (forceInplace ? 0 : -1 );
112
- outputNode->getSelectedPrimitiveDescriptor ()->setConfig (config);
263
+ void infer () {
264
+ generateInput ();
113
265
reorderNode->createPrimitive ();
114
-
115
- dnnl::stream strm (cpuEngine);
116
- reorderNode->execute (strm);
117
- return ;
266
+ reorderNode->execute (stream);
118
267
}
119
268
120
- template <typename T>
121
- void Run (const std::vector<T>& srcData, std::vector<T>& dstData) {
122
- fillData ();
123
- executeReorderNode (srcData.data (), dstData.data ());
124
- EXPECT_TRUE (resultIsCorrect (dstData));
269
+ void validate (void ) {
270
+ checkReorder (parentEdge->getMemory (), childEdge->getMemory (), prec);
125
271
}
272
+
126
273
// Fill srcData so that the results of NSPC2NCSP and NCSP2NSPC reorders are incremental numbers 0,1,2,...
127
274
// Fill dstData with zeros
128
- virtual void fillData () = 0;
129
- template <typename T>
130
- bool resultIsCorrect (const std::vector<T>& dstData) {
131
- const size_t numElems = getNumElems (dstDims);
132
- auto b = dstData.begin ();
133
- std::vector<T> expectedData (blockSize);
134
- for (int i = 0 ; i < numElems / blockSize; i++, b += blockSize) {
135
- if (i % 2 == 0 ) {
136
- std::iota (expectedData.begin (), expectedData.end (), i / 2 * blockSize);
137
- if (!std::equal (b, b + blockSize, expectedData.begin ()))
138
- return false ;
139
- } else if (!std::all_of (b, b + blockSize, [](T x){return x == 0 ;})) {
140
- return false ;
141
- }
142
- }
143
- return true ;
275
+ void generateInput () {
276
+ fillData (parentEdge->getMemory (), prec);
277
+ memset (childEdge->getMemory ().GetData (), 0 , childEdge->getMemory ().GetSize ());
144
278
}
279
+
145
280
size_t getNumElems (const std::vector<size_t >& dims) {
146
281
size_t result = 1 ;
147
282
for (auto d : dims)
148
283
result *= d;
149
284
return result;
150
285
}
286
+
287
+ private:
151
288
std::vector<size_t > srcDims;
152
289
std::vector<size_t > srcOrder;
153
290
std::vector<size_t > dstDims;
154
291
std::vector<size_t > dstOrder;
155
- InferenceEngine::Precision prec;
156
- bool forceInplace;
157
- size_t blockSize;
158
292
};
159
293
160
- class ReorderNSPC2NCSPTest : public testing ::WithParamInterface<ReorderCustomImplTestParamSet>,
161
- public ReorderCustomImplTestBase{
162
- protected:
163
- void SetUp () override {
164
- std::tie (srcDims, forceInplace) = this ->GetParam ();
165
- // The custom NSPC2NCSP impl is used only if an input shape complies with:
166
- assert (srcDims[1 ] <= 64 && srcDims[1 ] >= 16 && (getNumElems (srcDims) / srcDims[1 ]) >= 128 );
167
- // The custom NSPC2NCSP impl is used only for FP32
168
- prec = InferenceEngine::Precision::FP32;
169
- srcOrder = std::vector<size_t > {0 , 2 , 3 , 1 };
170
- dstOrder = std::vector<size_t > {0 , 1 , 2 , 3 };
171
- dstDims = srcDims;
172
- blockSize = getNumElems (srcDims);
173
- // Create channel-strided dst layout for the inPlace case
174
- // Other dstDims could also be supported, but fillData() and resultIsCorrect() should be updated accordingly.
175
- if (forceInplace) {
176
- dstDims[1 ] *= 2 ;
177
- blockSize /= srcDims[0 ];
294
+ TEST_P (ReorderCustomizedStrideTest, OutputIsStrided) {
295
+ Run ();
296
+ }
297
+
298
+ const auto stridedParameter =
299
+ ::testing::Values (ReorderCustomImplTestParamSet{{2 , 16 , 8 , 8 }, true , 2 , InferenceEngine::Precision::FP32, 0 },
300
+ ReorderCustomImplTestParamSet{{2 , 16 , 8 , 8 }, true , 4 , InferenceEngine::Precision::FP32, 1 },
301
+ ReorderCustomImplTestParamSet{{2 , 16 , 8 , 8 }, true , 3 , InferenceEngine::Precision::FP32, 1 },
302
+ ReorderCustomImplTestParamSet{{2 , 16 , 8 , 8 }, true , 1 , InferenceEngine::Precision::FP32, 2 },
303
+ ReorderCustomImplTestParamSet{{2 , 8 , 4 , 4 }, false , 2 , InferenceEngine::Precision::I8, 0 },
304
+ ReorderCustomImplTestParamSet{{2 , 8 , 4 , 4 }, false , 5 , InferenceEngine::Precision::I8, 1 },
305
+ ReorderCustomImplTestParamSet{{2 , 8 , 4 , 4 }, false , 1 , InferenceEngine::Precision::I8, 2 });
306
+
307
+ INSTANTIATE_TEST_SUITE_P (smoke_ReorderTestCustomStrideWithFactor,
308
+ ReorderCustomizedStrideTest,
309
+ stridedParameter,
310
+ ReorderCustomizedStrideTest::getTestCaseName);
311
+
312
+ /*
313
+ * ReorderCPUTest to test the CPU plugin-in dynamism and RT cache
314
+ */
315
+ class ReorderDynamismCPUTest : public ::testing::Test,
316
+ public ::testing::WithParamInterface<ReorderCPUTestParamSet>,
317
+ public ::ReorderCPUTest::ReorderCPUTestGraph {
318
+ public:
319
+ static std::string getTestCaseName (const testing::TestParamInfo<ReorderCPUTestParamSet>& obj) {
320
+ ReorderCPUTestParamSet p = obj.param ;
321
+ std::ostringstream result;
322
+ result << " IS:(" ;
323
+ result << " InputPartialShape:" << CommonTestUtils::partialShape2str ({p.inputPartialShape });
324
+ for (const auto inputShape : p.inputShapes ) {
325
+ result << CommonTestUtils::vec2str (inputShape);
178
326
}
327
+ result << " _InputLayoutType:" << layoutName (p.srcLayout ) << " ." ;
328
+ result << " _OutputLayoutType:" << layoutName (p.dstLayout ) << " ." ;
329
+ result << " _InputDataType:" << p.prec .name ();
330
+ result << " _OutputDataType:" << p.prec .name ();
331
+ result << " )" ;
332
+ return result.str ();
179
333
}
334
+
180
335
void Run () {
181
- ReorderCustomImplTestBase::Run (srcData, dstData);
182
- }
183
- void fillData () override {
184
- dstData.resize (getNumElems (dstDims));
185
- std::fill (dstData.begin (), dstData.end (), 0 );
186
- srcData.resize (getNumElems (srcDims));
187
- const int numChannels = srcDims[1 ];
188
- const int spBlockSize = srcDims[2 ] * srcDims[3 ];
189
- const int batchSize = spBlockSize * numChannels;
190
- int i = 0 ;
191
- for (int n = 0 ; n < getNumElems (srcDims); n += batchSize) {
192
- for (int sp = n; sp < n + spBlockSize; sp++) {
193
- for (int c = sp; c < sp + batchSize; c += spBlockSize) {
194
- srcData[i++] = static_cast <float >(c);
195
- }
196
- }
336
+ for (auto inputshape : inputShapes) {
337
+ generate_inputs (inputshape);
338
+ infer ();
339
+ validate ();
197
340
}
198
341
}
199
- std::vector<float > dstData;
200
- std::vector<float > srcData;
201
- };
202
342
203
- class ReorderNCSP2NSPCTest : public testing ::WithParamInterface<ReorderCustomImplTestParamSet>,
204
- public ReorderCustomImplTestBase{
205
343
protected:
206
- void SetUp () override {
207
- std::tie (srcDims, forceInplace) = this ->GetParam ();
208
- // Avoid uint8_t overflow or modify fillNCSP2NSPC() and resultIsCorrect()
209
- assert (getNumElems (srcDims) <= 256 );
210
- srcOrder = std::vector<size_t > {0 , 1 , 2 , 3 };
211
- dstOrder = std::vector<size_t > {0 , 2 , 3 , 1 };
212
- // The custom NSPC2NCSP impl is used only for U8
213
- prec = InferenceEngine::Precision::U8;
214
- dstDims = srcDims;
215
- blockSize = getNumElems (srcDims);
216
- // Create channel-strided dst layout for the inPlace case
217
- // Other dstDims could also be supported, but fillData() and resultIsCorrect() should be updated accordingly.
218
- if (forceInplace) {
219
- dstDims[1 ] *= 2 ;
220
- blockSize = srcDims[1 ];
221
- }
344
+ void generate_inputs (const std::vector<size_t >& inputShape) {
345
+ parentEdge->getParent ()->redefineOutputMemory ({inputShape});
346
+ fillData (parentEdge->getMemory (), prec);
222
347
}
223
- void Run () {
224
- ReorderCustomImplTestBase::Run (srcData, dstData );
348
+ void infer () {
349
+ reorderNode-> executeDynamic (stream );
225
350
}
226
- void fillData () override {
227
- dstData.resize (getNumElems (dstDims));
228
- std::fill (dstData.begin (), dstData.end (), 0 );
229
- srcData.resize (getNumElems (srcDims));
230
- const int numChannels = srcDims[1 ];
231
- const int batchSize = srcDims[2 ] * srcDims[3 ] * numChannels;
232
- int i = 0 ;
233
- for (int n = 0 ; n < getNumElems (srcDims); n += batchSize) {
234
- for (int c = n; c < n + numChannels; c ++) {
235
- for (int sp = c; sp < c + batchSize; sp += numChannels) {
236
- srcData[i++] = static_cast <uint8_t >(sp);
237
- }
238
- }
239
- }
351
+ void validate (void ) {
352
+ checkReorder (parentEdge->getMemory (), childEdge->getMemory (), prec);
240
353
}
241
- std::vector<uint8_t > dstData;
242
- std::vector<uint8_t > srcData;
243
- };
244
354
245
- TEST_P (ReorderNSPC2NCSPTest, NSPC2NCSP) {
246
- Run ();
247
- }
355
+ struct BuildReorderParams {
356
+ ov::intel_cpu::Shape srcShape;
357
+ ov::intel_cpu::Shape dstShape;
358
+ LayoutType srcLayout;
359
+ LayoutType dstLayout;
360
+ };
248
361
249
- TEST_P (ReorderNCSP2NSPCTest, NCSP2NSPC) {
250
- Run ();
251
- }
362
+ void SetUp () override {
363
+ ReorderCPUTestParamSet reorderTestParam = this ->GetParam ();
364
+ BuildReorderParams reorderParams;
365
+ reorderParams.srcLayout = reorderTestParam.srcLayout ;
366
+ reorderParams.dstLayout = reorderTestParam.dstLayout ;
367
+ reorderParams.srcShape = ov::intel_cpu::Shape (reorderTestParam.inputPartialShape );
368
+ reorderParams.dstShape = reorderParams.srcShape ;
369
+ inputShapes = reorderTestParam.inputShapes ;
370
+ prec = reorderTestParam.prec ;
371
+
372
+ buildReorderDynamismGraph (reorderParams);
373
+ }
252
374
253
- const std::vector< bool > forceInplace { false , true };
254
- const auto NSPC2NCSPparams =::testing::Combine(
255
- ::testing::Values (std::vector< size_t > { 2 , 16 , 8 , 8 }),
256
- ::testing::ValuesIn(forceInplace)) ;
375
+ void buildReorderDynamismGraph ( const BuildReorderParams& reorderParams) {
376
+ BlockedDescCreator::CreatorsMap blockCreatorMap = BlockedDescCreator::getCommonCreators ();
377
+ auto srcBlockedDescCreator = blockCreatorMap[reorderParams. srcLayout ];
378
+ auto dstBlockedDescCreator = blockCreatorMap[reorderParams. dstLayout ] ;
257
379
258
- INSTANTIATE_TEST_SUITE_P (smoke_ReorderTestCustomNSPC, ReorderNSPC2NCSPTest, NSPC2NCSPparams,
259
- ReorderCustomImplTestBase::getTestCaseName);
380
+ const ov::intel_cpu::CpuBlockedMemoryDesc inputDesc =
381
+ srcBlockedDescCreator->createDesc (prec, reorderParams.srcShape );
382
+
383
+ const ov::intel_cpu::CpuBlockedMemoryDesc outputDesc =
384
+ dstBlockedDescCreator->createDesc (prec, reorderParams.dstShape );
385
+
386
+ buildReorderGraph (inputDesc, outputDesc);
387
+ }
388
+
389
+ private:
390
+ std::vector<std::vector<size_t >> inputShapes;
391
+ };
392
+
393
+ TEST_P (ReorderDynamismCPUTest, CompareResult) {
394
+ Run ();
395
+ }
260
396
261
- const auto NCSP2NSPCparams =::testing::Combine(
262
- ::testing::Values (std::vector<size_t > {2 , 8 , 4 , 4 }),
263
- ::testing::ValuesIn(forceInplace));
397
+ const auto reorderCpuTestDynamismParams =
398
+ ::testing::Values (ReorderCPUTestParamSet{{2 , 16 , 8 , -1 },
399
+ {{2 , 16 , 8 , 8 }, {2 , 16 , 8 , 16 }, {2 , 16 , 8 , 8 }},
400
+ LayoutType::nspc,
401
+ LayoutType::ncsp,
402
+ InferenceEngine::Precision::FP32},
403
+ ReorderCPUTestParamSet{{-1 , -1 , -1 , -1 },
404
+ {{2 , 8 , 4 , 4 }, {2 , 8 , 8 , 4 }, {2 , 8 , 4 , 4 }},
405
+ LayoutType::ncsp,
406
+ LayoutType::nspc,
407
+ InferenceEngine::Precision::FP32},
408
+ ReorderCPUTestParamSet{{2 , 32 , -1 , 4 },
409
+ {{2 , 32 , 3 , 4 }, {2 , 32 , 6 , 4 }, {2 , 32 , 3 , 4 }},
410
+ LayoutType::ncsp,
411
+ LayoutType::nCsp8c,
412
+ InferenceEngine::Precision::FP32},
413
+ ReorderCPUTestParamSet{{-1 , 32 , -1 , -1 },
414
+ {{2 , 32 , 3 , 4 }, {2 , 32 , 6 , 4 }, {2 , 32 , 3 , 4 }},
415
+ LayoutType::nCsp16c,
416
+ LayoutType::nspc,
417
+ InferenceEngine::Precision::I8});
264
418
265
- INSTANTIATE_TEST_SUITE_P (smoke_ReorderTestCustomNCSP, ReorderNCSP2NSPCTest, NCSP2NSPCparams,
266
- ReorderCustomImplTestBase::getTestCaseName);
419
+ INSTANTIATE_TEST_SUITE_P (smoke_ReorderTestDynamism,
420
+ ReorderDynamismCPUTest,
421
+ reorderCpuTestDynamismParams,
422
+ ReorderDynamismCPUTest::getTestCaseName);
0 commit comments