Skip to content

Commit b2e0b87

Browse files
committed
benchdnn: graph: use default value from benchdnn for no ref mem
1 parent aad39fb commit b2e0b87

File tree

4 files changed

+71
-34
lines changed

4 files changed

+71
-34
lines changed

tests/benchdnn/graph/custom_driver.cpp

+11-3
Original file line numberDiff line numberDiff line change
@@ -293,10 +293,20 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind,
293293

294294
int fill_mem(dnn_mem_t &mem_dt, dnn_mem_t &mem_fp, int f_min, int f_max) {
295295

296+
const auto dt = mem_dt.dt();
297+
if (has_bench_mode_modifier(mode_modifier_t::no_ref_memory)
298+
&& !is_integral_dt(dt)) {
299+
// Use data filled by benchdnn for `no_ref_memory`, except some
300+
// customized operations in Graph API which expect the input
301+
// values to indicate indexing information, especially for integral
302+
// inputs. Hence we need to limit the input values to the
303+
// provided range.
304+
return OK;
305+
}
306+
296307
const auto nelems = mem_fp.nelems();
297308
if (nelems == 0) return OK;
298309

299-
const auto dt = mem_dt.dt();
300310
f_min = (dt == dnnl_u8 && f_min < 0) ? 0 : f_min;
301311
const int64_t n_chunks = 16;
302312
const int64_t chunk_size = div_up(nelems, n_chunks);
@@ -339,8 +349,6 @@ void init_memory_args(dnn_mem_map_t &mem_map, const prb_t *prb,
339349

340350
int init_ref_memory_args(dnn_mem_map_t &ref_mem_map, dnn_mem_map_t &mem_map,
341351
const prb_t *prb, res_t *res) {
342-
if (has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) return OK;
343-
344352
switch (prb->alg) {
345353
case GENINDEX:
346354
SAFE(::custom::genindex::init_ref_memory_args(

tests/benchdnn/graph/graph.cpp

+19-15
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,10 @@ int find_logical_tensor(size_t lt_id, const graph::op_ref_list_t &ops,
202202
int map_unmap_partition_mem(graph::partition_mem_map_t &partition_mem_map,
203203
const std::vector<dnnl::graph::logical_tensor> &lts,
204204
const int &map_flag, res_t *res) {
205+
206+
// Do not map or unmap the reference primitive memories for `no_ref_memory`
207+
if (has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) return OK;
208+
205209
// In case one logical tensor is used for multiple inputs, record the
206210
// processed logical tensor ids to avoid duplicate processing
207211
std::unordered_set<size_t> processed_ids;
@@ -253,7 +257,6 @@ int make_input_tensors(std::vector<dnnl::graph::tensor> &input_ts,
253257
}
254258

255259
// generate tensor for graph path
256-
257260
const auto iter = partition_mem_map.find(lt_id);
258261
if (iter != partition_mem_map.end()) {
259262
const auto &graph_mem = iter->second;
@@ -663,10 +666,12 @@ int doit(const prb_t *prb, res_t *res) {
663666
std::vector<dnnl::graph::tensor> output_ts(outputs.size());
664667

665668
ref_partition_t ref_partition(dg, partitions[i], inputs, outputs);
669+
666670
// Construct memory for both perf & corr modes
667-
SAFE(ref_partition.init_ref(
668-
graph_in_ports, partition_mem_map_v[i], res),
669-
WARN);
671+
SAFE(ref_partition.init_ref(graph_in_ports, res), WARN);
672+
if (res->state == SKIPPED) return OK;
673+
674+
SAFE(ref_partition.init_graph_mem(partition_mem_map_v[i], res), WARN);
670675
if (res->state == SKIPPED) return OK;
671676

672677
if (has_bench_mode_bit(mode_bit_t::corr)) {
@@ -683,15 +688,12 @@ int doit(const prb_t *prb, res_t *res) {
683688
}
684689

685690
// unmap memory from host to device
686-
map_unmap_partition_mem(partition_mem_map_v[i], inputs, UNMAP, res);
687-
map_unmap_partition_mem(partition_mem_map_v[i], outputs, UNMAP, res);
688-
if (res->state == FAIL) {
689-
BENCHDNN_PRINT(0,
690-
"FAIL: Fail to unmap memories to host for partition "
691-
"%zu.\n",
692-
i);
693-
return FAIL;
694-
}
691+
SAFE(map_unmap_partition_mem(
692+
partition_mem_map_v[i], inputs, UNMAP, res),
693+
WARN);
694+
SAFE(map_unmap_partition_mem(
695+
partition_mem_map_v[i], outputs, UNMAP, res),
696+
WARN);
695697

696698
const op_ref_list_t &op_list = ref_partition.get_partition_ops();
697699
const auto &inplace_ports
@@ -731,8 +733,10 @@ int doit(const prb_t *prb, res_t *res) {
731733
graph_mem_mgr.stop_graph_mem_check();
732734

733735
// map memory from device back to host
734-
map_unmap_partition_mem(partition_mem_map_v[i], inputs, MAP, res);
735-
map_unmap_partition_mem(partition_mem_map_v[i], outputs, MAP, res);
736+
SAFE(map_unmap_partition_mem(partition_mem_map_v[i], inputs, MAP, res),
737+
WARN);
738+
SAFE(map_unmap_partition_mem(partition_mem_map_v[i], outputs, MAP, res),
739+
WARN);
736740

737741
// If the device is out-of-memory due to graph path execution, skip the
738742
// case.

tests/benchdnn/graph/ref_partition.cpp

+37-14
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,14 @@ ref_partition_t::ref_partition_t(const deserialized_graph_t &dg,
7070
}
7171
};
7272

73-
int ref_partition_t::init_ref(const std::vector<size_t> &graph_in_ports,
74-
partition_mem_map_t &partition_mem_map, res_t *res) {
73+
int ref_partition_t::init_ref(
74+
const std::vector<size_t> &graph_in_ports, res_t *res) {
75+
76+
// Do not create reference primitives or fill data with pre-designed
77+
// strategies for `no_ref_memory`
78+
if (!has_bench_mode_bit(mode_bit_t::corr)
79+
&& has_bench_mode_modifier(mode_modifier_t::no_ref_memory))
80+
return OK;
7581

7682
for (const auto &par_op_ref : partition_ops_ref_) {
7783
// res should be independent from op to op
@@ -150,30 +156,47 @@ int ref_partition_t::init_ref(const std::vector<size_t> &graph_in_ports,
150156
SAFE_V(data_displacer.displace_input_data(
151157
entry.first, const_cast<dnn_mem_t &>(entry.second), res));
152158
}
159+
return OK;
160+
}
161+
162+
int ref_partition_t::init_graph_mem(
163+
partition_mem_map_t &partition_mem_map, res_t *res) {
153164

154165
// init graph input/output memory from lt_id_2_mems_
155166
for (const auto &id : partition_in_ids_) {
156-
if (lt_id_2_mems_.find(id) == lt_id_2_mems_.end()) {
167+
partition_mem_map.emplace(id,
168+
dnn_graph_mem_t({}, lt_id_2_lt_.at(id), /*is_op_input=*/true));
169+
if (lt_id_2_mems_.find(id) != lt_id_2_mems_.end()) {
170+
SAFE(partition_mem_map.at(id).fill_mem_with_data(
171+
lt_id_2_mems_.at(id)),
172+
WARN);
173+
} else if (!has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) {
157174
BENCHDNN_PRINT(0, "Fail: cannot find memory for %zu\n", id);
158175
res->state = FAILED;
159176
return FAIL;
160177
}
161-
partition_mem_map.emplace(id,
162-
dnn_graph_mem_t(
163-
lt_id_2_mems_.at(id), lt_id_2_lt_.at(id), true));
164178
}
179+
165180
for (const auto &id : partition_out_ids_) {
166-
if (fake_lt_ids_.find(id) != fake_lt_ids_.end()) {
167-
partition_mem_map.emplace(
168-
id, dnn_graph_mem_t({}, lt_id_2_lt_.at(id), false, true));
169-
} else if (lt_id_2_mems_.find(id) == lt_id_2_mems_.end()) {
181+
182+
if (fake_lt_ids_.find(id) != fake_lt_ids_.end()
183+
|| has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) {
184+
partition_mem_map.emplace(id,
185+
dnn_graph_mem_t({}, lt_id_2_lt_.at(id),
186+
/*is_op_input=*/false, /*use_graph_layout=*/true));
187+
} else if (lt_id_2_mems_.find(id) != lt_id_2_mems_.end()) {
188+
// For output memories of graph, they need to be in compliance with
189+
// the reference memories regarding the shapes and memory tags, as
190+
// the memories of both paths will be reordered to abx for
191+
// comparison.
192+
partition_mem_map.emplace(id,
193+
dnn_graph_mem_t(lt_id_2_mems_.at(id), lt_id_2_lt_.at(id),
194+
/*is_op_input=*/false));
195+
} else {
170196
BENCHDNN_PRINT(0, "Fail: cannot find memory for %zu\n", id);
171197
res->state = FAILED;
172198
return FAIL;
173-
} else
174-
partition_mem_map.emplace(id,
175-
dnn_graph_mem_t(
176-
lt_id_2_mems_.at(id), lt_id_2_lt_.at(id), false));
199+
}
177200
}
178201

179202
return OK;

tests/benchdnn/graph/ref_partition.hpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,10 @@ class ref_partition_t {
4040
const std::vector<dnnl::graph::logical_tensor> &outs);
4141

4242
// prepare memories in both paths, one by one ref primitive
43-
int init_ref(const std::vector<size_t> &graph_ports,
44-
partition_mem_map_t &partition_mem_map, res_t *res);
43+
int init_ref(const std::vector<size_t> &graph_ports, res_t *res);
44+
45+
int init_graph_mem(partition_mem_map_t &partition_mem_map, res_t *res);
46+
4547
// run partition in ref path, one by one ref primitive
4648
void exec_ops(res_t *res);
4749

0 commit comments

Comments
 (0)