diff --git a/tests/benchdnn/graph/bench_graph.cpp b/tests/benchdnn/graph/bench_graph.cpp index 441bd29ea8b..3224f254995 100644 --- a/tests/benchdnn/graph/bench_graph.cpp +++ b/tests/benchdnn/graph/bench_graph.cpp @@ -58,17 +58,6 @@ void check_correctness(const settings_t &s) { } } -int verify_input(const settings_t &s) { - if (has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) { - // TODO: update graph driver doc page once the limitation is removed. - BENCHDNN_PRINT(0, "%s\n", - "Error: graph driver doesn't support " - "--mode-modifier=M/--mode=F."); - return FAIL; - } - return OK; -} - int bench(int argc, char **argv) { driver_name = "graph"; using namespace parser; @@ -88,7 +77,6 @@ int bench(int argc, char **argv) { if (!parsed_options) { if (!parse_input_file(s.json_file, argv[0])) catch_unknown_options(argv[0]); - SAFE(verify_input(s), WARN); check_correctness(s); flush_temp_memory(); } diff --git a/tests/benchdnn/graph/custom_driver.cpp b/tests/benchdnn/graph/custom_driver.cpp index d2307106b59..7a0540dab77 100644 --- a/tests/benchdnn/graph/custom_driver.cpp +++ b/tests/benchdnn/graph/custom_driver.cpp @@ -293,10 +293,20 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind, int fill_mem(dnn_mem_t &mem_dt, dnn_mem_t &mem_fp, int f_min, int f_max) { + const auto dt = mem_dt.dt(); + if (has_bench_mode_modifier(mode_modifier_t::no_ref_memory) + && !is_integral_dt(dt)) { + // Use data filled by benchdnn for `no_ref_memory`, except some + // customized operations in Graph API which expect the input + // values to indicate indexing information, especially for integral + // inputs. Hence the input values need to be limited to the + // provided range. + return OK; + } + const auto nelems = mem_fp.nelems(); if (nelems == 0) return OK; - const auto dt = mem_dt.dt(); f_min = (dt == dnnl_u8 && f_min < 0) ? 
0 : f_min; const int64_t n_chunks = 16; const int64_t chunk_size = div_up(nelems, n_chunks); @@ -339,8 +349,6 @@ void init_memory_args(dnn_mem_map_t &mem_map, const prb_t *prb, int init_ref_memory_args(dnn_mem_map_t &ref_mem_map, dnn_mem_map_t &mem_map, const prb_t *prb, res_t *res) { - if (has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) return OK; - switch (prb->alg) { case GENINDEX: SAFE(::custom::genindex::init_ref_memory_args( diff --git a/tests/benchdnn/graph/graph.cpp b/tests/benchdnn/graph/graph.cpp index 3860bbffb7c..71a99c31a50 100644 --- a/tests/benchdnn/graph/graph.cpp +++ b/tests/benchdnn/graph/graph.cpp @@ -202,6 +202,10 @@ int find_logical_tensor(size_t lt_id, const graph::op_ref_list_t &ops, int map_unmap_partition_mem(graph::partition_mem_map_t &partition_mem_map, const std::vector <s, const int &map_flag, res_t *res) { + + // Do not map or unmap the reference primitive memories for `no_ref_memory` + if (has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) return OK; + // In case one logical tensor is used for multiple inputs, record the // processed logical tensor ids to avoid duplicate processing std::unordered_set processed_ids; @@ -253,7 +257,6 @@ int make_input_tensors(std::vector &input_ts, } // generate tensor for graph path - const auto iter = partition_mem_map.find(lt_id); if (iter != partition_mem_map.end()) { const auto &graph_mem = iter->second; @@ -663,10 +666,12 @@ int doit(const prb_t *prb, res_t *res) { std::vector output_ts(outputs.size()); ref_partition_t ref_partition(dg, partitions[i], inputs, outputs); + // Construct memory for both perf & corr modes - SAFE(ref_partition.init_ref( - graph_in_ports, partition_mem_map_v[i], res), - WARN); + SAFE(ref_partition.init_ref(graph_in_ports, res), WARN); + if (res->state == SKIPPED) return OK; + + SAFE(ref_partition.init_graph_mem(partition_mem_map_v[i], res), WARN); if (res->state == SKIPPED) return OK; if (has_bench_mode_bit(mode_bit_t::corr)) { @@ -683,15 +688,12 @@ int 
doit(const prb_t *prb, res_t *res) { } // unmap memory from host to device - map_unmap_partition_mem(partition_mem_map_v[i], inputs, UNMAP, res); - map_unmap_partition_mem(partition_mem_map_v[i], outputs, UNMAP, res); - if (res->state == FAIL) { - BENCHDNN_PRINT(0, - "FAIL: Fail to unmap memories to host for partition " - "%zu.\n", - i); - return FAIL; - } + SAFE(map_unmap_partition_mem( + partition_mem_map_v[i], inputs, UNMAP, res), + WARN); + SAFE(map_unmap_partition_mem( + partition_mem_map_v[i], outputs, UNMAP, res), + WARN); const op_ref_list_t &op_list = ref_partition.get_partition_ops(); const auto &inplace_ports @@ -731,8 +733,10 @@ int doit(const prb_t *prb, res_t *res) { graph_mem_mgr.stop_graph_mem_check(); // map memory from device back to host - map_unmap_partition_mem(partition_mem_map_v[i], inputs, MAP, res); - map_unmap_partition_mem(partition_mem_map_v[i], outputs, MAP, res); + SAFE(map_unmap_partition_mem(partition_mem_map_v[i], inputs, MAP, res), + WARN); + SAFE(map_unmap_partition_mem(partition_mem_map_v[i], outputs, MAP, res), + WARN); // If the device is out-of-memory due to graph path execution, skip the // case. diff --git a/tests/benchdnn/graph/graph_memory.cpp b/tests/benchdnn/graph/graph_memory.cpp index c8fb95b5a72..05c9578983f 100644 --- a/tests/benchdnn/graph/graph_memory.cpp +++ b/tests/benchdnn/graph/graph_memory.cpp @@ -45,23 +45,10 @@ size_t get_benchdnn_device_limit() { // Constructs memories for all inputs and outputs needed for comparison. dnn_graph_mem_t::dnn_graph_mem_t(const dnn_mem_t &mem, const deserialized_lt <, const bool is_op_input, - const bool is_fake_output) + const bool use_graph_layout) : graph_dims_(lt.shape_), graph_strides_(lt.stride_) { - const auto &prim_dt = mem.dt(); - // Conversion from graph types to dnnl types + boolean to u8. 
- const auto &graph_dt = convert_dt(lt.get_data_type()); - - // Get memory tag of primitive memory - int ndims = mem.ndims(); - dims_t strides(mem.strides(), mem.strides() + ndims); - std::string mtag = strides2memory_tag(ndims, strides); - const auto &g_eng = get_graph_engine().operator const dnnl::engine &(); - // We create memory for graph path in two steps: - // 1. Create memory objects. - // 2. Do memory copy if needed. - // // For inputs, graph path needs data from reference path, // and the data movement requires both memories have the same // shape, so the tag of graph path is used to create the memory. @@ -70,42 +57,77 @@ dnn_graph_mem_t::dnn_graph_mem_t(const dnn_mem_t &mem, // otherwise use shape & tag from ref path side // Create memory for graph path + const auto &graph_dt = convert_dt(lt.get_data_type()); const auto data_type = static_cast(graph_dt); - if (is_op_input) { - if (graph_dims_.empty()) graph_dims_.push_back(1); - if (graph_strides_.empty()) graph_strides_.push_back(1); - // create graph memory + if (graph_dims_.empty()) { + // As graph strides are deduced from graph dims, they should be in + // compliance with each other. + assert(graph_strides_.empty()); + + graph_dims_.push_back(1); + graph_strides_.push_back(1); + } + + if (is_op_input) { + // Create graph memory with memory description from graph path. dnnl::memory::desc md(graph_dims_, data_type, graph_strides_); mem_ = dnn_mem_t(md.get(), g_eng.get()); - - const auto prim_to_graph_memcpy = [](dnn_mem_t &graph_mem, - const dnn_mem_t &prim_mem) { - const void *prim_data_handle = static_cast(prim_mem); - void *graph_data_handle = graph_mem.get_mapped_pointer(); - std::memcpy(graph_data_handle, prim_data_handle, graph_mem.size()); - }; - - if (prim_dt != graph_dt) { - // Call a reorder (for data conversion) when reference memory - // doesn't coincide with the graph memory... 
- dnn_mem_t c_mem(ndims, mem.dims(), graph_dt, mtag, g_eng.get()); - SAFE_V(c_mem.reorder(mem)); - prim_to_graph_memcpy(mem_, c_mem); - } else { - // ... otherwise, perform a plain memcpy. - prim_to_graph_memcpy(mem_, mem); - } } else { - if (is_fake_output) { + if (use_graph_layout) { + // For some cases such as fake outputs and no reference memory + // mode, which means the output does not have corresponding + // argument in primitives, we need to create them with memory + // description from graph path. dnnl::memory::desc md(graph_dims_, data_type, graph_strides_); mem_ = dnn_mem_t(md.get(), g_eng.get()); + } else { + // Use information from the reference memory descriptor to create + // memories. As we need to reorder output from both paths to abx + // for comparison, the memory tag of graph path output should align + // with the reference path. + + // Get memory tag of primitive memory + int ndims = mem.ndims(); + dims_t strides(mem.strides(), mem.strides() + ndims); + std::string mtag = strides2memory_tag(ndims, strides); + mem_ = dnn_mem_t(mem.md_, graph_dt, mtag, g_eng.get()); } } } +int dnn_graph_mem_t::fill_mem_with_data(const dnn_mem_t &mem) { + + if (mem.size() != mem_.size()) return FAILED; + + const auto &src_dt = mem.dt(); + const auto &dst_dt = mem_.dt(); + + int ndims = mem.ndims(); + dims_t strides(mem.strides(), mem.strides() + ndims); + std::string mtag = strides2memory_tag(ndims, strides); + const auto &g_eng = get_graph_engine().operator const dnnl::engine &(); + + const auto prim_to_graph_memcpy = [](dnn_mem_t &graph_mem, + const dnn_mem_t &prim_mem) { + const void *prim_data_handle = static_cast(prim_mem); + void *graph_data_handle = graph_mem.get_mapped_pointer(); + std::memcpy(graph_data_handle, prim_data_handle, graph_mem.size()); + }; + + if (src_dt != dst_dt) { + dnn_mem_t c_mem(ndims, mem.dims(), dst_dt, mtag, g_eng.get()); + SAFE_V(c_mem.reorder(mem)); + prim_to_graph_memcpy(mem_, c_mem); + } else { + prim_to_graph_memcpy(mem_, mem); + } 
+ + return OK; +} + dnnl::graph::tensor dnn_graph_mem_t::make_graph_tensor( const deserialized_lt <) const { void *data_handle; diff --git a/tests/benchdnn/graph/graph_memory.hpp b/tests/benchdnn/graph/graph_memory.hpp index 35fcc94e715..e9221fbe76b 100644 --- a/tests/benchdnn/graph/graph_memory.hpp +++ b/tests/benchdnn/graph/graph_memory.hpp @@ -156,12 +156,15 @@ struct dnn_graph_mem_t { // // The constructor accepts three boolean parameters: // 1. is_op_input: whether the logical tensor is an input of an op - // 2. is_fake_output: for fake outputs, the driver cannot create memory - // objects based on primitive memory for them, but construct memory - // from graph shape. The default value is false. + // 2. use_graph_layout: for fake outputs and modes without reference + // memories, the driver cannot create memory objects based on primitive + // memory for them, so it constructs memory from graph shape. The default + // value is false. // dnn_graph_mem_t(const dnn_mem_t &mem, const deserialized_lt <, - const bool is_op_input, const bool is_fake_output = false); + const bool is_op_input, const bool use_graph_layout = false); + + int fill_mem_with_data(const dnn_mem_t &mem); dnnl::graph::tensor make_graph_tensor(const deserialized_lt <) const; diff --git a/tests/benchdnn/graph/ref_partition.cpp b/tests/benchdnn/graph/ref_partition.cpp index 5d42c4e7204..a87199e7a03 100644 --- a/tests/benchdnn/graph/ref_partition.cpp +++ b/tests/benchdnn/graph/ref_partition.cpp @@ -70,8 +70,14 @@ ref_partition_t::ref_partition_t(const deserialized_graph_t &dg, } }; -int ref_partition_t::init_ref(const std::vector &graph_in_ports, - partition_mem_map_t &partition_mem_map, res_t *res) { +int ref_partition_t::init_ref( + const std::vector &graph_in_ports, res_t *res) { + + // Do not create reference primitives or fill data with pre-designed + // strategies for `no_ref_memory` + if (!has_bench_mode_bit(mode_bit_t::corr) + && has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) + return 
OK; for (const auto &par_op_ref : partition_ops_ref_) { // res should be independent from op to op @@ -150,30 +156,47 @@ int ref_partition_t::init_ref(const std::vector &graph_in_ports, SAFE_V(data_displacer.displace_input_data( entry.first, const_cast(entry.second), res)); } + return OK; +} + +int ref_partition_t::init_graph_mem( + partition_mem_map_t &partition_mem_map, res_t *res) { // init graph input/oputput memory from lt_id_2_mems_ for (const auto &id : partition_in_ids_) { - if (lt_id_2_mems_.find(id) == lt_id_2_mems_.end()) { + partition_mem_map.emplace(id, + dnn_graph_mem_t({}, lt_id_2_lt_.at(id), /*is_op_input=*/true)); + if (lt_id_2_mems_.find(id) != lt_id_2_mems_.end()) { + SAFE(partition_mem_map.at(id).fill_mem_with_data( + lt_id_2_mems_.at(id)), + WARN); + } else if (!has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) { BENCHDNN_PRINT(0, "Fail: cannot find memory for %zu\n", id); res->state = FAILED; return FAIL; } - partition_mem_map.emplace(id, - dnn_graph_mem_t( - lt_id_2_mems_.at(id), lt_id_2_lt_.at(id), true)); } + for (const auto &id : partition_out_ids_) { - if (fake_lt_ids_.find(id) != fake_lt_ids_.end()) { - partition_mem_map.emplace( - id, dnn_graph_mem_t({}, lt_id_2_lt_.at(id), false, true)); - } else if (lt_id_2_mems_.find(id) == lt_id_2_mems_.end()) { + + if (fake_lt_ids_.find(id) != fake_lt_ids_.end() + || has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) { + partition_mem_map.emplace(id, + dnn_graph_mem_t({}, lt_id_2_lt_.at(id), + /*is_op_input=*/false, /*use_graph_layout=*/true)); + } else if (lt_id_2_mems_.find(id) != lt_id_2_mems_.end()) { + // For output memories of graph, they need to be in compliance with + // the reference memories regarding the shapes and memory tags, as + // the memories of both paths will be reordered to abx for + // comparison. 
+ partition_mem_map.emplace(id, + dnn_graph_mem_t(lt_id_2_mems_.at(id), lt_id_2_lt_.at(id), + /*is_op_input=*/false)); + } else { BENCHDNN_PRINT(0, "Fail: cannot find memory for %zu\n", id); res->state = FAILED; return FAIL; - } else - partition_mem_map.emplace(id, - dnn_graph_mem_t( - lt_id_2_mems_.at(id), lt_id_2_lt_.at(id), false)); + } } return OK; diff --git a/tests/benchdnn/graph/ref_partition.hpp b/tests/benchdnn/graph/ref_partition.hpp index 7fe726792ad..e74d24f5ab3 100644 --- a/tests/benchdnn/graph/ref_partition.hpp +++ b/tests/benchdnn/graph/ref_partition.hpp @@ -40,8 +40,10 @@ class ref_partition_t { const std::vector &outs); // prepare memories in both paths, one by one ref primitive - int init_ref(const std::vector &graph_ports, - partition_mem_map_t &partition_mem_map, res_t *res); + int init_ref(const std::vector &graph_ports, res_t *res); + + int init_graph_mem(partition_mem_map_t &partition_mem_map, res_t *res); + // run partition in ref path, one by one ref primitive void exec_ops(res_t *res); diff --git a/tests/benchdnn/graph/setting_handler.cpp b/tests/benchdnn/graph/setting_handler.cpp index 7ce35dbf6b6..21b1d272a24 100644 --- a/tests/benchdnn/graph/setting_handler.cpp +++ b/tests/benchdnn/graph/setting_handler.cpp @@ -1627,7 +1627,6 @@ bool get_reduction_prb_vdims( } prb_vdims.vdims = {src_dims, dst_dims}; - prb_vdims.dst_dims = src_dims; prb_vdims.ndims = static_cast(src_dims.size()); return true; }