tests: benchdnn: support --mode-modifier=M for graph driver #2880

Open · wants to merge 4 commits into base: main
12 changes: 0 additions & 12 deletions tests/benchdnn/graph/bench_graph.cpp
@@ -58,17 +58,6 @@ void check_correctness(const settings_t &s) {
}
}

int verify_input(const settings_t &s) {
if (has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) {
// TODO: update graph driver doc page once the limitation is removed.
BENCHDNN_PRINT(0, "%s\n",
"Error: graph driver doesn't support "
"--mode-modifier=M/--mode=F.");
return FAIL;
}
return OK;
}

int bench(int argc, char **argv) {
driver_name = "graph";
using namespace parser;
@@ -88,7 +77,6 @@ int bench(int argc, char **argv) {
if (!parsed_options) {
if (!parse_input_file(s.json_file, argv[0]))
catch_unknown_options(argv[0]);
SAFE(verify_input(s), WARN);
check_correctness(s);
flush_temp_memory();
}
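With verify_input() gone, the graph driver no longer rejects --mode-modifier=M / --mode=F runs up front. As background, the check relies on benchdnn's bit-mask mode modifiers; the snippet below is a minimal, self-contained sketch of that pattern, not benchdnn's actual definitions (the enum values and the global bench_mode_modifier here are illustrative assumptions).

// Minimal sketch (not benchdnn's actual code) of a bit-mask "mode modifier"
// check in the spirit of has_bench_mode_modifier(). The enum values and the
// global bench_mode_modifier below are illustrative assumptions.
#include <cstdint>
#include <cstdio>

enum class mode_modifier_t : uint32_t {
    none = 0x0,
    par_create = 0x1, // hypothetical: parallel primitive creation
    no_ref_memory = 0x2, // the "M" modifier: skip reference-path memories
};

inline mode_modifier_t operator&(mode_modifier_t a, mode_modifier_t b) {
    return static_cast<mode_modifier_t>(
            static_cast<uint32_t>(a) & static_cast<uint32_t>(b));
}

// Global set by command-line parsing, e.g. from --mode-modifier=M.
static mode_modifier_t bench_mode_modifier = mode_modifier_t::no_ref_memory;

bool has_bench_mode_modifier(mode_modifier_t modifier) {
    return (bench_mode_modifier & modifier) != mode_modifier_t::none;
}

int main() {
    printf("no_ref_memory set: %d\n",
            has_bench_mode_modifier(mode_modifier_t::no_ref_memory));
    return 0;
}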
14 changes: 11 additions & 3 deletions tests/benchdnn/graph/custom_driver.cpp
@@ -293,10 +293,20 @@ void setup_cmp(compare::compare_t &cmp, const prb_t *prb, data_kind_t kind,

int fill_mem(dnn_mem_t &mem_dt, dnn_mem_t &mem_fp, int f_min, int f_max) {

const auto dt = mem_dt.dt();
if (has_bench_mode_modifier(mode_modifier_t::no_ref_memory)
&& !is_integral_dt(dt)) {
// Use the data filled by benchdnn for `no_ref_memory`, except for some
// customized operations in the Graph API which expect the input
// values to carry indexing information, especially for integral
// inputs. Hence the input values need to be limited to the
// provided range.
return OK;
}

const auto nelems = mem_fp.nelems();
if (nelems == 0) return OK;

const auto dt = mem_dt.dt();
f_min = (dt == dnnl_u8 && f_min < 0) ? 0 : f_min;
const int64_t n_chunks = 16;
const int64_t chunk_size = div_up(nelems, n_chunks);
@@ -339,8 +349,6 @@ void init_memory_args(dnn_mem_map_t &mem_map, const prb_t *prb,

int init_ref_memory_args(dnn_mem_map_t &ref_mem_map, dnn_mem_map_t &mem_map,
const prb_t *prb, res_t *res) {
if (has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) return OK;

switch (prb->alg) {
case GENINDEX:
SAFE(::custom::genindex::init_ref_memory_args(
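The fill_mem() change keeps the data benchdnn already filled for floating-point tensors when reference memories are disabled, but still constrains integral tensors to [f_min, f_max] because custom graph operations such as GENINDEX read them as indices. The following standalone sketch illustrates that gate with simplified stand-in types; it is not the benchdnn implementation.

// Standalone sketch (simplified stand-in types, not benchdnn's classes) of
// the filling gate added to fill_mem(): skip refilling non-integral data
// under no_ref_memory, but keep integral data inside [f_min, f_max] since
// custom graph ops may interpret it as indices.
#include <algorithm>
#include <cstdint>
#include <vector>

enum class data_kind_t { f32, bf16, s32, u8 };

static bool is_integral_dt(data_kind_t dt) {
    return dt == data_kind_t::s32 || dt == data_kind_t::u8;
}

struct mem_sketch_t {
    data_kind_t dt;
    std::vector<int64_t> values; // integral payload for illustration
};

// Returns without touching `mem` when the pre-filled data can be reused.
void fill_mem_sketch(mem_sketch_t &mem, bool no_ref_memory, int64_t f_min,
        int64_t f_max) {
    if (no_ref_memory && !is_integral_dt(mem.dt)) return;
    if (mem.dt == data_kind_t::u8) f_min = std::max<int64_t>(f_min, 0);
    int64_t v = f_min;
    for (auto &e : mem.values) {
        e = v; // simple cyclic filler within [f_min, f_max]
        v = (v == f_max) ? f_min : v + 1;
    }
}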
34 changes: 19 additions & 15 deletions tests/benchdnn/graph/graph.cpp
@@ -202,6 +202,10 @@ int find_logical_tensor(size_t lt_id, const graph::op_ref_list_t &ops,
int map_unmap_partition_mem(graph::partition_mem_map_t &partition_mem_map,
const std::vector<dnnl::graph::logical_tensor> &lts,
const int &map_flag, res_t *res) {

// Do not map or unmap the reference primitive memories for `no_ref_memory`.
if (has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) return OK;

// In case one logical tensor is used for multiple inputs, record the
// processed logical tensor ids to avoid duplicate processing
std::unordered_set<size_t> processed_ids;
@@ -253,7 +257,6 @@ int make_input_tensors(std::vector<dnnl::graph::tensor> &input_ts,
}

// generate tensor for graph path

const auto iter = partition_mem_map.find(lt_id);
if (iter != partition_mem_map.end()) {
const auto &graph_mem = iter->second;
@@ -663,10 +666,12 @@ int doit(const prb_t *prb, res_t *res) {
std::vector<dnnl::graph::tensor> output_ts(outputs.size());

ref_partition_t ref_partition(dg, partitions[i], inputs, outputs);

// Construct memory for both perf & corr modes
SAFE(ref_partition.init_ref(
graph_in_ports, partition_mem_map_v[i], res),
WARN);
SAFE(ref_partition.init_ref(graph_in_ports, res), WARN);
if (res->state == SKIPPED) return OK;

SAFE(ref_partition.init_graph_mem(partition_mem_map_v[i], res), WARN);
if (res->state == SKIPPED) return OK;

if (has_bench_mode_bit(mode_bit_t::corr)) {
@@ -683,15 +688,12 @@
}

// unmap memory from host to device
map_unmap_partition_mem(partition_mem_map_v[i], inputs, UNMAP, res);
map_unmap_partition_mem(partition_mem_map_v[i], outputs, UNMAP, res);
if (res->state == FAIL) {
BENCHDNN_PRINT(0,
"FAIL: Fail to unmap memories to host for partition "
"%zu.\n",
i);
return FAIL;
}
SAFE(map_unmap_partition_mem(
partition_mem_map_v[i], inputs, UNMAP, res),
WARN);
SAFE(map_unmap_partition_mem(
partition_mem_map_v[i], outputs, UNMAP, res),
WARN);

const op_ref_list_t &op_list = ref_partition.get_partition_ops();
const auto &inplace_ports
@@ -731,8 +733,10 @@
graph_mem_mgr.stop_graph_mem_check();

// map memory from device back to host
map_unmap_partition_mem(partition_mem_map_v[i], inputs, MAP, res);
map_unmap_partition_mem(partition_mem_map_v[i], outputs, MAP, res);
SAFE(map_unmap_partition_mem(partition_mem_map_v[i], inputs, MAP, res),
WARN);
SAFE(map_unmap_partition_mem(partition_mem_map_v[i], outputs, MAP, res),
WARN);

// If the device is out-of-memory due to graph path execution, skip the
// case.
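To recap the doit() restructuring: reference-side setup (init_ref) and graph-side memory creation (init_graph_mem) are now separate calls, and either may mark the case SKIPPED. Below is a compressed, self-contained sketch of that control flow using hypothetical stub types rather than benchdnn's real classes.

// Self-contained sketch (hypothetical stub types) of the reworked doit()
// sequence: build reference data first, then materialize graph-side
// memories, bailing out early when the case is SKIPPED.
#define OK 0
#define FAIL 1

enum state_t { UNTESTED, SKIPPED, FAILED };
struct res_t {
    state_t state = UNTESTED;
};

struct ref_partition_sketch_t {
    int init_ref(res_t * /*res*/) { return OK; } // fill reference memories
    int init_graph_mem(res_t * /*res*/) { return OK; } // create graph memories
};

int doit_sketch(res_t *res) {
    ref_partition_sketch_t ref_partition;
    if (ref_partition.init_ref(res) != OK) return FAIL;
    if (res->state == SKIPPED) return OK; // a skipped case is not an error
    if (ref_partition.init_graph_mem(res) != OK) return FAIL;
    if (res->state == SKIPPED) return OK;
    // ... execute the graph partition and, in correctness mode, compare.
    return OK;
}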
96 changes: 59 additions & 37 deletions tests/benchdnn/graph/graph_memory.cpp
@@ -45,23 +45,10 @@ size_t get_benchdnn_device_limit() {
// Constructs memories for all inputs and outputs needed for comparison.
dnn_graph_mem_t::dnn_graph_mem_t(const dnn_mem_t &mem,
const deserialized_lt &lt, const bool is_op_input,
const bool is_fake_output)
const bool use_graph_layout)
: graph_dims_(lt.shape_), graph_strides_(lt.stride_) {
const auto &prim_dt = mem.dt();
// Conversion from graph types to dnnl types + boolean to u8.
const auto &graph_dt = convert_dt(lt.get_data_type());

// Get memory tag of primitive memory
int ndims = mem.ndims();
dims_t strides(mem.strides(), mem.strides() + ndims);
std::string mtag = strides2memory_tag(ndims, strides);

const auto &g_eng = get_graph_engine().operator const dnnl::engine &();

// We create memory for graph path in two steps:
// 1. Create memory objects.
// 2. Do memory copy if needed.
//
// For inputs, graph path needs data from reference path,
// and the data movement requires both memories have the same
// shape, so the tag of graph path is used to create the memory.
@@ -70,42 +57,77 @@
// otherwise use shape & tag from ref path side

// Create memory for graph path
const auto &graph_dt = convert_dt(lt.get_data_type());
const auto data_type = static_cast<dnnl::memory::data_type>(graph_dt);
if (is_op_input) {
if (graph_dims_.empty()) graph_dims_.push_back(1);
if (graph_strides_.empty()) graph_strides_.push_back(1);

// create graph memory
if (graph_dims_.empty()) {
// As graph strides are deduced from graph dims, they should be
// consistent with each other.
assert(graph_strides_.empty());

graph_dims_.push_back(1);
graph_strides_.push_back(1);
}

if (is_op_input) {
// Create graph memory with memory description from graph path.
dnnl::memory::desc md(graph_dims_, data_type, graph_strides_);
mem_ = dnn_mem_t(md.get(), g_eng.get());

const auto prim_to_graph_memcpy = [](dnn_mem_t &graph_mem,
const dnn_mem_t &prim_mem) {
const void *prim_data_handle = static_cast<const void *>(prim_mem);
void *graph_data_handle = graph_mem.get_mapped_pointer<void>();
std::memcpy(graph_data_handle, prim_data_handle, graph_mem.size());
};

if (prim_dt != graph_dt) {
// Call a reorder (for data conversion) when reference memory
// doesn't coincide with the graph memory...
dnn_mem_t c_mem(ndims, mem.dims(), graph_dt, mtag, g_eng.get());
SAFE_V(c_mem.reorder(mem));
prim_to_graph_memcpy(mem_, c_mem);
} else {
// ... otherwise, perform a plain memcpy.
prim_to_graph_memcpy(mem_, mem);
}
} else {
if (is_fake_output) {
if (use_graph_layout) {
// For some cases, such as fake outputs and the no-reference-memory
// mode, the output does not have a corresponding argument in the
// primitives, so we need to create the memory with the memory
// description from the graph path.
dnnl::memory::desc md(graph_dims_, data_type, graph_strides_);
mem_ = dnn_mem_t(md.get(), g_eng.get());

} else {
// Use information from the reference memory descriptor to create
// memories. As we need to reorder output from both paths to abx
// for comparison, the memory tag of the graph path output should
// align with that of the reference path.

// Get memory tag of primitive memory
int ndims = mem.ndims();
dims_t strides(mem.strides(), mem.strides() + ndims);
std::string mtag = strides2memory_tag(ndims, strides);

mem_ = dnn_mem_t(mem.md_, graph_dt, mtag, g_eng.get());
}
}
}

int dnn_graph_mem_t::fill_mem_with_data(const dnn_mem_t &mem) {

if (mem.size() != mem_.size()) return FAIL;

const auto &src_dt = mem.dt();
const auto &dst_dt = mem_.dt();

int ndims = mem.ndims();
dims_t strides(mem.strides(), mem.strides() + ndims);
std::string mtag = strides2memory_tag(ndims, strides);
const auto &g_eng = get_graph_engine().operator const dnnl::engine &();

const auto prim_to_graph_memcpy = [](dnn_mem_t &graph_mem,
const dnn_mem_t &prim_mem) {
const void *prim_data_handle = static_cast<const void *>(prim_mem);
void *graph_data_handle = graph_mem.get_mapped_pointer<void>();
std::memcpy(graph_data_handle, prim_data_handle, graph_mem.size());
};

if (src_dt != dst_dt) {
dnn_mem_t c_mem(ndims, mem.dims(), dst_dt, mtag, g_eng.get());
SAFE_V(c_mem.reorder(mem));
prim_to_graph_memcpy(mem_, c_mem);
} else {
prim_to_graph_memcpy(mem_, mem);
}

return OK;
}

dnnl::graph::tensor dnn_graph_mem_t::make_graph_tensor(
const deserialized_lt &lt) const {
void *data_handle;
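The prim_to_graph_memcpy path in fill_mem_with_data() copies bytes directly when both memories share a data type and goes through a reorder when they do not. The standalone example below shows the same decision written against the public oneDNN C++ API (assumptions: CPU engine, an f32 source converted to bf16); it is an illustration, not the benchdnn helper itself.

// Standalone sketch of the "reorder when data types differ, plain copy
// otherwise" pattern, written directly against the oneDNN C++ API.
#include <cstring>
#include <vector>
#include "oneapi/dnnl/dnnl.hpp"

int main() {
    using namespace dnnl;
    engine eng(engine::kind::cpu, 0);
    stream strm(eng);

    memory::dims dims = {2, 3};
    memory::desc src_md(dims, memory::data_type::f32, memory::format_tag::ab);
    memory::desc dst_md(dims, memory::data_type::bf16, memory::format_tag::ab);

    std::vector<float> src_data = {1, 2, 3, 4, 5, 6};
    memory src_mem(src_md, eng, src_data.data());
    memory dst_mem(dst_md, eng);

    if (src_md.get_data_type() != dst_md.get_data_type()) {
        // Data types differ: convert with a reorder primitive.
        reorder(src_mem, dst_mem).execute(strm, src_mem, dst_mem);
        strm.wait();
    } else {
        // Same data type: a raw byte copy is enough.
        std::memcpy(dst_mem.get_data_handle(), src_mem.get_data_handle(),
                dst_md.get_size());
    }
    return 0;
}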
11 changes: 7 additions & 4 deletions tests/benchdnn/graph/graph_memory.hpp
@@ -156,12 +156,15 @@ struct dnn_graph_mem_t {
//
// The constructor accepts two boolean parameters:
// 1. is_op_input: whether the logical tensor is an input of an op
// 2. is_fake_output: for fake outputs, the driver cannot create memory
// objects based on primitive memory for them, but construct memory
// from graph shape. The default value is false.
// 2. use_graph_layout: for fake outputs and the mode without reference
// memories, the driver cannot create memory objects based on the
// primitive memory, so it constructs the memory from the graph
// shape instead. The default value is false.
//
dnn_graph_mem_t(const dnn_mem_t &mem, const deserialized_lt &lt,
const bool is_op_input, const bool is_fake_output = false);
const bool is_op_input, const bool use_graph_layout = false);

int fill_mem_with_data(const dnn_mem_t &mem);

dnnl::graph::tensor make_graph_tensor(const deserialized_lt &lt) const;

51 changes: 37 additions & 14 deletions tests/benchdnn/graph/ref_partition.cpp
@@ -70,8 +70,14 @@ ref_partition_t::ref_partition_t(const deserialized_graph_t &dg,
}
};

int ref_partition_t::init_ref(const std::vector<size_t> &graph_in_ports,
partition_mem_map_t &partition_mem_map, res_t *res) {
int ref_partition_t::init_ref(
const std::vector<size_t> &graph_in_ports, res_t *res) {

// Do not create reference primitives or fill data with pre-designed
// strategies for `no_ref_memory`.
if (!has_bench_mode_bit(mode_bit_t::corr)
&& has_bench_mode_modifier(mode_modifier_t::no_ref_memory))
return OK;

for (const auto &par_op_ref : partition_ops_ref_) {
// res should be independent from op to op
@@ -150,30 +156,47 @@ int ref_partition_t::init_ref(const std::vector<size_t> &graph_in_ports,
SAFE_V(data_displacer.displace_input_data(
entry.first, const_cast<dnn_mem_t &>(entry.second), res));
}
return OK;
}

int ref_partition_t::init_graph_mem(
partition_mem_map_t &partition_mem_map, res_t *res) {

// init graph input/output memory from lt_id_2_mems_
for (const auto &id : partition_in_ids_) {
if (lt_id_2_mems_.find(id) == lt_id_2_mems_.end()) {
partition_mem_map.emplace(id,
dnn_graph_mem_t({}, lt_id_2_lt_.at(id), /*is_op_input=*/true));
if (lt_id_2_mems_.find(id) != lt_id_2_mems_.end()) {
SAFE(partition_mem_map.at(id).fill_mem_with_data(
lt_id_2_mems_.at(id)),
WARN);
} else if (!has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) {
BENCHDNN_PRINT(0, "Fail: cannot find memory for %zu\n", id);
res->state = FAILED;
return FAIL;
}
partition_mem_map.emplace(id,
dnn_graph_mem_t(
lt_id_2_mems_.at(id), lt_id_2_lt_.at(id), true));
}

for (const auto &id : partition_out_ids_) {
if (fake_lt_ids_.find(id) != fake_lt_ids_.end()) {
partition_mem_map.emplace(
id, dnn_graph_mem_t({}, lt_id_2_lt_.at(id), false, true));
} else if (lt_id_2_mems_.find(id) == lt_id_2_mems_.end()) {

if (fake_lt_ids_.find(id) != fake_lt_ids_.end()
|| has_bench_mode_modifier(mode_modifier_t::no_ref_memory)) {
partition_mem_map.emplace(id,
dnn_graph_mem_t({}, lt_id_2_lt_.at(id),
/*is_op_input=*/false, /*use_graph_layout=*/true));
} else if (lt_id_2_mems_.find(id) != lt_id_2_mems_.end()) {
// Graph output memories need to be consistent with the reference
// memories regarding shapes and memory tags, as the memories of
// both paths will be reordered to abx for comparison.
partition_mem_map.emplace(id,
dnn_graph_mem_t(lt_id_2_mems_.at(id), lt_id_2_lt_.at(id),
/*is_op_input=*/false));
} else {
BENCHDNN_PRINT(0, "Fail: cannot find memory for %zu\n", id);
res->state = FAILED;
return FAIL;
} else
partition_mem_map.emplace(id,
dnn_graph_mem_t(
lt_id_2_mems_.at(id), lt_id_2_lt_.at(id), false));
}
}

return OK;
6 changes: 4 additions & 2 deletions tests/benchdnn/graph/ref_partition.hpp
@@ -40,8 +40,10 @@ class ref_partition_t {
const std::vector<dnnl::graph::logical_tensor> &outs);

// prepare memories in both paths, one by one ref primitive
int init_ref(const std::vector<size_t> &graph_ports,
partition_mem_map_t &partition_mem_map, res_t *res);
int init_ref(const std::vector<size_t> &graph_ports, res_t *res);

int init_graph_mem(partition_mem_map_t &partition_mem_map, res_t *res);

// run partition in ref path, one by one ref primitive
void exec_ops(res_t *res);

1 change: 0 additions & 1 deletion tests/benchdnn/graph/setting_handler.cpp
@@ -1627,7 +1627,6 @@ bool get_reduction_prb_vdims(
}

prb_vdims.vdims = {src_dims, dst_dims};
prb_vdims.dst_dims = src_dims;
prb_vdims.ndims = static_cast<int>(src_dims.size());
return true;
}