graph_memory.cpp
/*******************************************************************************
* Copyright 2023-2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "graph_memory.hpp"
#include "allocator.hpp"
#include "oneapi/dnnl/dnnl_graph.hpp"
// 0.75f is taken randomly and is subject to change in future.
static constexpr float capacity_factor = 0.75f;
namespace graph {

size_t get_benchdnn_cpu_limit() {
    static size_t cpu_device_capacity = get_cpu_ram_size();
    const double benchdnn_cpu_limit = capacity_factor * cpu_device_capacity;
    assert(benchdnn_cpu_limit > 0);
    return benchdnn_cpu_limit;
}
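
// An illustrative (hypothetical) use of the limit: benchdnn-style code can
// skip a case whose total memory footprint would exceed 75% of host RAM.
// `total_mem_size` and the result handling below are assumptions, not code
// from this file.
//
//     if (total_mem_size > graph::get_benchdnn_cpu_limit()) {
//         res->state = SKIPPED;
//         res->reason = skip_reason::not_enough_ram;
//     }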

size_t get_benchdnn_device_limit() {
    if (is_cpu()) return 0;

    static size_t gpu_device_capacity = 0;
    static size_t gpu_max_alloc_capacity = 0;
    SAFE(get_gpu_ram_sizes(gpu_device_capacity, gpu_max_alloc_capacity), WARN);

    const double benchdnn_device_limit = capacity_factor * gpu_device_capacity;
    assert(benchdnn_device_limit > 0);
    return benchdnn_device_limit;
}

// Constructs memories for all inputs and outputs needed for comparison.
dnn_graph_mem_t::dnn_graph_mem_t(const dnn_mem_t &mem,
        const deserialized_lt &lt, const bool is_op_input,
        const bool use_graph_layout)
    : graph_dims_(lt.shape_), graph_strides_(lt.stride_) {

    const auto &g_eng = get_graph_engine().operator const dnnl::engine &();

    // For inputs, the graph path needs data from the reference path, and the
    // data movement requires that both memories have the same shape, so the
    // tag of the graph path is used to create the memory.
    //
    // For outputs, use the shape & tag from the graph path for fake outputs;
    // otherwise, use the shape & tag from the reference path.

    // Create memory for the graph path.
    const auto &graph_dt = convert_dt(lt.get_data_type());
    const auto data_type = static_cast<dnnl::memory::data_type>(graph_dt);
    if (graph_dims_.empty()) {
        // As graph strides are deduced from graph dims, they should be in
        // compliance with each other.
        assert(graph_strides_.empty());
        graph_dims_.push_back(1);
        graph_strides_.push_back(1);
    }

    if (is_op_input) {
        // Create graph memory with the memory description from the graph
        // path.
        dnnl::memory::desc md(graph_dims_, data_type, graph_strides_);
        mem_ = dnn_mem_t(md.get(), g_eng.get());
    } else {
        if (use_graph_layout) {
            // For some cases, such as fake outputs and no-reference-memory
            // mode, the output has no corresponding argument in the
            // primitive, so the memory must be created with the memory
            // description from the graph path.
            dnnl::memory::desc md(graph_dims_, data_type, graph_strides_);
            mem_ = dnn_mem_t(md.get(), g_eng.get());
        } else {
            // Use information from the reference memory descriptor to create
            // the memory. As outputs from both paths are reordered to abx
            // for comparison, the memory tag of the graph path output should
            // align with the reference path.

            // Get the memory tag of the primitive memory.
            int ndims = mem.ndims();
            dims_t strides(mem.strides(), mem.strides() + ndims);
            std::string mtag = strides2memory_tag(ndims, strides);

            mem_ = dnn_mem_t(mem.md_, graph_dt, mtag, g_eng.get());
        }
    }
}
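
// Construction sketch (illustrative; `ref_mem` and `lt` are hypothetical
// caller-side names): the same constructor serves both directions, with the
// two flags selecting which side provides the layout.
//
//     // Input: graph-path md; data is copied in from the reference path.
//     dnn_graph_mem_t in_mem(ref_mem, lt, /*is_op_input=*/true,
//             /*use_graph_layout=*/false);
//     // Real output: reference-path tag, so both paths reorder to abx alike.
//     dnn_graph_mem_t out_mem(ref_mem, lt, /*is_op_input=*/false,
//             /*use_graph_layout=*/false);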

int dnn_graph_mem_t::fill_mem_with_data(const dnn_mem_t &mem) {
    if (mem.size() != mem_.size()) return FAILED;

    const auto &src_dt = mem.dt();
    const auto &dst_dt = mem_.dt();

    int ndims = mem.ndims();
    dims_t strides(mem.strides(), mem.strides() + ndims);
    std::string mtag = strides2memory_tag(ndims, strides);
    const auto &g_eng = get_graph_engine().operator const dnnl::engine &();

    const auto prim_to_graph_memcpy = [](dnn_mem_t &graph_mem,
                                              const dnn_mem_t &prim_mem) {
        const void *prim_data_handle = static_cast<const void *>(prim_mem);
        void *graph_data_handle = graph_mem.get_mapped_pointer<void>();
        std::memcpy(graph_data_handle, prim_data_handle, graph_mem.size());
    };

    if (src_dt != dst_dt) {
        // Data types differ: reorder into a temporary memory with the graph
        // data type first, then copy the raw bytes.
        dnn_mem_t c_mem(ndims, mem.dims(), dst_dt, mtag, g_eng.get());
        SAFE_V(c_mem.reorder(mem));
        prim_to_graph_memcpy(mem_, c_mem);
    } else {
        prim_to_graph_memcpy(mem_, mem);
    }

    return OK;
}
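
// Typical flow (a sketch under the same assumptions as above): after
// constructing the graph-side memory, copy the reference data into it and
// check the status, since a size mismatch returns FAILED.
//
//     dnn_graph_mem_t g_mem(ref_mem, lt, /*is_op_input=*/true,
//             /*use_graph_layout=*/false);
//     if (g_mem.fill_mem_with_data(ref_mem) != OK) return FAIL;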

dnnl::graph::tensor dnn_graph_mem_t::make_graph_tensor(
        const deserialized_lt &lt) const {
    void *data_handle;
    dnnl_memory_get_data_handle(mem_.m_, &data_handle);

    dnnl::graph::logical_tensor graph_lt(lt.id_, lt.get_data_type(), lt.shape_,
            str2layout(lt.layout_type_), lt.get_property_type());
    dnnl::graph::tensor ret(graph_lt, get_graph_engine(), data_handle);

    return ret;
}
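
// Execution sketch (illustrative; `cp`, `strm`, `in_lts`, `out_lts`, and
// `graph_mems` are hypothetical): the tensors produced here are what
// dnnl::graph::compiled_partition::execute() consumes.
//
//     std::vector<dnnl::graph::tensor> in_ts, out_ts;
//     for (const auto &lt : in_lts)
//         in_ts.push_back(graph_mems.at(lt.id_).make_graph_tensor(lt));
//     for (const auto &lt : out_lts)
//         out_ts.push_back(graph_mems.at(lt.id_).make_graph_tensor(lt));
//     cp.execute(strm, in_ts, out_ts);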

void flush_temp_memory() {
    using namespace dnnl::graph;

    // Flush the constant tensor cache.
    const auto kind = engine_tgt_kind == dnnl_cpu ? engine::kind::cpu
                                                  : engine::kind::gpu;
    static size_t ct_capacity = get_constant_tensor_cache_capacity(kind);
    if (ct_capacity > 0) set_constant_tensor_cache_capacity(kind, ct_capacity);

    // Flush the compiled partition cache.
#ifndef DNNL_GRAPH_DISABLE_COMPILED_PARTITION_CACHE
    static int cp_capacity = get_compiled_partition_cache_capacity();
    set_compiled_partition_cache_capacity(0); // Clear the cache.
    set_compiled_partition_cache_capacity(
            cp_capacity); // Restore the cache capacity.
#endif

#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL \
        || DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
    if (!has_bench_mode_bit(mode_bit_t::corr) && is_gpu()) {
        auto &graph_mem_mgr = graph_mem_manager_t::get_instance();
        graph_mem_mgr.clear_memory_pool();
    }
#endif
}
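
// The flush logic above relies on the dnnl::graph cache controls dropping
// entries on a capacity update: re-setting the saved constant tensor cache
// capacity flushes cached tensors, and lowering the compiled partition cache
// capacity to zero before restoring it empties that cache without
// permanently disabling it. A standalone sketch of the latter idiom:
//
//     const int cap = dnnl::graph::get_compiled_partition_cache_capacity();
//     dnnl::graph::set_compiled_partition_cache_capacity(0); // Evict all.
//     dnnl::graph::set_compiled_partition_cache_capacity(cap); // Re-enable.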
} // namespace graph