Skip to content

Commit ca8b340

Browse files
Added push(...) and pop(...) for std::vector<> of SIMD registers
1 parent be3458a commit ca8b340

File tree

1 file changed

+56
-0
lines changed

1 file changed

+56
-0
lines changed

src/cpu/x64/jit_generator.hpp

+56
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,34 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
200200
}
201201
}
202202

203+
// Emits code that saves every register in `xmms` to the stack.
//
// The stack pointer is lowered once by the combined size of all registers,
// then each register is stored with uni_vmovdqu at an increasing offset from
// the new rsp: xmms[0] lands at [rsp], the next register directly after it,
// and so on. pop(const std::vector<Xbyak::Xmm> &) called with the same vector
// reads the registers back from this layout.
//
// The per-register size is xmm_len, ymm_len or zmm_len depending on whether
// the operand reports isXMM(), isYMM() or isZMM(); an operand that reports
// none of them is skipped and takes no stack space. An empty vector still
// emits `sub rsp, 0`.
inline void push(const std::vector<Xbyak::Xmm> &xmms) {
    // Pass 1: size of the whole save area, so rsp is adjusted only once and
    // before any store is emitted.
    size_t total_size = 0;
    for (const auto &reg : xmms) {
        if (reg.isXMM()) {
            total_size += xmm_len;
        } else if (reg.isYMM()) {
            total_size += ymm_len;
        } else if (reg.isZMM()) {
            total_size += zmm_len;
        }
    }

    sub(rsp, total_size);

    // Pass 2: emit the stores relative to the already-lowered rsp.
    size_t offset = 0;
    for (const auto &reg : xmms) {
        if (reg.isXMM()) {
            uni_vmovdqu(ptr[rsp + offset], reg);
            offset += xmm_len;
        } else if (reg.isYMM()) {
            // Rebuild the operand with its real width from the register index.
            uni_vmovdqu(ptr[rsp + offset], Xbyak::Ymm{reg.getIdx()});
            offset += ymm_len;
        } else if (reg.isZMM()) {
            uni_vmovdqu(ptr[rsp + offset], Xbyak::Zmm{reg.getIdx()});
            offset += zmm_len;
        }
    }
}
230+
203231
inline void pop(const Xbyak::Xmm &xmm) {
204232
if (xmm.isXMM()) {
205233
uni_vmovdqu(xmm, ptr[rsp]);
@@ -213,6 +241,34 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
213241
}
214242
}
215243

244+
// Emits code that restores every register in `xmms` from the stack and then
// releases the stack space.
//
// Registers are loaded with uni_vmovdqu from increasing offsets off rsp in
// the order they appear in `xmms`: xmms[0] is read from [rsp], the next
// register from directly after it, and so on. Afterwards rsp is raised once
// by the combined size of all registers. This matches the layout written by
// push(const std::vector<Xbyak::Xmm> &) when given the same vector.
//
// The per-register size is xmm_len, ymm_len or zmm_len depending on whether
// the operand reports isXMM(), isYMM() or isZMM(); an operand that reports
// none of them is skipped. An empty vector still emits `add rsp, 0`.
inline void pop(const std::vector<Xbyak::Xmm> &xmms) {
    size_t offset = 0;
    for (const auto &reg : xmms) {
        if (reg.isXMM()) {
            uni_vmovdqu(reg, ptr[rsp + offset]);
            offset += xmm_len;
        } else if (reg.isYMM()) {
            // Rebuild the operand with its real width from the register index.
            uni_vmovdqu(Xbyak::Ymm{reg.getIdx()}, ptr[rsp + offset]);
            offset += ymm_len;
        } else if (reg.isZMM()) {
            uni_vmovdqu(Xbyak::Zmm{reg.getIdx()}, ptr[rsp + offset]);
            offset += zmm_len;
        }
    }

    // All loads are emitted; `offset` now equals the size of the save area.
    add(rsp, offset);
}
271+
216272
void preamble() {
217273
if (xmm_to_preserve) {
218274
sub(rsp, xmm_to_preserve * xmm_len);

0 commit comments

Comments
 (0)