Skip to content

Commit 8c6ed50

Browse files
committed
cpu: x64: gemm: kern -> kern_t
1 parent 3470f7c commit 8c6ed50

File tree

113 files changed

+697
-674
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

113 files changed

+697
-674
lines changed

src/cpu/x64/gemm/amx/jit_avx512_core_amx_copy_kern.cpp

+18-16
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ static inline Zmm make_zmm(const Xmm &v) {
4242
return Zmm(v.getIdx());
4343
}
4444

45-
void jit_avx512_core_amx_copy_kern::transpose(int s, const Ymm &dst1,
45+
void jit_avx512_core_amx_copy_kern_t::transpose(int s, const Ymm &dst1,
4646
const Ymm &dst2, const Ymm &src1, const Ymm &src2) {
4747
switch (s) {
4848
case 32:
@@ -91,8 +91,9 @@ void jit_avx512_core_amx_copy_kern::transpose(int s, const Ymm &dst1,
9191
}
9292
}
9393

94-
void jit_avx512_core_amx_copy_kern::amxtrans8(const Ymm &dst1, const Ymm &dst2,
95-
const Ymm &src1, const Ymm &src2, const Ymm &src3, const Ymm &src4) {
94+
void jit_avx512_core_amx_copy_kern_t::amxtrans8(const Ymm &dst1,
95+
const Ymm &dst2, const Ymm &src1, const Ymm &src2, const Ymm &src3,
96+
const Ymm &src4) {
9697
vpunpcklbw(dst1, src1, src2);
9798
vpunpckhbw(dst2, src1, src2);
9899
vpunpcklbw(src1, src3, src4);
@@ -107,7 +108,7 @@ void jit_avx512_core_amx_copy_kern::amxtrans8(const Ymm &dst1, const Ymm &dst2,
107108
vshufi32x4(src4, dst1, dst2, 0x03);
108109
}
109110

110-
void jit_avx512_core_amx_copy_kern::amxtrans16(
111+
void jit_avx512_core_amx_copy_kern_t::amxtrans16(
111112
const Ymm &dst1, const Ymm &dst2, const Ymm &src1, const Ymm &src2) {
112113
vpunpcklwd(dst1, src1, src2);
113114
vpunpckhwd(dst2, src1, src2);
@@ -117,7 +118,7 @@ void jit_avx512_core_amx_copy_kern::amxtrans16(
117118
vshufi32x4(src2, src2, src2, 0xd8);
118119
}
119120

120-
void jit_avx512_core_amx_copy_kern::load(
121+
void jit_avx512_core_amx_copy_kern_t::load(
121122
const Xmm &dst, const Address &src, bool corner) {
122123
if (!corner && isize_ == 1)
123124
vmovdqu8(dst, src);
@@ -129,14 +130,15 @@ void jit_avx512_core_amx_copy_kern::load(
129130
vmovdqu16(dst | k1 | T_z, src);
130131
}
131132

132-
void jit_avx512_core_amx_copy_kern::store(const Address &dst, const Xmm &src) {
133+
void jit_avx512_core_amx_copy_kern_t::store(
134+
const Address &dst, const Xmm &src) {
133135
if (size_ == 1)
134136
vmovdqu8(dst, src);
135137
else
136138
vmovdqu16(dst, src);
137139
}
138140

139-
void jit_avx512_core_amx_copy_kern::kernel_AN(
141+
void jit_avx512_core_amx_copy_kern_t::kernel_AN(
140142
int unroll_x, int unroll_y, int step, Reg64 A, Reg64 B, bool corner) {
141143
// Transpose data.
142144
int u[] = {32, 16, 8, 4};
@@ -170,7 +172,7 @@ void jit_avx512_core_amx_copy_kern::kernel_AN(
170172
}
171173
}
172174

173-
void jit_avx512_core_amx_copy_kern::kernel_BN(
175+
void jit_avx512_core_amx_copy_kern_t::kernel_BN(
174176
int unroll_x, int unroll_y, int step, Reg64 A, Reg64 B, bool corner) {
175177
// Store data.
176178
for (int i = 0; i < 16; i++)
@@ -179,7 +181,7 @@ void jit_avx512_core_amx_copy_kern::kernel_BN(
179181
src_[i]);
180182
}
181183

182-
void jit_avx512_core_amx_copy_kern::kernel_AT(
184+
void jit_avx512_core_amx_copy_kern_t::kernel_AT(
183185
int unroll_x, int unroll_y, int step, Reg64 A, Reg64 B, bool corner) {
184186
Ymm v[16];
185187

@@ -258,7 +260,7 @@ void jit_avx512_core_amx_copy_kern::kernel_AT(
258260
}
259261
}
260262

261-
void jit_avx512_core_amx_copy_kern::kernel_BT(
263+
void jit_avx512_core_amx_copy_kern_t::kernel_BT(
262264
int unroll_x, int unroll_y, int step, Reg64 A, Reg64 B, bool corner) {
263265
// Transpose data.
264266
int u[] = {16, 8, 4, 2, 1};
@@ -297,7 +299,7 @@ void jit_avx512_core_amx_copy_kern::kernel_BT(
297299
L(store_end);
298300
}
299301

300-
void jit_avx512_core_amx_copy_kern::kernel(
302+
void jit_avx512_core_amx_copy_kern_t::kernel(
301303
int unroll_x, int unroll_y, int step, Reg64 A, Reg64 B, bool corner) {
302304

303305
// Load matrix.
@@ -326,7 +328,7 @@ void jit_avx512_core_amx_copy_kern::kernel(
326328
kernel_BT(unroll_x, unroll_y, step, A, B, corner);
327329
}
328330

329-
void jit_avx512_core_amx_copy_kern::copy_m(int unroll_m, int unroll_n) {
331+
void jit_avx512_core_amx_copy_kern_t::copy_m(int unroll_m, int unroll_n) {
330332
if (is_trans_) {
331333
mov(B1_, B_);
332334
add(B_, unroll_m * unroll_n * size_);
@@ -378,7 +380,7 @@ void jit_avx512_core_amx_copy_kern::copy_m(int unroll_m, int unroll_n) {
378380
L_aligned(kernel_tail_end);
379381
}
380382

381-
void jit_avx512_core_amx_copy_kern::copy_ns(int unroll_n, Label &epilogue) {
383+
void jit_avx512_core_amx_copy_kern_t::copy_ns(int unroll_n, Label &epilogue) {
382384
if (unroll_n > 0) {
383385
copy_ns(unroll_n - 1, epilogue);
384386

@@ -393,7 +395,7 @@ void jit_avx512_core_amx_copy_kern::copy_ns(int unroll_n, Label &epilogue) {
393395
}
394396
}
395397

396-
void jit_avx512_core_amx_copy_kern::copy_n(int unroll_n, Label &epilogue) {
398+
void jit_avx512_core_amx_copy_kern_t::copy_n(int unroll_n, Label &epilogue) {
397399

398400
Label copy_m_loop, copy_m_end;
399401

@@ -422,7 +424,7 @@ void jit_avx512_core_amx_copy_kern::copy_n(int unroll_n, Label &epilogue) {
422424
copy_ns(unroll_n - 1, epilogue);
423425
}
424426

425-
void jit_avx512_core_amx_copy_kern::generate() {
427+
void jit_avx512_core_amx_copy_kern_t::generate() {
426428
// Prologue
427429
preamble();
428430
sub(rsp, stack_alloc_size_);
@@ -494,7 +496,7 @@ void jit_avx512_core_amx_copy_kern::generate() {
494496
postamble();
495497
}
496498

497-
jit_avx512_core_amx_copy_kern::jit_avx512_core_amx_copy_kern(
499+
jit_avx512_core_amx_copy_kern_t::jit_avx512_core_amx_copy_kern_t(
498500
bool is_a, bool is_trans, int isize)
499501
: jit_generator_t(jit_name())
500502
, is_a_(is_a)

src/cpu/x64/gemm/amx/jit_avx512_core_amx_copy_kern.hpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ namespace impl {
2424
namespace cpu {
2525
namespace x64 {
2626

27-
class jit_avx512_core_amx_copy_kern : public jit_generator_t {
27+
class jit_avx512_core_amx_copy_kern_t : public jit_generator_t {
2828
public:
29-
jit_avx512_core_amx_copy_kern(bool is_a, bool is_trans, int isize);
30-
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_amx_copy_kern);
29+
jit_avx512_core_amx_copy_kern_t(bool is_a, bool is_trans, int isize);
30+
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_amx_copy_kern_t);
3131

3232
protected:
3333
bool is_a_;

src/cpu/x64/gemm/amx/jit_avx512_core_amx_gemm_kern.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ namespace x64 {
5959
#define TILED(X) dword[rsp + ((X) + 0xc0)]
6060
#define TILEQ(X) qword[rsp + ((X) + 0xc0)]
6161

62-
void jit_avx512_core_amx_gemm_kern::generate() {
62+
void jit_avx512_core_amx_gemm_kern_t::generate() {
6363

6464
int kerneltype = ((typea << 1) | typeb);
6565

@@ -455,7 +455,7 @@ void jit_avx512_core_amx_gemm_kern::generate() {
455455
ret();
456456
}
457457

458-
jit_avx512_core_amx_gemm_kern::jit_avx512_core_amx_gemm_kern(
458+
jit_avx512_core_amx_gemm_kern_t::jit_avx512_core_amx_gemm_kern_t(
459459
int typea, int typeb, int typec, int betaZero)
460460
: jit_generator_t(jit_name(), avx512_core_amx)
461461
, typea(typea)

src/cpu/x64/gemm/amx/jit_avx512_core_amx_gemm_kern.hpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ namespace impl {
2424
namespace cpu {
2525
namespace x64 {
2626

27-
class jit_avx512_core_amx_gemm_kern : public jit_generator_t {
27+
class jit_avx512_core_amx_gemm_kern_t : public jit_generator_t {
2828
public:
29-
jit_avx512_core_amx_gemm_kern(
29+
jit_avx512_core_amx_gemm_kern_t(
3030
int typea, int typeb, int typec, int betaZero);
31-
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_amx_gemm_kern);
31+
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_amx_gemm_kern_t);
3232

3333
protected:
3434
void generate() override;

src/cpu/x64/gemm/bf16/common_s16.hpp

+24-24
Original file line numberDiff line numberDiff line change
@@ -24,68 +24,68 @@ namespace impl {
2424
namespace cpu {
2525
namespace x64 {
2626

27-
class jit_avx512_core_s16_48x8_copy_an_kern : public jit_generator_t {
28-
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_an_kern);
27+
class jit_avx512_core_s16_48x8_copy_an_kern_t : public jit_generator_t {
28+
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_an_kern_t);
2929
void generate() override;
3030

3131
public:
32-
jit_avx512_core_s16_48x8_copy_an_kern();
32+
jit_avx512_core_s16_48x8_copy_an_kern_t();
3333
};
3434

35-
class jit_avx512_core_s16_48x8_copy_at_kern : public jit_generator_t {
36-
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_at_kern);
35+
class jit_avx512_core_s16_48x8_copy_at_kern_t : public jit_generator_t {
36+
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_at_kern_t);
3737
void generate() override;
3838

3939
public:
40-
jit_avx512_core_s16_48x8_copy_at_kern();
40+
jit_avx512_core_s16_48x8_copy_at_kern_t();
4141
};
4242

43-
class jit_avx512_core_s16_48x8_copy_bn_kern : public jit_generator_t {
44-
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_bn_kern);
43+
class jit_avx512_core_s16_48x8_copy_bn_kern_t : public jit_generator_t {
44+
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_bn_kern_t);
4545
void generate() override;
4646

4747
public:
48-
jit_avx512_core_s16_48x8_copy_bn_kern();
48+
jit_avx512_core_s16_48x8_copy_bn_kern_t();
4949
};
5050

51-
class jit_avx512_core_s16_48x8_copy_bt_kern : public jit_generator_t {
52-
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_bt_kern);
51+
class jit_avx512_core_s16_48x8_copy_bt_kern_t : public jit_generator_t {
52+
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_48x8_copy_bt_kern_t);
5353
void generate() override;
5454

5555
public:
56-
jit_avx512_core_s16_48x8_copy_bt_kern();
56+
jit_avx512_core_s16_48x8_copy_bt_kern_t();
5757
};
5858

59-
class jit_avx512_core_s16_24x8_copy_an_kern : public jit_generator_t {
60-
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_an_kern);
59+
class jit_avx512_core_s16_24x8_copy_an_kern_t : public jit_generator_t {
60+
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_an_kern_t);
6161
void generate() override;
6262

6363
public:
64-
jit_avx512_core_s16_24x8_copy_an_kern();
64+
jit_avx512_core_s16_24x8_copy_an_kern_t();
6565
};
6666

67-
class jit_avx512_core_s16_24x8_copy_at_kern : public jit_generator_t {
68-
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_at_kern);
67+
class jit_avx512_core_s16_24x8_copy_at_kern_t : public jit_generator_t {
68+
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_at_kern_t);
6969
void generate() override;
7070

7171
public:
72-
jit_avx512_core_s16_24x8_copy_at_kern();
72+
jit_avx512_core_s16_24x8_copy_at_kern_t();
7373
};
7474

75-
class jit_avx512_core_s16_24x8_copy_bn_kern : public jit_generator_t {
76-
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_bn_kern);
75+
class jit_avx512_core_s16_24x8_copy_bn_kern_t : public jit_generator_t {
76+
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_bn_kern_t);
7777
void generate() override;
7878

7979
public:
80-
jit_avx512_core_s16_24x8_copy_bn_kern();
80+
jit_avx512_core_s16_24x8_copy_bn_kern_t();
8181
};
8282

83-
class jit_avx512_core_s16_24x8_copy_bt_kern : public jit_generator_t {
84-
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_bt_kern);
83+
class jit_avx512_core_s16_24x8_copy_bt_kern_t : public jit_generator_t {
84+
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_s16_24x8_copy_bt_kern_t);
8585
void generate() override;
8686

8787
public:
88-
jit_avx512_core_s16_24x8_copy_bt_kern();
88+
jit_avx512_core_s16_24x8_copy_bt_kern_t();
8989
};
9090

9191
} // namespace x64

src/cpu/x64/gemm/bf16/jit_avx512_core_gemm_bf16bf16f32_kern.cpp

+13-11
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ static inline Zmm make_zmm(const Xmm &v) {
4646
}
4747

4848
// Load from or store to C.
49-
void jit_avx512_core_gemm_bf16bf16f32_kern::c_load(
49+
void jit_avx512_core_gemm_bf16bf16f32_kern_t::c_load(
5050
const Xbyak::Xmm &dst, const Xbyak::Address &src, int nelems) {
5151
switch (nelems) {
5252
case 1: vmovss(make_xmm(dst), src); break;
@@ -60,7 +60,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::c_load(
6060
}
6161
}
6262

63-
void jit_avx512_core_gemm_bf16bf16f32_kern::c_store(
63+
void jit_avx512_core_gemm_bf16bf16f32_kern_t::c_store(
6464
const Xbyak::Address &dst, const Xbyak::Xmm &src, int nelems) {
6565
switch (nelems) {
6666
case 1: vmovss(dst, make_xmm(src)); break;
@@ -76,7 +76,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::c_store(
7676

7777
// Perform length-2 dot product accumulations of bfloat16 in parallel.
7878
// Use vdpbf16ps if available, otherwise emulate.
79-
void jit_avx512_core_gemm_bf16bf16f32_kern::dot_product(
79+
void jit_avx512_core_gemm_bf16bf16f32_kern_t::dot_product(
8080
const Xmm &dst, const Xmm &src1, const Xmm &src2) {
8181
if (bfloat16_)
8282
vdpbf16ps(dst, src1, src2);
@@ -85,7 +85,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::dot_product(
8585
}
8686

8787
// Inner kernel.
88-
void jit_avx512_core_gemm_bf16bf16f32_kern::kernel_loop(
88+
void jit_avx512_core_gemm_bf16bf16f32_kern_t::kernel_loop(
8989
int unroll_m, int unroll_n, bool cfetch) {
9090
int um_vecs = utils::div_up(unroll_m, c_nelems_);
9191
Label label_kernel_loop;
@@ -147,7 +147,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::kernel_loop(
147147
}
148148

149149
// k remainder loop for kernel.
150-
void jit_avx512_core_gemm_bf16bf16f32_kern::remainder_kernel(
150+
void jit_avx512_core_gemm_bf16bf16f32_kern_t::remainder_kernel(
151151
int unroll_m, int unroll_n, int unroll_k, int bwidth) {
152152
int um_vecs = utils::div_up(unroll_m, c_nelems_);
153153

@@ -181,7 +181,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::remainder_kernel(
181181
}
182182

183183
// Inner loop.
184-
void jit_avx512_core_gemm_bf16bf16f32_kern::innerloop(
184+
void jit_avx512_core_gemm_bf16bf16f32_kern_t::innerloop(
185185
int unroll_m, int unroll_n) {
186186
int um_vecs = utils::div_up(unroll_m, c_nelems_);
187187
int stage1 = unroll_n, stage2 = unroll_n;
@@ -311,7 +311,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::innerloop(
311311
}
312312

313313
// Outer loop.
314-
void jit_avx512_core_gemm_bf16bf16f32_kern::outerloop(
314+
void jit_avx512_core_gemm_bf16bf16f32_kern_t::outerloop(
315315
int unroll_x, int unroll_y, Label *&cur_outerloop_label) {
316316
Label label_m_loop, label_n_loop, label_n_remainder_loops[6];
317317

@@ -375,7 +375,7 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::outerloop(
375375
align(16);
376376
}
377377

378-
void jit_avx512_core_gemm_bf16bf16f32_kern::generate() {
378+
void jit_avx512_core_gemm_bf16bf16f32_kern_t::generate() {
379379
// Prologue
380380
preamble();
381381
sub(rsp, stack_alloc_size_);
@@ -423,8 +423,9 @@ void jit_avx512_core_gemm_bf16bf16f32_kern::generate() {
423423
postamble();
424424
}
425425

426-
jit_avx512_core_gemm_bf16bf16f32_kern::jit_avx512_core_gemm_bf16bf16f32_kern(
427-
bool beta_zero, bool alpha_one, bool use_zmm)
426+
jit_avx512_core_gemm_bf16bf16f32_kern_t::
427+
jit_avx512_core_gemm_bf16bf16f32_kern_t(
428+
bool beta_zero, bool alpha_one, bool use_zmm)
428429
: jit_generator_t(jit_name())
429430
, beta_zero_(beta_zero)
430431
, alpha_one_(alpha_one)
@@ -507,7 +508,8 @@ jit_avx512_core_gemm_bf16bf16f32_kern::jit_avx512_core_gemm_bf16bf16f32_kern(
507508
this, one_, even_, selector_, scratch_, zmm_tmp0_, zmm_tmp1_);
508509
}
509510

510-
jit_avx512_core_gemm_bf16bf16f32_kern::~jit_avx512_core_gemm_bf16bf16f32_kern()
511+
jit_avx512_core_gemm_bf16bf16f32_kern_t::
512+
~jit_avx512_core_gemm_bf16bf16f32_kern_t()
511513
= default;
512514
} // namespace x64
513515
} // namespace cpu

src/cpu/x64/gemm/bf16/jit_avx512_core_gemm_bf16bf16f32_kern.hpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ namespace impl {
2525
namespace cpu {
2626
namespace x64 {
2727

28-
class jit_avx512_core_gemm_bf16bf16f32_kern : public jit_generator_t {
28+
class jit_avx512_core_gemm_bf16bf16f32_kern_t : public jit_generator_t {
2929
public:
30-
jit_avx512_core_gemm_bf16bf16f32_kern(
30+
jit_avx512_core_gemm_bf16bf16f32_kern_t(
3131
bool beta_zero, bool alpha_one, bool use_zmm);
32-
~jit_avx512_core_gemm_bf16bf16f32_kern() override;
33-
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_gemm_bf16bf16f32_kern);
32+
~jit_avx512_core_gemm_bf16bf16f32_kern_t() override;
33+
DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_gemm_bf16bf16f32_kern_t);
3434

3535
protected:
3636
bool beta_zero_;
@@ -98,7 +98,7 @@ class jit_avx512_core_gemm_bf16bf16f32_kern : public jit_generator_t {
9898
Xbyak::Zmm zmm_tmp0_;
9999
Xbyak::Zmm zmm_tmp1_;
100100

101-
DNNL_DISALLOW_COPY_AND_ASSIGN(jit_avx512_core_gemm_bf16bf16f32_kern);
101+
DNNL_DISALLOW_COPY_AND_ASSIGN(jit_avx512_core_gemm_bf16bf16f32_kern_t);
102102
};
103103

104104
} // namespace x64

0 commit comments

Comments
 (0)