We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 6fec1cd, commit ebbe1ab (Copy full SHA for ebbe1ab)
src/gpu/intel/ocl/micro_sdpa.cpp
@@ -264,7 +264,9 @@ status_t micro_sdpa_t::pd_t::init_microkernels(impl::engine_t *engine) {
264
265
problem_kq.B.layout = MatrixLayout::Pr;
266
problem_kq.C.layout = MatrixLayout::T;
267
- problem_kq.A.setAlignment(alignmentForLD(d->head_size() * problem.Ta));
+ const memory_desc_wrapper key_mdw(key_md());
268
+ auto ldk = gemm_desc_t::get_ld(*key_md()) * key_mdw.data_type_size();
269
+ problem_kq.A.setAlignment(alignmentForLD(ldk));
270
problem_kq.B.setAlignment(64); // Q is packed in VNNI format in SLM
271
problem_kq.B.crosspack = 2;
272
problem_kq.B.tileR = into<uint16_t>(d_max());
0 commit comments