diff --git a/src/cpu/x64/injectors/jit_uni_depthwise_injector.cpp b/src/cpu/x64/injectors/jit_uni_depthwise_injector.cpp index 9295ed0257b..651fb939a89 100644 --- a/src/cpu/x64/injectors/jit_uni_depthwise_injector.cpp +++ b/src/cpu/x64/injectors/jit_uni_depthwise_injector.cpp @@ -255,7 +255,7 @@ void jit_uni_depthwise_injector_f32::compute(int start_idx, int end_idx, if (need_to_preserve) { if (preserved_vecs_count > 1) pop_vmm(h, vmm_aux0); - if (preserved_vecs_count > 1) + if (preserved_vecs_count > 0) pop_vmm(h, vmm_mask); } } diff --git a/src/cpu/x64/jit_avx512_core_bf16_dw_conv_kernel.cpp b/src/cpu/x64/jit_avx512_core_bf16_dw_conv_kernel.cpp index 47fef46abcd..155678acb0b 100644 --- a/src/cpu/x64/jit_avx512_core_bf16_dw_conv_kernel.cpp +++ b/src/cpu/x64/jit_avx512_core_bf16_dw_conv_kernel.cpp @@ -467,7 +467,7 @@ void jit_avx512_dw_conv_fwd_kernel_bf16::compute_loop( pop(reg_output); pop(reg_input); pop(reg_kernel); - base_post_ops_data_offset -= reg64_size; + base_post_ops_data_offset -= 3 * reg64_size; } else { compute(ur_ch_blocks, masked_ch_block_tail); diff --git a/src/cpu/x64/jit_avx512_core_bf16_dw_conv_kernel.hpp b/src/cpu/x64/jit_avx512_core_bf16_dw_conv_kernel.hpp index 0ce4cae4e3b..93f44e13b3e 100644 --- a/src/cpu/x64/jit_avx512_core_bf16_dw_conv_kernel.hpp +++ b/src/cpu/x64/jit_avx512_core_bf16_dw_conv_kernel.hpp @@ -61,7 +61,7 @@ struct jit_avx512_dw_conv_fwd_kernel_bf16 : public jit_generator { reg64_t iter_kh = rax; reg64_t reg_oi = rbx; - reg64_t reg_tmp = reg_ch_blocks; + reg64_t reg_tmp = rbp; // fused convolution reg64_t reg_input_buffer_ptr = rdx;