@@ -1407,14 +1407,21 @@ void jit_uni_pool_kernel<isa>::generate() {
1407
1407
1408
1408
n_oi -= nstl::max (0 , r_pad_iterations);
1409
1409
1410
+ // This case will fail (when l_pad and r_pad have an overlapping part):
1411
+ // onednn_verbose,exec,cpu,pooling_v2,jit:avx2,forward_inference,src_f32::blocked:aBcd8b:f0 dst_f32::blocked:aBcd8b:f0 ws_undef::undef::f0,,alg:pooling_max,mb1ic8_ih13oh13kh13sh1dh0ph6_iw13ow13kw13sw1dw0pw6,3.7991
1412
+ int cur_n_oi = 0 ;
1413
+
1410
1414
for (int i = 0 ; i < l_pad_iterations; ++i) {
1411
1415
n_oi--;
1412
1416
const int cur_l_pad = l_pad - i * ur_stride_w;
1413
- if (n_oi < 0 && r_pad1 > 0 )
1417
+ if (n_oi < 0 && r_pad1 > 0 ) {
1418
+ const int cur_r_pad = calculate_end_padding (l_pad, ur_w * (i + 1 ), iw, stride_w, kw);
1414
1419
process_oi (
1415
- ur_w, ur_bc, cur_l_pad, r_pad1, with_c_tail_processing);
1420
+ ur_w, ur_bc, cur_l_pad, cur_r_pad, with_c_tail_processing);
1421
+ }
1416
1422
else if (n_oi >= 0 )
1417
1423
process_oi (ur_w, ur_bc, cur_l_pad, 0 , with_c_tail_processing);
1424
+ cur_n_oi++;
1418
1425
}
1419
1426
1420
1427
xor_ (oi_iter, oi_iter);
@@ -1428,16 +1435,12 @@ void jit_uni_pool_kernel<isa>::generate() {
1428
1435
cmp (oi_iter, n_oi);
1429
1436
jl (ow_loop, T_NEAR);
1430
1437
}
1438
+ cur_n_oi += n_oi;
1431
1439
}
1432
1440
1433
- if (n_oi >= 0 ) {
1434
- const int r_pad1_tail = r_pad1 % ur_stride_w != 0
1435
- ? r_pad1 % ur_stride_w
1436
- : ur_stride_w;
1437
- for (int i = 0 ; i < r_pad_iterations; ++i) {
1438
- const int cur_r_pad = r_pad1_tail + ur_stride_w * i;
1439
- process_oi (ur_w, ur_bc, 0 , cur_r_pad, with_c_tail_processing);
1440
- }
1441
+ for (int i = cur_n_oi; i < n_oi_iterations; ++i) {
1442
+ const int cur_r_pad = calculate_end_padding (l_pad, ur_w * (i + 1 ), iw, stride_w, kw);
1443
+ process_oi (ur_w, ur_bc, 0 , cur_r_pad, with_c_tail_processing);
1441
1444
}
1442
1445
1443
1446
if (ur_w_tail != 0 ) {
0 commit comments