@@ -413,6 +413,20 @@ int fill_data(data_kind_t kind, const prb_t *prb, const cfg_t &cfg,
     density_args.n_acc = prb->k;
     const auto density = cfg.get_density(density_args);
 
+    const auto &e_zp_src = prb->attr.zero_points.get(DNNL_ARG_SRC);
+    const bool has_src_zp = !e_zp_src.is_def();
+    const int src_zp_mask = attr_t::get_default_mask(e_zp_src.policy);
+    // Apply src_zp for source tensor only.
+    int src_zp = kind == SRC && has_src_zp && src_zp_mask == 0 ? e_zp_src.value
+                                                               : 0;
+
+    const auto &e_zp_wei = prb->attr.zero_points.get(DNNL_ARG_WEIGHTS);
+    const bool has_wei_zp = !e_zp_wei.is_def();
+    const int wei_zp_mask = attr_t::get_default_mask(e_zp_wei.policy);
+    // Apply wei_zp for weights tensor only.
+    int wei_zp = kind == WEI && has_wei_zp && wei_zp_mask == 0 ? e_zp_wei.value
+                                                               : 0;
+
     /* Do fixed partitioning to have same filling for any number of threads */
     const int64_t chunk_size = 64;
     const int64_t n_chunks = div_up(nelems, chunk_size);
@@ -438,6 +452,7 @@ int fill_data(data_kind_t kind, const prb_t *prb, const cfg_t &cfg,
             float val = 0;
             while (val <= 0)
                 val = gen(int_seed);
+            val += src_zp + wei_zp; // Add zp so that it will be subtracted.
             mem_fp.set_elem(
                     0, round_to_nearest_representable(cfg.get_dt(kind), val));
             idx_start += 1;
@@ -453,6 +468,7 @@ int fill_data(data_kind_t kind, const prb_t *prb, const cfg_t &cfg,
                 val *= is_one;
             } else {
                 val = is_one * gen(int_seed);
+                val += src_zp + wei_zp; // Add zp so that it will be subtracted.
             }
             mem_fp.set_elem(
                     idx, round_to_nearest_representable(cfg.get_dt(kind), val));
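
The idea behind the added lines: benchdnn's generated values must survive the library's zero-point subtraction, so the filler pre-adds the COMMON (mask == 0) zero point to each stored element, and the primitive's later (value - zp) step then recovers the originally generated value. Below is a minimal standalone sketch of that cancellation; the concrete numbers are illustrative and not taken from the patch.

#include <cstdio>

int main() {
    const int src_zp = 2; // hypothetical COMMON (mask == 0) zero point for SRC
    const int gen_val = 3; // value produced by the filler's generator
    const int stored = gen_val + src_zp; // what the patched fill_data() writes
    const int effective = stored - src_zp; // what the primitive computes with
    // Prints "stored = 5, effective = 3": the zero-point subtraction
    // restores the generated value, keeping it in the configured range.
    std::printf("stored = %d, effective = %d\n", stored, effective);
    return 0;
}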