Skip to content

Commit bf2f943

Browse files
JinZr and csukuangfj authored
Enabling char_level and compute_CER for aishell recipe (#1554)
* init fix

Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com>
1 parent 2dfd5db commit bf2f943

File tree

13 files changed

+80
-26
lines changed

13 files changed

+80
-26
lines changed

egs/aishell/ASR/conformer_ctc/decode.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -419,7 +419,7 @@ def save_results(
419419
for key, results in results_dict.items():
420420
recog_path = params.exp_dir / f"recogs-{test_set_name}-{key}.txt"
421421
results = sorted(results)
422-
store_transcripts(filename=recog_path, texts=results)
422+
store_transcripts(filename=recog_path, texts=results, char_level=True)
423423
if enable_log:
424424
logging.info(f"The transcripts are stored in {recog_path}")
425425

@@ -432,7 +432,11 @@ def save_results(
432432
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
433433
with open(errs_filename, "w") as f:
434434
wer = write_error_stats(
435-
f, f"{test_set_name}-{key}", results_char, enable_log=enable_log
435+
f,
436+
f"{test_set_name}-{key}",
437+
results_char,
438+
enable_log=enable_log,
439+
compute_CER=True,
436440
)
437441
test_set_wers[key] = wer
438442

egs/aishell/ASR/conformer_mmi/decode.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -431,7 +431,7 @@ def save_results(
431431
for key, results in results_dict.items():
432432
recog_path = params.exp_dir / f"recogs-{test_set_name}-{key}.txt"
433433
results = sorted(results)
434-
store_transcripts(filename=recog_path, texts=results)
434+
store_transcripts(filename=recog_path, texts=results, char_level=True)
435435
if enable_log:
436436
logging.info(f"The transcripts are stored in {recog_path}")
437437

@@ -444,7 +444,11 @@ def save_results(
444444
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
445445
with open(errs_filename, "w") as f:
446446
wer = write_error_stats(
447-
f, f"{test_set_name}-{key}", results_char, enable_log=enable_log
447+
f,
448+
f"{test_set_name}-{key}",
449+
results_char,
450+
enable_log=enable_log,
451+
compute_CER=True,
448452
)
449453
test_set_wers[key] = wer
450454

egs/aishell/ASR/pruned_transducer_stateless2/decode.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -390,7 +390,7 @@ def save_results(
390390
for key, results in results_dict.items():
391391
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
392392
results = sorted(results)
393-
store_transcripts(filename=recog_path, texts=results)
393+
store_transcripts(filename=recog_path, texts=results, char_level=True)
394394
logging.info(f"The transcripts are stored in {recog_path}")
395395

396396
# The following prints out WERs, per-word error statistics and aligned
@@ -402,7 +402,11 @@ def save_results(
402402
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
403403
with open(errs_filename, "w") as f:
404404
wer = write_error_stats(
405-
f, f"{test_set_name}-{key}", results_char, enable_log=True
405+
f,
406+
f"{test_set_name}-{key}",
407+
results_char,
408+
enable_log=True,
409+
compute_CER=True,
406410
)
407411
test_set_wers[key] = wer
408412

egs/aishell/ASR/pruned_transducer_stateless3/decode.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -526,7 +526,7 @@ def save_results(
526526
for key, results in results_dict.items():
527527
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
528528
results = sorted(results)
529-
store_transcripts(filename=recog_path, texts=results)
529+
store_transcripts(filename=recog_path, texts=results, char_level=True)
530530
logging.info(f"The transcripts are stored in {recog_path}")
531531

532532
# The following prints out WERs, per-word error statistics and aligned
@@ -538,7 +538,11 @@ def save_results(
538538
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
539539
with open(errs_filename, "w") as f:
540540
wer = write_error_stats(
541-
f, f"{test_set_name}-{key}", results_char, enable_log=True
541+
f,
542+
f"{test_set_name}-{key}",
543+
results_char,
544+
enable_log=True,
545+
compute_CER=True,
542546
)
543547
test_set_wers[key] = wer
544548

egs/aishell/ASR/pruned_transducer_stateless7/decode.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -444,15 +444,19 @@ def save_results(
444444
for res in results:
445445
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
446446

447-
store_transcripts(filename=recog_path, texts=results_char)
447+
store_transcripts(filename=recog_path, texts=results_char, char_level=True)
448448
logging.info(f"The transcripts are stored in {recog_path}")
449449

450450
# The following prints out WERs, per-word error statistics and aligned
451451
# ref/hyp pairs.
452452
errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
453453
with open(errs_filename, "w") as f:
454454
wer = write_error_stats(
455-
f, f"{test_set_name}-{key}", results_char, enable_log=True
455+
f,
456+
f"{test_set_name}-{key}",
457+
results_char,
458+
enable_log=True,
459+
compute_CER=True,
456460
)
457461
test_set_wers[key] = wer
458462

egs/aishell/ASR/pruned_transducer_stateless7_bbpe/decode.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -581,7 +581,7 @@ def save_results(
581581
for key, results in results_dict.items():
582582
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
583583
results = sorted(results)
584-
store_transcripts(filename=recog_path, texts=results)
584+
store_transcripts(filename=recog_path, texts=results, char_level=True)
585585
logging.info(f"The transcripts are stored in {recog_path}")
586586

587587
# The following prints out WERs, per-word error statistics and aligned
@@ -594,7 +594,11 @@ def save_results(
594594

595595
with open(errs_filename, "w") as f:
596596
wer = write_error_stats(
597-
f, f"{test_set_name}-{key}", results_char, enable_log=True
597+
f,
598+
f"{test_set_name}-{key}",
599+
results_char,
600+
enable_log=True,
601+
compute_CER=True,
598602
)
599603
test_set_wers[key] = wer
600604

egs/aishell/ASR/pruned_transducer_stateless7_streaming/decode.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -492,15 +492,19 @@ def save_results(
492492
for key, results in results_dict.items():
493493
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
494494
results = sorted(results)
495-
store_transcripts(filename=recog_path, texts=results)
495+
store_transcripts(filename=recog_path, texts=results, char_level=True)
496496
logging.info(f"The transcripts are stored in {recog_path}")
497497

498498
# The following prints out WERs, per-word error statistics and aligned
499499
# ref/hyp pairs.
500500
errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
501501
with open(errs_filename, "w") as f:
502502
wer = write_error_stats(
503-
f, f"{test_set_name}-{key}", results, enable_log=True
503+
f,
504+
f"{test_set_name}-{key}",
505+
results,
506+
enable_log=True,
507+
compute_CER=True,
504508
)
505509
test_set_wers[key] = wer
506510

egs/aishell/ASR/tdnn_lstm_ctc/decode.py

+8-2
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,7 @@ def save_results(
278278
for key, results in results_dict.items():
279279
recog_path = params.exp_dir / f"recogs-{test_set_name}-{key}.txt"
280280
results = sorted(results)
281-
store_transcripts(filename=recog_path, texts=results)
281+
store_transcripts(filename=recog_path, texts=results, char_level=True)
282282
logging.info(f"The transcripts are stored in {recog_path}")
283283

284284
# The following prints out WERs, per-word error statistics and aligned
@@ -289,7 +289,13 @@ def save_results(
289289
for res in results:
290290
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
291291
with open(errs_filename, "w") as f:
292-
wer = write_error_stats(f, f"{test_set_name}-{key}", results_char)
292+
wer = write_error_stats(
293+
f,
294+
f"{test_set_name}-{key}",
295+
results_char,
296+
enable_log=True,
297+
compute_CER=True,
298+
)
293299
test_set_wers[key] = wer
294300

295301
logging.info("Wrote detailed error stats to {}".format(errs_filename))

egs/aishell/ASR/transducer_stateless/decode.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -327,7 +327,7 @@ def save_results(
327327
for key, results in results_dict.items():
328328
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
329329
results = sorted(results)
330-
store_transcripts(filename=recog_path, texts=results)
330+
store_transcripts(filename=recog_path, texts=results, char_level=True)
331331

332332
# The following prints out WERs, per-word error statistics and aligned
333333
# ref/hyp pairs.
@@ -338,7 +338,11 @@ def save_results(
338338
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
339339
with open(errs_filename, "w") as f:
340340
wer = write_error_stats(
341-
f, f"{test_set_name}-{key}", results_char, enable_log=True
341+
f,
342+
f"{test_set_name}-{key}",
343+
results_char,
344+
enable_log=True,
345+
compute_CER=True,
342346
)
343347
test_set_wers[key] = wer
344348

egs/aishell/ASR/transducer_stateless_modified-2/decode.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -372,7 +372,7 @@ def save_results(
372372
for key, results in results_dict.items():
373373
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
374374
results = sorted(results)
375-
store_transcripts(filename=recog_path, texts=results)
375+
store_transcripts(filename=recog_path, texts=results, char_level=True)
376376
logging.info(f"The transcripts are stored in {recog_path}")
377377

378378
# The following prints out WERs, per-word error statistics and aligned
@@ -384,7 +384,11 @@ def save_results(
384384
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
385385
with open(errs_filename, "w") as f:
386386
wer = write_error_stats(
387-
f, f"{test_set_name}-{key}", results_char, enable_log=True
387+
f,
388+
f"{test_set_name}-{key}",
389+
results_char,
390+
enable_log=True,
391+
compute_CER=True,
388392
)
389393
test_set_wers[key] = wer
390394

egs/aishell/ASR/transducer_stateless_modified/decode.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -376,7 +376,7 @@ def save_results(
376376
for key, results in results_dict.items():
377377
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
378378
results = sorted(results)
379-
store_transcripts(filename=recog_path, texts=results)
379+
store_transcripts(filename=recog_path, texts=results, char_level=True)
380380
logging.info(f"The transcripts are stored in {recog_path}")
381381

382382
# The following prints out WERs, per-word error statistics and aligned
@@ -388,7 +388,11 @@ def save_results(
388388
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
389389
with open(errs_filename, "w") as f:
390390
wer = write_error_stats(
391-
f, f"{test_set_name}-{key}", results_char, enable_log=True
391+
f,
392+
f"{test_set_name}-{key}",
393+
results_char,
394+
enable_log=True,
395+
compute_CER=True,
392396
)
393397
test_set_wers[key] = wer
394398

egs/aishell/ASR/whisper/decode.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -358,7 +358,7 @@ def save_results(
358358
params.exp_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
359359
)
360360
results = sorted(results)
361-
store_transcripts(filename=recog_path, texts=results)
361+
store_transcripts(filename=recog_path, texts=results, char_level=True)
362362
if enable_log:
363363
logging.info(f"The transcripts are stored in {recog_path}")
364364

@@ -373,7 +373,11 @@ def save_results(
373373
results_char.append((res[0], list("".join(res[1])), list("".join(res[2]))))
374374
with open(errs_filename, "w") as f:
375375
wer = write_error_stats(
376-
f, f"{test_set_name}-{key}", results_char, enable_log=enable_log
376+
f,
377+
f"{test_set_name}-{key}",
378+
results_char,
379+
enable_log=enable_log,
380+
compute_CER=True,
377381
)
378382
test_set_wers[key] = wer
379383

egs/aishell/ASR/zipformer/decode.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -560,7 +560,7 @@ def save_results(
560560
params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
561561
)
562562
results = sorted(results)
563-
store_transcripts(filename=recog_path, texts=results)
563+
store_transcripts(filename=recog_path, texts=results, char_level=True)
564564
logging.info(f"The transcripts are stored in {recog_path}")
565565

566566
# The following prints out WERs, per-word error statistics and aligned
@@ -570,7 +570,11 @@ def save_results(
570570
)
571571
with open(errs_filename, "w") as f:
572572
wer = write_error_stats(
573-
f, f"{test_set_name}-{key}", results, enable_log=True
573+
f,
574+
f"{test_set_name}-{key}",
575+
results,
576+
enable_log=True,
577+
compute_CER=True,
574578
)
575579
test_set_wers[key] = wer
576580

0 commit comments

Comments
 (0)