diff --git a/examples/hi_xiaowen/s0/run.sh b/examples/hi_xiaowen/s0/run.sh index a735251..3c964c5 100755 --- a/examples/hi_xiaowen/s0/run.sh +++ b/examples/hi_xiaowen/s0/run.sh @@ -21,7 +21,7 @@ score_checkpoint=$dir/avg_${num_average}.pt download_dir=./data/local # your data dir . tools/parse_options.sh || exit 1; - +window_shift=50 if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then echo "Download and extracte all datasets" @@ -100,12 +100,14 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --test_data data/test/data.list \ --batch_size 256 \ --checkpoint $score_checkpoint \ - --score_file $result_dir/score.txt \ + --score_file $result_dir/score.txt \ --num_workers 8 + for keyword in 0 1; do python kws/bin/compute_det.py \ --keyword $keyword \ --test_data data/test/data.list \ + --window_shift $window_shift \ --score_file $result_dir/score.txt \ --stats_file $result_dir/stats.${keyword}.txt done @@ -156,5 +158,4 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then --config $dir/config.yaml \ --jit_model $dir/$jit_model \ --onnx_model $dir/$onnx_model -fi - +fi \ No newline at end of file diff --git a/kws/bin/compute_det.py b/kws/bin/compute_det.py index 53e3079..32b0280 100644 --- a/kws/bin/compute_det.py +++ b/kws/bin/compute_det.py @@ -1,4 +1,5 @@ # Copyright (c) 2021 Binbin Zhang(binbzha@qq.com) +# 2022 Shaoqing Yu(954793264@qq.com) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,13 +18,18 @@ import json def load_label_and_score(keyword, label_file, score_file): + # score_table: {uttid: [keywordlist]} score_table = {} with open(score_file, 'r', encoding='utf8') as fin: for line in fin: arr = line.strip().split() key = arr[0] - score = float(arr[keyword + 1]) - score_table[key] = score + current_keyword = arr[1] + str_list = arr[2:] + if int(current_keyword) == keyword: + scores = list(map(float, str_list)) + if key not in score_table: + score_table.update({key: scores}) keyword_table = {} filler_table = {} filler_duration = 0.0 @@ -48,32 +54,47 @@ def load_label_and_score(keyword, label_file, score_file): if __name__ == '__main__': parser = argparse.ArgumentParser(description='compute det curve') parser.add_argument('--test_data', required=True, help='label file') - parser.add_argument('--keyword', type=int, default=0, help='score file') + parser.add_argument('--keyword', type=int, default=0, help='keyword label') parser.add_argument('--score_file', required=True, help='score file') - parser.add_argument('--step', type=float, default=0.01, help='score file') + parser.add_argument('--step', type=float, default=0.01, + help='threshold step') + parser.add_argument('--window_shift', type=int, default=50, + help='window_shift is used to skip the frames after triggered') parser.add_argument('--stats_file', required=True, help='false reject/alarm stats file') args = parser.parse_args() - + window_shift = args.window_shift keyword_table, filler_table, filler_duration = load_label_and_score( args.keyword, args.test_data, args.score_file) print('Filler total duration Hours: {}'.format(filler_duration / 3600.0)) - with open(args.stats_file, 'w', encoding='utf8') as fout: + keyword_index = int(args.keyword) threshold = 0.0 while threshold <= 1.0: num_false_reject = 0 - for key, score in keyword_table.items(): - if score < threshold: + # traverse the whole keyword_table + for key, score_list in keyword_table.items(): + # compute positive test 
sample, use the max score of the list. + score = max(score_list) + if float(score) < threshold: num_false_reject += 1 num_false_alarm = 0 - for key, score in filler_table.items(): - if score >= threshold: - num_false_alarm += 1 - false_reject_rate = num_false_reject / len(keyword_table) + # traverse the whole filler_table + for key, score_list in filler_table.items(): + i = 0 + while i < len(score_list): + if score_list[i] >= threshold: + num_false_alarm += 1 + i += window_shift + else: + i += 1 + if len(keyword_table) != 0: + false_reject_rate = num_false_reject / len(keyword_table) num_false_alarm = max(num_false_alarm, 1e-6) - false_alarm_per_hour = num_false_alarm / (filler_duration / 3600.0) + if filler_duration != 0: + false_alarm_per_hour = num_false_alarm / \ + (filler_duration / 3600.0) fout.write('{:.6f} {:.6f} {:.6f}\n'.format(threshold, false_alarm_per_hour, false_reject_rate)) diff --git a/kws/bin/score.py b/kws/bin/score.py index b8e7c5c..a894704 100644 --- a/kws/bin/score.py +++ b/kws/bin/score.py @@ -1,4 +1,5 @@ # Copyright (c) 2021 Binbin Zhang(binbzha@qq.com) +# 2022 Shaoqing Yu(954793264@qq.com) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -27,7 +28,6 @@ from torch.utils.data import DataLoader from kws.dataset.dataset import Dataset from kws.model.kws_model import init_model from kws.utils.checkpoint import load_checkpoint -from kws.utils.mask import padding_mask def get_args(): @@ -102,23 +102,25 @@ def main(): use_cuda = args.gpu >= 0 and torch.cuda.is_available() device = torch.device('cuda' if use_cuda else 'cpu') model = model.to(device) - model.eval() - with torch.no_grad(), open(args.score_file, 'w', encoding='utf8') as fout: + score_abs_path = os.path.abspath(args.score_file) + with torch.no_grad(), open(score_abs_path, 'w', encoding='utf8') as fout: for batch_idx, batch in enumerate(test_data_loader): keys, feats, target, lengths = batch feats = feats.to(device) lengths = lengths.to(device) - mask = padding_mask(lengths).unsqueeze(2) logits = model(feats) - logits = logits.masked_fill(mask, 0.0) - max_logits, _ = logits.max(dim=1) - max_logits = max_logits.cpu() + num_keywords = logits.shape[2] + logits = logits.cpu() for i in range(len(keys)): key = keys[i] - score = max_logits[i] - score = ' '.join([str(x) for x in score.tolist()]) - fout.write('{} {}\n'.format(key, score)) + score = logits[i][:lengths[i]] + for keyword_i in range(num_keywords): + keyword_scores = score[:, keyword_i] + score_frames = ' '.join(['{:.6f}'.format(x) + for x in keyword_scores.tolist()]) + fout.write('{} {} {}\n'.format( + key, keyword_i, score_frames)) if batch_idx % 10 == 0: print('Progress batch {}'.format(batch_idx)) sys.stdout.flush()