diff --git a/examples/hi_xiaowen/s0/run.sh b/examples/hi_xiaowen/s0/run.sh index a0d98e6..1f00ff0 100755 --- a/examples/hi_xiaowen/s0/run.sh +++ b/examples/hi_xiaowen/s0/run.sh @@ -21,7 +21,7 @@ score_checkpoint=$dir/avg_${num_average}.pt download_dir=./data/local # your data dir . tools/parse_options.sh || exit 1; - +window_shift=50 if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then echo "Download and extracte all datasets" @@ -100,7 +100,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --test_data data/test/data.list \ --batch_size 256 \ --checkpoint $score_checkpoint \ - --score_file_dir $result_dir \ + --score_file $result_dir/score_longwav.txt \ --num_keywords $num_keywords \ --num_workers 8 @@ -108,7 +108,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then python kws/bin/compute_det_longwav.py \ --keyword $keyword \ --test_data data/test/data.list \ - --score_file $result_dir/score_longwav.${keyword}.txt \ + --window_shift $window_shift \ + --score_file $result_dir/score_longwav.txt \ --stats_file $result_dir/stats_longwav.${keyword}.txt done fi @@ -158,5 +159,4 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then --config $dir/config.yaml \ --jit_model $dir/$jit_model \ --onnx_model $dir/$onnx_model -fi - +fi \ No newline at end of file diff --git a/kws/bin/compute_det_longwav.py b/kws/bin/compute_det_longwav.py index e7983c7..ec20222 100644 --- a/kws/bin/compute_det_longwav.py +++ b/kws/bin/compute_det_longwav.py @@ -15,24 +15,20 @@ import argparse import json - +from collections import defaultdict def load_label_and_score(keyword, label_file, score_file): - # utt_id : score list - score_table = {} + score_table = defaultdict(list) with open(score_file, 'r', encoding='utf8') as fin: for line in fin: arr = line.strip().split() - # key = utt_id key = arr[0] - # scores is a list str_list = arr[1:] scores = list(map(float, str_list)) - score_table[key] = scores + score_table[key].append(scores) keyword_table = {} filler_table = {} filler_duration = 0.0 - # label_file = data.list with open(label_file, 'r', encoding='utf8') as fin: for line in fin: obj = json.loads(line.strip()) @@ -40,49 +36,48 @@ def load_label_and_score(keyword, label_file, score_file): assert 'txt' in obj assert 'duration' in obj key = obj['key'] - # txt is label index = obj['txt'] duration = obj['duration'] assert key in score_table - # txt == keyword , correct if index == keyword: keyword_table[key] = score_table[key] else: - # false filler_table[key] = score_table[key] filler_duration += duration return keyword_table, filler_table, filler_duration - if __name__ == '__main__': parser = argparse.ArgumentParser(description='compute det curve') parser.add_argument('--test_data', required=True, help='label file') parser.add_argument('--keyword', type=int, default=0, help='score file') parser.add_argument('--score_file', required=True, help='score file') parser.add_argument('--step', type=float, default=0.01, help='score file') + parser.add_argument('--window_shift', type=int, default=50, + help='window_shift is used to skip the frames after triggered') parser.add_argument('--stats_file', required=True, help='false reject/alarm stats file') args = parser.parse_args() - # 'window_shift' is used to skip the frames after triggered - window_shift = 50 + window_shift = args.window_shift keyword_table, filler_table, filler_duration = load_label_and_score( args.keyword, args.test_data, args.score_file) - print('Filler total duration Hours: {}'.format(filler_duration / 3600.0)) + print('Filler total duration Hours: {}'.format(filler_duration / 3600.0)) with open(args.stats_file, 'w', encoding='utf8') as fout: + keyword_index = int(args.stats_file.split('/')[-1].split('.')[1]) threshold = 0.0 while threshold <= 1.0: num_false_reject = 0 # transverse the all keyword_table - for key, score_list in keyword_table.items(): + for key, scores_list in keyword_table.items(): # computer positive test sample, use the max score of list. - score = max(score_list) + score = max(scores_list[keyword_index]) if float(score) < threshold: num_false_reject += 1 num_false_alarm = 0 # transverse the all filler_table - for key, score_list in filler_table.items(): + for key, scores_list in filler_table.items(): i = 0 + score_list = scores_list[keyword_index] while i < len(score_list): if score_list[i] >= threshold: num_false_alarm += 1 @@ -97,4 +92,4 @@ if __name__ == '__main__': fout.write('{:.6f} {:.6f} {:.6f}\n'.format(threshold, false_alarm_per_hour, false_reject_rate)) - threshold += args.step + threshold += args.step \ No newline at end of file diff --git a/kws/bin/score_longwav.py b/kws/bin/score_longwav.py index 05495d2..782763a 100644 --- a/kws/bin/score_longwav.py +++ b/kws/bin/score_longwav.py @@ -55,12 +55,12 @@ def get_args(): default=100, type=int, help='prefetch number') - parser.add_argument('--score_file_dir', + parser.add_argument('--score_file', required=True, help='output score file') parser.add_argument('--num_keywords', required=True, - help='the number of keywords') + help='the number of keywords') parser.add_argument('--jit_model', action='store_true', default=False, @@ -106,42 +106,26 @@ def main(): device = torch.device('cuda' if use_cuda else 'cpu') model = model.to(device) model.eval() - # add to write different keyword score file - num_keywords = int(args.num_keywords) - score_file_list = [] - dir_abs_path = os.path.abspath(args.score_file_dir) - for i in range(num_keywords): - temp_list = ['score_longwav', 'txt'] - temp_list.insert(1, str(i)) - suffix = '.'.join(temp_list) - # print('suffix = ', suffix) - score_abs_path = os.path.join(dir_abs_path, suffix) - score_file_list.append(score_abs_path) - for abs_path in score_file_list: - with torch.no_grad(), open(abs_path, 'w', encoding='utf8') as fout: - keyword_label = abs_path.split('/')[-1].split('.')[1] - # print('keyword_label = ', keyword_label) - for batch_idx, batch in enumerate(test_data_loader): - keys, feats, target, lengths = batch - feats = feats.to(device) - lengths = lengths.to(device) - # mask = padding_mask(lengths).unsqueeze(2) - logits = model(feats) - # mask对应的true的部分用0填充 - # Getting every frames desn't need to mask - # logits = logits.masked_fill(mask, 0.0) - logits = logits.cpu() - for i in range(len(keys)): - key = keys[i] - score = logits[i][:lengths[i]] - score = score[:, int(keyword_label)] - # keep 2 significant digits - score = ' '.join([str("%.2g" % x) for x in score.tolist()]) - fout.write('{} {}\n'.format(key, score)) - if batch_idx % 10 == 0: - print('Progress batch {}'.format(batch_idx)) - sys.stdout.flush() + score_abs_path = os.path.abspath(args.score_file) + num_keywords = int(args.num_keywords) + with torch.no_grad(), open(score_abs_path, 'w', encoding='utf8') as fout: + for batch_idx, batch in enumerate(test_data_loader): + keys, feats, target, lengths = batch + feats = feats.to(device) + lengths = lengths.to(device) + logits = model(feats) + logits = logits.cpu() + for i in range(len(keys)): + key = keys[i] + score = logits[i][:lengths[i]] + for keyword_i in range(num_keywords): + keyword_scores = score[:, keyword_i] + score_frames = ' '.join(['{:.3g}'.format(x) for x in keyword_scores.tolist()]) + fout.write('{} {}\n'.format(key, score_frames)) + if batch_idx % 10 == 0: + print('Progress batch {}'.format(batch_idx)) + sys.stdout.flush() if __name__ == '__main__':