From bb1fc50d5acff6711ba89144858e80c81383e035 Mon Sep 17 00:00:00 2001 From: blessyyyu <954793264@qq.com> Date: Thu, 24 Mar 2022 14:19:14 +0800 Subject: [PATCH] remove defaultdict --- kws/bin/compute_det.py | 33 ++++++++++++++++++++------------- kws/bin/score.py | 3 ++- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/kws/bin/compute_det.py b/kws/bin/compute_det.py index 4c632f5..43f53a9 100644 --- a/kws/bin/compute_det.py +++ b/kws/bin/compute_det.py @@ -17,15 +17,20 @@ import argparse import json from collections import defaultdict + def load_label_and_score(keyword, label_file, score_file): - score_table = defaultdict(list) + # score_table: {uttid: [keywordlist]} + score_table = {} with open(score_file, 'r', encoding='utf8') as fin: for line in fin: arr = line.strip().split() key = arr[0] - str_list = arr[1:] - scores = list(map(float, str_list)) - score_table[key].append(scores) + current_keyword = arr[1] + str_list = arr[2:] + if int(current_keyword) == keyword: + scores = list(map(float, str_list)) + if key not in score_table: + score_table.update({key: scores}) keyword_table = {} filler_table = {} filler_duration = 0.0 @@ -46,12 +51,14 @@ def load_label_and_score(keyword, label_file, score_file): filler_duration += duration return keyword_table, filler_table, filler_duration + if __name__ == '__main__': parser = argparse.ArgumentParser(description='compute det curve') parser.add_argument('--test_data', required=True, help='label file') - parser.add_argument('--keyword', type=int, default=0, help='score file') + parser.add_argument('--keyword', type=int, default=0, help='keyword label') parser.add_argument('--score_file', required=True, help='score file') - parser.add_argument('--step', type=float, default=0.01, help='score file') + parser.add_argument('--step', type=float, default=0.01, + help='threshold step') parser.add_argument('--window_shift', type=int, default=50, help='window_shift is used to skip the frames after triggered') parser.add_argument('--stats_file', @@ -63,32 +70,32 @@ if __name__ == '__main__': args.keyword, args.test_data, args.score_file) print('Filler total duration Hours: {}'.format(filler_duration / 3600.0)) with open(args.stats_file, 'w', encoding='utf8') as fout: - keyword_index = int(args.stats_file.split('/')[-1].split('.')[1]) + keyword_index = int(args.keyword) threshold = 0.0 while threshold <= 1.0: num_false_reject = 0 # transverse the all keyword_table - for key, scores_list in keyword_table.items(): + for key, score_list in keyword_table.items(): # computer positive test sample, use the max score of list. - score = max(scores_list[keyword_index]) + score = max(score_list) if float(score) < threshold: num_false_reject += 1 num_false_alarm = 0 # transverse the all filler_table - for key, scores_list in filler_table.items(): + for key, score_list in filler_table.items(): i = 0 - score_list = scores_list[keyword_index] while i < len(score_list): if score_list[i] >= threshold: num_false_alarm += 1 i += window_shift else: i += 1 - if len(keyword_table) != 0 : + if len(keyword_table) != 0: false_reject_rate = num_false_reject / len(keyword_table) num_false_alarm = max(num_false_alarm, 1e-6) if filler_duration != 0: - false_alarm_per_hour = num_false_alarm / (filler_duration / 3600.0) + false_alarm_per_hour = num_false_alarm / \ + (filler_duration / 3600.0) fout.write('{:.6f} {:.6f} {:.6f}\n'.format(threshold, false_alarm_per_hour, false_reject_rate)) diff --git a/kws/bin/score.py b/kws/bin/score.py index 19ccca8..a894704 100644 --- a/kws/bin/score.py +++ b/kws/bin/score.py @@ -119,7 +119,8 @@ def main(): keyword_scores = score[:, keyword_i] score_frames = ' '.join(['{:.6f}'.format(x) for x in keyword_scores.tolist()]) - fout.write('{} {}\n'.format(key, score_frames)) + fout.write('{} {} {}\n'.format( + key, keyword_i, score_frames)) if batch_idx % 10 == 0: print('Progress batch {}'.format(batch_idx)) sys.stdout.flush()