remove defaultdict

2022-03-24 14:19:14 +08:00 · 2022-03-24 14:19:14 +08:00 · bb1fc50d5a
commit bb1fc50d5a
parent 00c5acf31a
2 changed files with 22 additions and 14 deletions
--- a/kws/bin/compute_det.py
+++ b/kws/bin/compute_det.py
@@ -17,15 +17,20 @@ import argparse
 import json
 from collections import defaultdict

+
 def load_label_and_score(keyword, label_file, score_file):
-    score_table = defaultdict(list)
+    # score_table: {uttid: [keywordlist]}
+    score_table = {}
    with open(score_file, 'r', encoding='utf8') as fin:
        for line in fin:
            arr = line.strip().split()
            key = arr[0]
-            str_list = arr[1:]
-            scores = list(map(float, str_list))
-            score_table[key].append(scores)
+            current_keyword = arr[1]
+            str_list = arr[2:]
+            if int(current_keyword) == keyword:
+                scores = list(map(float, str_list))
+                if key not in score_table:
+                    score_table.update({key: scores})
    keyword_table = {}
    filler_table = {}
    filler_duration = 0.0
@@ -46,12 +51,14 @@ def load_label_and_score(keyword, label_file, score_file):
                filler_duration += duration
    return keyword_table, filler_table, filler_duration

+
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='compute det curve')
    parser.add_argument('--test_data', required=True, help='label file')
-    parser.add_argument('--keyword', type=int, default=0, help='score file')
+    parser.add_argument('--keyword', type=int, default=0, help='keyword label')
    parser.add_argument('--score_file', required=True, help='score file')
-    parser.add_argument('--step', type=float, default=0.01, help='score file')
+    parser.add_argument('--step', type=float, default=0.01,
+                        help='threshold step')
    parser.add_argument('--window_shift', type=int, default=50,
                        help='window_shift is used to skip the frames after triggered')
    parser.add_argument('--stats_file',
@@ -63,32 +70,32 @@ if __name__ == '__main__':
        args.keyword, args.test_data, args.score_file)
    print('Filler total duration Hours: {}'.format(filler_duration / 3600.0))
    with open(args.stats_file, 'w', encoding='utf8') as fout:
-        keyword_index = int(args.stats_file.split('/')[-1].split('.')[1])
+        keyword_index = int(args.keyword)
        threshold = 0.0
        while threshold <= 1.0:
            num_false_reject = 0
            # transverse the all keyword_table
-            for key, scores_list in keyword_table.items():
+            for key, score_list in keyword_table.items():
                # computer positive test sample, use the max score of list.
-                score = max(scores_list[keyword_index])
+                score = max(score_list)
                if float(score) < threshold:
                    num_false_reject += 1
            num_false_alarm = 0
            # transverse the all filler_table
-            for key, scores_list in filler_table.items():
+            for key, score_list in filler_table.items():
                i = 0
-                score_list = scores_list[keyword_index]
                while i < len(score_list):
                    if score_list[i] >= threshold:
                        num_false_alarm += 1
                        i += window_shift
                    else:
                        i += 1
-            if len(keyword_table) != 0 :
+            if len(keyword_table) != 0:
                false_reject_rate = num_false_reject / len(keyword_table)
            num_false_alarm = max(num_false_alarm, 1e-6)
            if filler_duration != 0:
-                false_alarm_per_hour = num_false_alarm / (filler_duration / 3600.0)
+                false_alarm_per_hour = num_false_alarm / \
+                    (filler_duration / 3600.0)
            fout.write('{:.6f} {:.6f} {:.6f}\n'.format(threshold,
                                                       false_alarm_per_hour,
                                                       false_reject_rate))
--- a/kws/bin/score.py
+++ b/kws/bin/score.py
@@ -119,7 +119,8 @@ def main():
                    keyword_scores = score[:, keyword_i]
                    score_frames = ' '.join(['{:.6f}'.format(x)
                                            for x in keyword_scores.tolist()])
-                    fout.write('{} {}\n'.format(key, score_frames))
+                    fout.write('{} {} {}\n'.format(
+                        key, keyword_i, score_frames))
            if batch_idx % 10 == 0:
                print('Progress batch {}'.format(batch_idx))
                sys.stdout.flush()