From bb1fc50d5acff6711ba89144858e80c81383e035 Mon Sep 17 00:00:00 2001
From: blessyyyu <954793264@qq.com>
Date: Thu, 24 Mar 2022 14:19:14 +0800
Subject: [PATCH] remove defaultdict

---
 kws/bin/compute_det.py | 33 ++++++++++++++++++++-------------
 kws/bin/score.py       |  3 ++-
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/kws/bin/compute_det.py b/kws/bin/compute_det.py
index 4c632f5..43f53a9 100644
--- a/kws/bin/compute_det.py
+++ b/kws/bin/compute_det.py
@@ -17,15 +17,20 @@ import argparse
 import json
 from collections import defaultdict
 
+
 def load_label_and_score(keyword, label_file, score_file):
-    score_table = defaultdict(list)
+    # score_table: {uttid: [frame scores for the selected keyword]}
+    score_table = {}
     with open(score_file, 'r', encoding='utf8') as fin:
         for line in fin:
             arr = line.strip().split()
             key = arr[0]
-            str_list = arr[1:]
-            scores = list(map(float, str_list))
-            score_table[key].append(scores)
+            current_keyword = arr[1]
+            str_list = arr[2:]
+            if int(current_keyword) == keyword:
+                scores = list(map(float, str_list))
+                if key not in score_table:
+                    score_table.update({key: scores})
     keyword_table = {}
     filler_table = {}
     filler_duration = 0.0
@@ -46,12 +51,14 @@ def load_label_and_score(keyword, label_file, score_file):
                 filler_duration += duration
     return keyword_table, filler_table, filler_duration
 
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='compute det curve')
     parser.add_argument('--test_data', required=True, help='label file')
-    parser.add_argument('--keyword', type=int, default=0, help='score file')
+    parser.add_argument('--keyword', type=int, default=0, help='keyword label')
     parser.add_argument('--score_file', required=True, help='score file')
-    parser.add_argument('--step', type=float, default=0.01, help='score file')
+    parser.add_argument('--step', type=float, default=0.01,
+                        help='threshold step')
     parser.add_argument('--window_shift', type=int, default=50,
                         help='window_shift is used to skip the frames after triggered')
     parser.add_argument('--stats_file',
@@ -63,32 +70,32 @@ if __name__ == '__main__':
         args.keyword, args.test_data, args.score_file)
     print('Filler total duration Hours: {}'.format(filler_duration / 3600.0))
     with open(args.stats_file, 'w', encoding='utf8') as fout:
-        keyword_index = int(args.stats_file.split('/')[-1].split('.')[1])
+        keyword_index = int(args.keyword)
         threshold = 0.0
         while threshold <= 1.0:
             num_false_reject = 0
             # transverse the all keyword_table
-            for key, scores_list in keyword_table.items():
+            for key, score_list in keyword_table.items():
                 # computer positive test sample, use the max score of list.
-                score = max(scores_list[keyword_index])
+                score = max(score_list)
                 if float(score) < threshold:
                     num_false_reject += 1
             num_false_alarm = 0
             # transverse the all filler_table
-            for key, scores_list in filler_table.items():
+            for key, score_list in filler_table.items():
                 i = 0
-                score_list = scores_list[keyword_index]
                 while i < len(score_list):
                     if score_list[i] >= threshold:
                         num_false_alarm += 1
                         i += window_shift
                     else:
                         i += 1
-            if len(keyword_table) != 0 :
+            if len(keyword_table) != 0:
                 false_reject_rate = num_false_reject / len(keyword_table)
             num_false_alarm = max(num_false_alarm, 1e-6)
             if filler_duration != 0:
-                false_alarm_per_hour = num_false_alarm / (filler_duration / 3600.0)
+                false_alarm_per_hour = num_false_alarm / \
+                    (filler_duration / 3600.0)
             fout.write('{:.6f} {:.6f} {:.6f}\n'.format(threshold,
                                                        false_alarm_per_hour,
                                                        false_reject_rate))
diff --git a/kws/bin/score.py b/kws/bin/score.py
index 19ccca8..a894704 100644
--- a/kws/bin/score.py
+++ b/kws/bin/score.py
@@ -119,7 +119,8 @@ def main():
                     keyword_scores = score[:, keyword_i]
                     score_frames = ' '.join(['{:.6f}'.format(x)
                                             for x in keyword_scores.tolist()])
-                    fout.write('{} {}\n'.format(key, score_frames))
+                    fout.write('{} {} {}\n'.format(
+                        key, keyword_i, score_frames))
             if batch_idx % 10 == 0:
                 print('Progress batch {}'.format(batch_idx))
                 sys.stdout.flush()