better one

This commit is contained in:
blessyyyu 2022-03-23 17:53:48 +08:00
parent 1ebc3bff88
commit b2130d7458
3 changed files with 39 additions and 60 deletions

View File

@ -21,7 +21,7 @@ score_checkpoint=$dir/avg_${num_average}.pt
download_dir=./data/local # your data dir download_dir=./data/local # your data dir
. tools/parse_options.sh || exit 1; . tools/parse_options.sh || exit 1;
window_shift=50
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
echo "Download and extracte all datasets" echo "Download and extracte all datasets"
@ -100,7 +100,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
--test_data data/test/data.list \ --test_data data/test/data.list \
--batch_size 256 \ --batch_size 256 \
--checkpoint $score_checkpoint \ --checkpoint $score_checkpoint \
--score_file_dir $result_dir \ --score_file $result_dir/score_longwav.txt \
--num_keywords $num_keywords \ --num_keywords $num_keywords \
--num_workers 8 --num_workers 8
@ -108,7 +108,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
python kws/bin/compute_det_longwav.py \ python kws/bin/compute_det_longwav.py \
--keyword $keyword \ --keyword $keyword \
--test_data data/test/data.list \ --test_data data/test/data.list \
--score_file $result_dir/score_longwav.${keyword}.txt \ --window_shift $window_shift \
--score_file $result_dir/score_longwav.txt \
--stats_file $result_dir/stats_longwav.${keyword}.txt --stats_file $result_dir/stats_longwav.${keyword}.txt
done done
fi fi
@ -158,5 +159,4 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--config $dir/config.yaml \ --config $dir/config.yaml \
--jit_model $dir/$jit_model \ --jit_model $dir/$jit_model \
--onnx_model $dir/$onnx_model --onnx_model $dir/$onnx_model
fi fi

View File

@ -15,24 +15,20 @@
import argparse import argparse
import json import json
from collections import defaultdict
def load_label_and_score(keyword, label_file, score_file): def load_label_and_score(keyword, label_file, score_file):
# utt_id : score list score_table = defaultdict(list)
score_table = {}
with open(score_file, 'r', encoding='utf8') as fin: with open(score_file, 'r', encoding='utf8') as fin:
for line in fin: for line in fin:
arr = line.strip().split() arr = line.strip().split()
# key = utt_id
key = arr[0] key = arr[0]
# scores is a list
str_list = arr[1:] str_list = arr[1:]
scores = list(map(float, str_list)) scores = list(map(float, str_list))
score_table[key] = scores score_table[key].append(scores)
keyword_table = {} keyword_table = {}
filler_table = {} filler_table = {}
filler_duration = 0.0 filler_duration = 0.0
# label_file = data.list
with open(label_file, 'r', encoding='utf8') as fin: with open(label_file, 'r', encoding='utf8') as fin:
for line in fin: for line in fin:
obj = json.loads(line.strip()) obj = json.loads(line.strip())
@ -40,49 +36,48 @@ def load_label_and_score(keyword, label_file, score_file):
assert 'txt' in obj assert 'txt' in obj
assert 'duration' in obj assert 'duration' in obj
key = obj['key'] key = obj['key']
# txt is label
index = obj['txt'] index = obj['txt']
duration = obj['duration'] duration = obj['duration']
assert key in score_table assert key in score_table
# txt == keyword , correct
if index == keyword: if index == keyword:
keyword_table[key] = score_table[key] keyword_table[key] = score_table[key]
else: else:
# false
filler_table[key] = score_table[key] filler_table[key] = score_table[key]
filler_duration += duration filler_duration += duration
return keyword_table, filler_table, filler_duration return keyword_table, filler_table, filler_duration
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(description='compute det curve') parser = argparse.ArgumentParser(description='compute det curve')
parser.add_argument('--test_data', required=True, help='label file') parser.add_argument('--test_data', required=True, help='label file')
parser.add_argument('--keyword', type=int, default=0, help='score file') parser.add_argument('--keyword', type=int, default=0, help='score file')
parser.add_argument('--score_file', required=True, help='score file') parser.add_argument('--score_file', required=True, help='score file')
parser.add_argument('--step', type=float, default=0.01, help='score file') parser.add_argument('--step', type=float, default=0.01, help='score file')
parser.add_argument('--window_shift', type=int, default=50,
help='window_shift is used to skip the frames after triggered')
parser.add_argument('--stats_file', parser.add_argument('--stats_file',
required=True, required=True,
help='false reject/alarm stats file') help='false reject/alarm stats file')
args = parser.parse_args() args = parser.parse_args()
# 'window_shift' is used to skip the frames after triggered window_shift = args.window_shift
window_shift = 50
keyword_table, filler_table, filler_duration = load_label_and_score( keyword_table, filler_table, filler_duration = load_label_and_score(
args.keyword, args.test_data, args.score_file) args.keyword, args.test_data, args.score_file)
print('Filler total duration Hours: {}'.format(filler_duration / 3600.0)) print('Filler total duration Hours: {}'.format(filler_duration / 3600.0))
with open(args.stats_file, 'w', encoding='utf8') as fout: with open(args.stats_file, 'w', encoding='utf8') as fout:
keyword_index = int(args.stats_file.split('/')[-1].split('.')[1])
threshold = 0.0 threshold = 0.0
while threshold <= 1.0: while threshold <= 1.0:
num_false_reject = 0 num_false_reject = 0
# traverse all of keyword_table # traverse all of keyword_table
for key, score_list in keyword_table.items(): for key, scores_list in keyword_table.items():
# compute positive test samples: use the max score of the list. # compute positive test samples: use the max score of the list.
score = max(score_list) score = max(scores_list[keyword_index])
if float(score) < threshold: if float(score) < threshold:
num_false_reject += 1 num_false_reject += 1
num_false_alarm = 0 num_false_alarm = 0
# traverse all of filler_table # traverse all of filler_table
for key, score_list in filler_table.items(): for key, scores_list in filler_table.items():
i = 0 i = 0
score_list = scores_list[keyword_index]
while i < len(score_list): while i < len(score_list):
if score_list[i] >= threshold: if score_list[i] >= threshold:
num_false_alarm += 1 num_false_alarm += 1
@ -97,4 +92,4 @@ if __name__ == '__main__':
fout.write('{:.6f} {:.6f} {:.6f}\n'.format(threshold, fout.write('{:.6f} {:.6f} {:.6f}\n'.format(threshold,
false_alarm_per_hour, false_alarm_per_hour,
false_reject_rate)) false_reject_rate))
threshold += args.step threshold += args.step

View File

@ -55,12 +55,12 @@ def get_args():
default=100, default=100,
type=int, type=int,
help='prefetch number') help='prefetch number')
parser.add_argument('--score_file_dir', parser.add_argument('--score_file',
required=True, required=True,
help='output score file') help='output score file')
parser.add_argument('--num_keywords', parser.add_argument('--num_keywords',
required=True, required=True,
help='the number of keywords') help='the number of keywords')
parser.add_argument('--jit_model', parser.add_argument('--jit_model',
action='store_true', action='store_true',
default=False, default=False,
@ -106,42 +106,26 @@ def main():
device = torch.device('cuda' if use_cuda else 'cpu') device = torch.device('cuda' if use_cuda else 'cpu')
model = model.to(device) model = model.to(device)
model.eval() model.eval()
# add to write different keyword score file
num_keywords = int(args.num_keywords)
score_file_list = []
dir_abs_path = os.path.abspath(args.score_file_dir)
for i in range(num_keywords):
temp_list = ['score_longwav', 'txt']
temp_list.insert(1, str(i))
suffix = '.'.join(temp_list)
# print('suffix = ', suffix)
score_abs_path = os.path.join(dir_abs_path, suffix)
score_file_list.append(score_abs_path)
for abs_path in score_file_list: score_abs_path = os.path.abspath(args.score_file)
with torch.no_grad(), open(abs_path, 'w', encoding='utf8') as fout: num_keywords = int(args.num_keywords)
keyword_label = abs_path.split('/')[-1].split('.')[1] with torch.no_grad(), open(score_abs_path, 'w', encoding='utf8') as fout:
# print('keyword_label = ', keyword_label) for batch_idx, batch in enumerate(test_data_loader):
for batch_idx, batch in enumerate(test_data_loader): keys, feats, target, lengths = batch
keys, feats, target, lengths = batch feats = feats.to(device)
feats = feats.to(device) lengths = lengths.to(device)
lengths = lengths.to(device) logits = model(feats)
# mask = padding_mask(lengths).unsqueeze(2) logits = logits.cpu()
logits = model(feats) for i in range(len(keys)):
# mask对应的true的部分用0填充 key = keys[i]
# Getting every frames desn't need to mask score = logits[i][:lengths[i]]
# logits = logits.masked_fill(mask, 0.0) for keyword_i in range(num_keywords):
logits = logits.cpu() keyword_scores = score[:, keyword_i]
for i in range(len(keys)): score_frames = ' '.join(['{:.3g}'.format(x) for x in keyword_scores.tolist()])
key = keys[i] fout.write('{} {}\n'.format(key, score_frames))
score = logits[i][:lengths[i]] if batch_idx % 10 == 0:
score = score[:, int(keyword_label)] print('Progress batch {}'.format(batch_idx))
# keep 2 significant digits sys.stdout.flush()
score = ' '.join([str("%.2g" % x) for x in score.tolist()])
fout.write('{} {}\n'.format(key, score))
if batch_idx % 10 == 0:
print('Progress batch {}'.format(batch_idx))
sys.stdout.flush()
if __name__ == '__main__': if __name__ == '__main__':