[examples] refactor FAR computation to support long audio test (#64)

* add .gitattributes

* add long wav

* fix some bugs

* updated lint error

* revert hi_xiaowen/run.sh to its original state

* remove the space

* better one

* remove 'num_keyword' parameter

* remove files

* flake8 examine

* override the score and compute_det file

* remove defaultdict

* remove import defaultdict
This commit is contained in:
Cyan 2022-03-24 14:35:07 +08:00 committed by GitHub
parent ff4b47f94d
commit 7d142b9528
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 51 additions and 27 deletions

View File

@ -21,7 +21,7 @@ score_checkpoint=$dir/avg_${num_average}.pt
download_dir=./data/local # your data dir
. tools/parse_options.sh || exit 1;
window_shift=50
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
echo "Download and extracte all datasets"
@ -100,12 +100,14 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
--test_data data/test/data.list \
--batch_size 256 \
--checkpoint $score_checkpoint \
--score_file $result_dir/score.txt \
--score_file $result_dir/score.txt \
--num_workers 8
for keyword in 0 1; do
python kws/bin/compute_det.py \
--keyword $keyword \
--test_data data/test/data.list \
--window_shift $window_shift \
--score_file $result_dir/score.txt \
--stats_file $result_dir/stats.${keyword}.txt
done
@ -156,5 +158,4 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--config $dir/config.yaml \
--jit_model $dir/$jit_model \
--onnx_model $dir/$onnx_model
fi
fi

View File

@ -1,4 +1,5 @@
# Copyright (c) 2021 Binbin Zhang(binbzha@qq.com)
# 2022 Shaoqing Yu(954793264@qq.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -17,13 +18,18 @@ import json
def load_label_and_score(keyword, label_file, score_file):
# score_table: {uttid: [keywordlist]}
score_table = {}
with open(score_file, 'r', encoding='utf8') as fin:
for line in fin:
arr = line.strip().split()
key = arr[0]
score = float(arr[keyword + 1])
score_table[key] = score
current_keyword = arr[1]
str_list = arr[2:]
if int(current_keyword) == keyword:
scores = list(map(float, str_list))
if key not in score_table:
score_table.update({key: scores})
keyword_table = {}
filler_table = {}
filler_duration = 0.0
@ -48,32 +54,47 @@ def load_label_and_score(keyword, label_file, score_file):
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='compute det curve')
parser.add_argument('--test_data', required=True, help='label file')
parser.add_argument('--keyword', type=int, default=0, help='score file')
parser.add_argument('--keyword', type=int, default=0, help='keyword label')
parser.add_argument('--score_file', required=True, help='score file')
parser.add_argument('--step', type=float, default=0.01, help='score file')
parser.add_argument('--step', type=float, default=0.01,
help='threshold step')
parser.add_argument('--window_shift', type=int, default=50,
help='window_shift is used to skip the frames after triggered')
parser.add_argument('--stats_file',
required=True,
help='false reject/alarm stats file')
args = parser.parse_args()
window_shift = args.window_shift
keyword_table, filler_table, filler_duration = load_label_and_score(
args.keyword, args.test_data, args.score_file)
print('Filler total duration Hours: {}'.format(filler_duration / 3600.0))
with open(args.stats_file, 'w', encoding='utf8') as fout:
keyword_index = int(args.keyword)
threshold = 0.0
while threshold <= 1.0:
num_false_reject = 0
for key, score in keyword_table.items():
if score < threshold:
# traverse the whole keyword_table
for key, score_list in keyword_table.items():
# for a positive test sample, use the max score of the list.
score = max(score_list)
if float(score) < threshold:
num_false_reject += 1
num_false_alarm = 0
for key, score in filler_table.items():
if score >= threshold:
num_false_alarm += 1
false_reject_rate = num_false_reject / len(keyword_table)
# traverse the whole filler_table
for key, score_list in filler_table.items():
i = 0
while i < len(score_list):
if score_list[i] >= threshold:
num_false_alarm += 1
i += window_shift
else:
i += 1
if len(keyword_table) != 0:
false_reject_rate = num_false_reject / len(keyword_table)
num_false_alarm = max(num_false_alarm, 1e-6)
false_alarm_per_hour = num_false_alarm / (filler_duration / 3600.0)
if filler_duration != 0:
false_alarm_per_hour = num_false_alarm / \
(filler_duration / 3600.0)
fout.write('{:.6f} {:.6f} {:.6f}\n'.format(threshold,
false_alarm_per_hour,
false_reject_rate))

View File

@ -1,4 +1,5 @@
# Copyright (c) 2021 Binbin Zhang(binbzha@qq.com)
# 2022 Shaoqing Yu(954793264@qq.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -27,7 +28,6 @@ from torch.utils.data import DataLoader
from kws.dataset.dataset import Dataset
from kws.model.kws_model import init_model
from kws.utils.checkpoint import load_checkpoint
from kws.utils.mask import padding_mask
def get_args():
@ -102,23 +102,25 @@ def main():
use_cuda = args.gpu >= 0 and torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
model = model.to(device)
model.eval()
with torch.no_grad(), open(args.score_file, 'w', encoding='utf8') as fout:
score_abs_path = os.path.abspath(args.score_file)
with torch.no_grad(), open(score_abs_path, 'w', encoding='utf8') as fout:
for batch_idx, batch in enumerate(test_data_loader):
keys, feats, target, lengths = batch
feats = feats.to(device)
lengths = lengths.to(device)
mask = padding_mask(lengths).unsqueeze(2)
logits = model(feats)
logits = logits.masked_fill(mask, 0.0)
max_logits, _ = logits.max(dim=1)
max_logits = max_logits.cpu()
num_keywords = logits.shape[2]
logits = logits.cpu()
for i in range(len(keys)):
key = keys[i]
score = max_logits[i]
score = ' '.join([str(x) for x in score.tolist()])
fout.write('{} {}\n'.format(key, score))
score = logits[i][:lengths[i]]
for keyword_i in range(num_keywords):
keyword_scores = score[:, keyword_i]
score_frames = ' '.join(['{:.6f}'.format(x)
for x in keyword_scores.tolist()])
fout.write('{} {} {}\n'.format(
key, keyword_i, score_frames))
if batch_idx % 10 == 0:
print('Progress batch {}'.format(batch_idx))
sys.stdout.flush()