[examples] support hi xiaowen dataset
This commit is contained in:
parent
aa0b0c11a8
commit
dbebee86fd
10
examples/hi_xiaowen/s0/README.md
Normal file
10
examples/hi_xiaowen/s0/README.md
Normal file
@ -0,0 +1,10 @@
|
||||
False rejection rates (FRRs) with the false alarm rate (FAR) fixed at one false alarm per hour:
|
||||
|
||||
| model | params(K) | epoch | hi_xiaowen | nihao_wenwen |
|
||||
|------------------|-----------|-----------|------------|--------------|
|
||||
| GRU | 203 | 80(avg30) | 0.088901 | 0.083827 |
|
||||
| TCN | 134 | 80(avg30) | 0.023494 | 0.029884 |
|
||||
| DS_TCN | 21 | 60 | 0.011559 | 0.014190 |
|
||||
| DS_TCN | 21 | 80 | 0.010807 | 0.014754 |
|
||||
| DS_TCN | 21 | 80(avg30) | 0.009867 | 0.014472 |
|
||||
| DS_TCN(spec_aug) | 21 | 80(avg30) | 0.029039 | 0.022648 |
|
||||
44
examples/hi_xiaowen/s0/conf/ds_tcn.yaml
Normal file
44
examples/hi_xiaowen/s0/conf/ds_tcn.yaml
Normal file
@ -0,0 +1,44 @@
|
||||
dataset_conf:
|
||||
filter_conf:
|
||||
max_length: 2048
|
||||
min_length: 0
|
||||
resample_conf:
|
||||
resample_rate: 16000
|
||||
speed_perturb: false
|
||||
fbank_conf:
|
||||
num_mel_bins: 40
|
||||
frame_shift: 10
|
||||
frame_length: 25
|
||||
dither: 0.1
|
||||
spec_aug: true
|
||||
spec_aug_conf:
|
||||
num_t_mask: 1
|
||||
num_f_mask: 1
|
||||
max_t: 50
|
||||
max_f: 30
|
||||
shuffle: true
|
||||
shuffle_conf:
|
||||
shuffle_size: 1500
|
||||
batch_conf:
|
||||
batch_size: 256
|
||||
|
||||
model:
|
||||
hidden_dim: 64
|
||||
subsampling:
|
||||
type: linear
|
||||
body:
|
||||
type: tcn
|
||||
ds: true
|
||||
num_layers: 4
|
||||
kernel_size: 8
|
||||
dropout: 0.1
|
||||
|
||||
optim: adam
|
||||
optim_conf:
|
||||
lr: 0.001
|
||||
|
||||
training_config:
|
||||
grad_clip: 5
|
||||
max_epoch: 80
|
||||
log_interval: 10
|
||||
|
||||
41
examples/hi_xiaowen/s0/conf/gru.yaml
Normal file
41
examples/hi_xiaowen/s0/conf/gru.yaml
Normal file
@ -0,0 +1,41 @@
|
||||
dataset_conf:
|
||||
filter_conf:
|
||||
max_length: 2048
|
||||
min_length: 0
|
||||
resample_conf:
|
||||
resample_rate: 16000
|
||||
speed_perturb: false
|
||||
fbank_conf:
|
||||
num_mel_bins: 40
|
||||
frame_shift: 10
|
||||
frame_length: 25
|
||||
dither: 0.1
|
||||
spec_aug: false
|
||||
spec_aug_conf:
|
||||
num_t_mask: 2
|
||||
num_f_mask: 2
|
||||
max_t: 50
|
||||
max_f: 30
|
||||
shuffle: true
|
||||
shuffle_conf:
|
||||
shuffle_size: 1500
|
||||
batch_conf:
|
||||
batch_size: 256
|
||||
|
||||
model:
|
||||
hidden_dim: 128
|
||||
subsampling:
|
||||
type: linear
|
||||
body:
|
||||
type: gru
|
||||
num_layers: 2
|
||||
|
||||
optim: adam
|
||||
optim_conf:
|
||||
lr: 0.001
|
||||
|
||||
training_config:
|
||||
grad_clip: 5
|
||||
max_epoch: 80
|
||||
log_interval: 10
|
||||
|
||||
44
examples/hi_xiaowen/s0/conf/tcn.yaml
Normal file
44
examples/hi_xiaowen/s0/conf/tcn.yaml
Normal file
@ -0,0 +1,44 @@
|
||||
dataset_conf:
|
||||
filter_conf:
|
||||
max_length: 2048
|
||||
min_length: 0
|
||||
resample_conf:
|
||||
resample_rate: 16000
|
||||
speed_perturb: false
|
||||
fbank_conf:
|
||||
num_mel_bins: 40
|
||||
frame_shift: 10
|
||||
frame_length: 25
|
||||
dither: 0.1
|
||||
spec_aug: false
|
||||
spec_aug_conf:
|
||||
num_t_mask: 2
|
||||
num_f_mask: 2
|
||||
max_t: 50
|
||||
max_f: 30
|
||||
shuffle: true
|
||||
shuffle_conf:
|
||||
shuffle_size: 1500
|
||||
batch_conf:
|
||||
batch_size: 256
|
||||
|
||||
model:
|
||||
hidden_dim: 64
|
||||
subsampling:
|
||||
type: linear
|
||||
body:
|
||||
type: tcn
|
||||
ds: false
|
||||
num_layers: 4
|
||||
kernel_size: 8
|
||||
dropout: 0.1
|
||||
|
||||
optim: adam
|
||||
optim_conf:
|
||||
lr: 0.001
|
||||
|
||||
training_config:
|
||||
grad_clip: 5
|
||||
max_epoch: 80
|
||||
log_interval: 10
|
||||
|
||||
1
examples/hi_xiaowen/s0/kws
Symbolic link
1
examples/hi_xiaowen/s0/kws
Symbolic link
@ -0,0 +1 @@
|
||||
../../../kws
|
||||
65
examples/hi_xiaowen/s0/local/mobvoi_data_download.sh
Executable file
65
examples/hi_xiaowen/s0/local/mobvoi_data_download.sh
Executable file
@ -0,0 +1,65 @@
|
||||
#!/bin/bash

# Copyright 2018-2020  Yiming Wang
#           2018-2020  Daniel Povey
#           2021       Binbin Zhang

# Download and extract the Mobvoi hotword dataset and its resources archive.
#
# Usage: local/mobvoi_data_download.sh [--dl_dir <dir>]
#
#   --dl_dir  Directory that receives the downloaded archives and the
#             extracted data (default: data/download).
#
# Archives that are already present are not downloaded again, and an archive
# whose extracted directory already exists under dl_dir is not re-extracted.
# Exits 0 on success and 1 on any download or extraction failure.

[ -f ./path.sh ] && . ./path.sh

dl_dir=data/download

# Lets the caller override the variables defined above (e.g. --dl_dir).
. tools/parse_options.sh || exit 1;

mkdir -p "$dl_dir" || exit 1;

dataset=mobvoi_hotword_dataset.tgz
resources=mobvoi_hotword_dataset_resources.tgz

# base url for downloads.
data_url=http://www.openslr.org/resources/87

# On the CLSP grid a shared copy of the archives is looked up first.
if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
  src_path=/export/fs04/a07/ywang/mobvoihotwords
else
  src_path=$dl_dir
fi

# Track the location of each archive separately: a missing archive is always
# downloaded into dl_dir, which may differ from src_path, and extraction must
# read from wherever the archive actually is.
dataset_archive=$src_path/$dataset
resources_archive=$src_path/$resources

if [ ! -f "$dataset_archive" ] || [ ! -f "$resources_archive" ]; then
  if ! command -v wget >/dev/null; then
    echo "$0: wget is not installed."
    exit 1;
  fi

  if [ ! -f "$dataset_archive" ]; then
    dataset_archive=$dl_dir/$dataset
    echo "$0: downloading data from $data_url/$dataset. This may take some time, please be patient."
    if ! wget --no-check-certificate -O "$dataset_archive" "$data_url/$dataset"; then
      echo "$0: error executing wget $data_url/$dataset"
      # wget -O leaves a partial file behind; remove it so the next run
      # does not mistake it for a complete archive.
      rm -f -- "$dataset_archive"
      exit 1;
    fi
  fi

  if [ ! -f "$resources_archive" ]; then
    resources_archive=$dl_dir/$resources
    if ! wget --no-check-certificate -O "$resources_archive" "$data_url/$resources"; then
      echo "$0: error executing wget $data_url/$resources"
      rm -f -- "$resources_archive"
      exit 1;
    fi
  fi
fi

dataset_name=$(basename "$dataset" .tgz)
if [ -d "$dl_dir/$dataset_name" ]; then
  echo "Not extracting $dataset_name as it is already there."
else
  echo "Extracting $dataset..."
  tar -xvzf "$dataset_archive" -C "$dl_dir" || exit 1;
  echo "Done extracting $dataset."
fi

resources_name=$(basename "$resources" .tgz)
if [ -d "$dl_dir/$resources_name" ]; then
  # Report the resources archive here (the message used to name the dataset
  # archive with a .tar.gz suffix that never matched).
  echo "Not extracting $resources_name as it is already there."
else
  echo "Extracting $resources..."
  tar -xvzf "$resources_archive" -C "$dl_dir" || exit 1;
  echo "Done extracting $resources."
fi

exit 0
|
||||
43
examples/hi_xiaowen/s0/local/prepare_data.py
Executable file
43
examples/hi_xiaowen/s0/local/prepare_data.py
Executable file
@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright 2018-2020 Yiming Wang
|
||||
# 2018-2020 Daniel Povey
|
||||
# 2021 Binbin Zhang
|
||||
# Apache 2.0
|
||||
""" This script prepares the Mobvoi data into kaldi format.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import json
|
||||
|
||||
|
||||
def main():
    """Write Kaldi-style ``wav.scp`` and ``text`` files from a JSON manifest.

    Command-line arguments (positional):
        wav_dir: directory containing all the wav files.
        path: path to the JSON manifest. It is iterated as a sequence of
            entries, each providing ``utt_id`` and ``keyword_id``.
        out_dir: directory that receives ``wav.scp`` and ``text``; it is
            created if it does not exist yet.

    Each ``wav.scp`` line is ``<utt_id> <abs wav_dir>/<utt_id>.wav`` and each
    ``text`` line is ``<utt_id> <keyword_id as int>``.

    Raises:
        KeyError: if an entry lacks ``utt_id`` or ``keyword_id``.
        ValueError: if a ``keyword_id`` cannot be converted to ``int``.
    """
    parser = argparse.ArgumentParser(description="""Prepare data.""")
    parser.add_argument('wav_dir',
                        type=str,
                        help='dir containing all the wav files')
    parser.add_argument('path', type=str, help='path to the json file')
    parser.add_argument('out_dir', type=str, help='out dir')
    args = parser.parse_args()

    with open(args.path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Parse every entry before any output file is opened, so a malformed
    # manifest never leaves truncated wav.scp/text files behind.
    utterances = [(entry['utt_id'], int(entry['keyword_id']))
                  for entry in data]

    abs_dir = os.path.abspath(args.wav_dir)
    os.makedirs(args.out_dir, exist_ok=True)
    wav_path = os.path.join(args.out_dir, 'wav.scp')
    text_path = os.path.join(args.out_dir, 'text')
    with open(wav_path, 'w', encoding='utf-8') as f_wav, \
            open(text_path, 'w', encoding='utf-8') as f_text:
        for utt, keyword in utterances:
            f_wav.write('{} {}\n'.format(utt,
                                         os.path.join(abs_dir, utt + ".wav")))
            f_text.write('{} {}\n'.format(utt, keyword))


if __name__ == "__main__":
    main()
|
||||
5
examples/hi_xiaowen/s0/path.sh
Executable file
5
examples/hi_xiaowen/s0/path.sh
Executable file
@ -0,0 +1,5 @@
|
||||
# Environment setup sourced by the recipe scripts; expected to be sourced
# from the recipe directory (examples/hi_xiaowen/s0).

export PATH=$PWD:$PATH

# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
# The repository root is three levels above this recipe directory (the kws
# and tools symlinks point to ../../../kws and ../../../tools), so that is
# the directory to put on PYTHONPATH. The existing value is appended only
# when it is non-empty, which avoids a trailing empty entry and stays safe
# under `set -u`.
export PYTHONPATH=../../../${PYTHONPATH:+:${PYTHONPATH}}
|
||||
126
examples/hi_xiaowen/s0/run.sh
Executable file
126
examples/hi_xiaowen/s0/run.sh
Executable file
@ -0,0 +1,126 @@
|
||||
#!/bin/bash
# Copyright 2021  Binbin Zhang

# Keyword-spotting recipe for the Mobvoi "Hi Xiaowen" / "Nihao Wenwen" data.
#
# Stages (select with --stage / --stop_stage):
#   -1  download and extract the dataset
#    0  build dict/words.txt and data/{train,dev,eval}/{wav.scp,text}
#    1  compute global CMVN and build data.list for every split
#    2  train the model
#    3  average checkpoints, score the eval set, compute DET statistics
#    4  export the TorchScript model and its quantized variant
#
# Every variable assigned before tools/parse_options.sh is sourced can be
# overridden from the command line as --<name> <value>.

. ./path.sh

export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"

stage=0
stop_stage=4
num_keywords=2

config=conf/ds_tcn.yaml
norm_mean=true
norm_var=true
gpu_id=0

checkpoint=
dir=exp/ds_tcn

num_average=30
# Left empty on purpose: the default is derived from $dir and $num_average
# after option parsing (see below). Pass --score_checkpoint to force a path.
score_checkpoint=

download_dir=/export/expts6/binbinzhang/data/

. tools/parse_options.sh || exit 1;

# Derive the default only now, so that --dir / --num_average given on the
# command line are reflected in the checkpoint that gets scored and exported.
score_checkpoint=${score_checkpoint:-$dir/avg_${num_average}.pt}

set -euo pipefail

if [ "${stage}" -le -1 ] && [ "${stop_stage}" -ge -1 ]; then
  echo "Download and extracte all datasets"
  local/mobvoi_data_download.sh --dl_dir "$download_dir"
fi


if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
  echo "Preparing datasets..."
  # -p keeps the stage re-runnable: a plain mkdir fails once dict/ exists and
  # would abort the whole script under `set -e`.
  mkdir -p dict
  echo "<filler> -1" > dict/words.txt
  echo "Hi_Xiaowen 0" >> dict/words.txt
  echo "Nihao_Wenwen 1" >> dict/words.txt

  resource_dir=$download_dir/mobvoi_hotword_dataset_resources
  for folder in train dev eval; do
    mkdir -p "data/$folder"
    # The manifests of the eval split are named *_test.json.
    split=$folder
    if [ "$folder" = "eval" ]; then
      split=test
    fi
    # p_* / n_* manifests are converted separately, then merged per split.
    for prefix in p n; do
      mkdir -p "data/${prefix}_$folder"
      json_path=$resource_dir/${prefix}_$split.json
      local/prepare_data.py "$download_dir/mobvoi_hotword_dataset" "$json_path" \
        "data/${prefix}_$folder"
    done
    cat "data/p_$folder/wav.scp" "data/n_$folder/wav.scp" > "data/$folder/wav.scp"
    cat "data/p_$folder/text" "data/n_$folder/text" > "data/$folder/text"
    rm -rf "data/p_$folder" "data/n_$folder"
  done
fi


if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
  echo "Compute CMVN and Format datasets"
  tools/compute_cmvn_stats.py --num_workers 16 --train_config "$config" \
    --in_scp data/train/wav.scp \
    --out_cmvn data/train/global_cmvn

  for x in train dev eval; do
    tools/wav_to_duration.sh --nj 8 "data/$x/wav.scp" "data/$x/wav.dur"
    tools/make_list.py "data/$x/wav.scp" "data/$x/text" \
      "data/$x/wav.dur" "data/$x/data.list"
  done
fi


if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
  echo "Start training ..."
  mkdir -p "$dir"
  cmvn_opts=
  $norm_mean && cmvn_opts="--cmvn_file data/train/global_cmvn"
  $norm_var && cmvn_opts="$cmvn_opts --norm_var"
  # $cmvn_opts and the ${checkpoint:+...} expansion are intentionally left
  # unquoted: they must split into separate arguments, or vanish when empty.
  python kws/bin/train.py --gpu "$gpu_id" \
    --config "$config" \
    --train_data data/train/data.list \
    --cv_data data/dev/data.list \
    --model_dir "$dir" \
    --num_workers 8 \
    --num_keywords "$num_keywords" \
    --min_duration 50 \
    $cmvn_opts \
    ${checkpoint:+--checkpoint $checkpoint}
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
  # Do model average
  python kws/bin/average_model.py \
    --dst_model "$score_checkpoint" \
    --src_path "$dir" \
    --num "${num_average}" \
    --val_best

  # Compute posterior score
  result_dir=$dir/test_$(basename "$score_checkpoint")
  mkdir -p "$result_dir"
  python kws/bin/score.py --gpu -1 \
    --config "$dir/config.yaml" \
    --test_data data/eval/data.list \
    --batch_size 256 \
    --checkpoint "$score_checkpoint" \
    --score_file "$result_dir/score.txt"

  # Compute detection error tradeoff
  for keyword in 0 1; do
    python kws/bin/compute_det.py \
      --keyword "$keyword" \
      --test_data data/eval/data.list \
      --score_file "$result_dir/score.txt" \
      --stats_file "$result_dir/stats.${keyword}.txt"
  done
fi


if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
  python kws/bin/export_jit.py --config "$dir/config.yaml" \
    --checkpoint "$score_checkpoint" \
    --output_file "$dir/final.zip" \
    --output_quant_file "$dir/final.quant.zip"
fi
|
||||
1
examples/hi_xiaowen/s0/tools
Symbolic link
1
examples/hi_xiaowen/s0/tools
Symbolic link
@ -0,0 +1 @@
|
||||
../../../tools
|
||||
Loading…
x
Reference in New Issue
Block a user