[examples] speech command data prepare (#27)
* [examples] added speech command data preparation code * update * update path.sh
This commit is contained in:
parent
5241491e95
commit
8be4bef405
1
examples/speechcommand_v1/s0/kws
Symbolic link
1
examples/speechcommand_v1/s0/kws
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../../kws
|
||||||
43
examples/speechcommand_v1/s0/local/data_download.sh
Executable file
43
examples/speechcommand_v1/s0/local/data_download.sh
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# Download the Google Speech Commands v0.01 archive and unpack it.
#
# Usage: local/data_download.sh [--dl_dir <dir>]
#   --dl_dir  directory that receives the tarball and the extracted
#             speech_commands_v1/audio tree (default: ./data/local)
#
# Both steps are skipped when their result is already present: the tarball
# itself for the download, a ".extracted" marker file for the extraction.

[ -f ./path.sh ] && . ./path.sh

dl_dir=./data/local

# Lets the caller override the variables defined above (here: --dl_dir).
. tools/parse_options.sh || exit 1;

data_dir=$dl_dir
file_name=speech_commands_v0.01.tar.gz
speech_command_dir=$data_dir/speech_commands_v1
audio_dir=$speech_command_dir/audio
url=http://download.tensorflow.org/data/$file_name

mkdir -p "$data_dir" || exit 1

if [ ! -f "$data_dir/$file_name" ]; then
  echo "downloading $url..."
  # Fetch under a temporary name: "wget -O" creates the output file even when
  # the transfer fails, and a leftover partial file would make the next run
  # skip the download.
  if ! wget -O "$data_dir/$file_name.part" "$url"; then
    echo "failed to download $url" >&2
    rm -f "$data_dir/$file_name.part"
    exit 1
  fi
  mv "$data_dir/$file_name.part" "$data_dir/$file_name" || exit 1
else
  echo "$file_name exist in $data_dir, skip download it"
fi

if [ ! -f "$speech_command_dir/.extracted" ]; then
  mkdir -p "$audio_dir" || exit 1
  # Stop before writing the marker if tar fails, so a broken extraction is
  # retried on the next run instead of being recorded as done.
  tar -xzvf "$data_dir/$file_name" -C "$audio_dir" || exit 1
  touch "$speech_command_dir/.extracted" || exit 1
else
  echo "$speech_command_dir/.extracted exists, skip extraction"
fi

exit 0
|
||||||
49
examples/speechcommand_v1/s0/local/prepare_speech_command.py
Executable file
49
examples/speechcommand_v1/s0/local/prepare_speech_command.py
Executable file
@ -0,0 +1,49 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
# Label set: index 0 is the catch-all 'unknown' class, followed by the ten
# target keywords.
CLASSES = 'unknown, yes, no, up, down, left, right, on, off, stop, go'.split(
    ', ')
# Keyword -> label index; the index is stored as a string because it is
# written verbatim into the 'text' file.
CLASS_TO_IDX = {CLASSES[i]: str(i) for i in range(len(CLASSES))}

if __name__ == '__main__':
    # Reads a list of wav paths (one per line, laid out as
    # .../<keyword>/<file>.wav) and writes two files into --data_dir:
    #   wav.scp : "<keyword>_<file> <wav path>"
    #   text    : "<keyword>_<file> <label index>"
    parser = argparse.ArgumentParser(
        description='prepare kaldi format file for google speech command')
    parser.add_argument(
        '--wav_list',
        required=True,
        help='text file listing one full wav path per line '
        '(google speech command dataset)')
    parser.add_argument('--data_dir',
                        required=True,
                        help='folder to write kaldi format files')
    args = parser.parse_args()

    data_dir = args.data_dir
    # Context managers guarantee that all three files are flushed and closed
    # even if a malformed line raises part-way through.
    with open(args.wav_list) as f_list, \
            open(os.path.join(data_dir, 'wav.scp'), 'w') as f_wav_scp, \
            open(os.path.join(data_dir, 'text'), 'w') as f_text:
        for line in f_list:
            file_dir = line.strip()
            if not file_dir:
                # A blank line has no <keyword>/<file> components to unpack.
                continue
            # The parent folder name is the spoken keyword.
            keyword, file_name = file_dir.split('/')[-2:]
            # Everything before the first '.' of the file name is the id.
            file_name_new = file_name.split('.')[0]
            wav_id = '_'.join([keyword, file_name_new])
            f_wav_scp.write(wav_id + ' ' + file_dir + '\n')
            # Keywords outside CLASSES collapse onto the 'unknown' label.
            label = CLASS_TO_IDX.get(keyword, CLASS_TO_IDX['unknown'])
            f_text.write(wav_id + ' ' + label + '\n')
|
||||||
55
examples/speechcommand_v1/s0/local/split_dataset.py
Executable file
55
examples/speechcommand_v1/s0/local/split_dataset.py
Executable file
@ -0,0 +1,55 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
def move_files(src_folder, to_folder, list_file):
    """Move the files named in ``list_file`` from one tree to another.

    Args:
        src_folder: root folder the listed paths are relative to.
        to_folder: root folder that receives the files; the relative
            directory of each file is recreated underneath it.
        list_file: text file with one relative file path per line.

    Raises:
        OSError: if the list file cannot be read or a listed file cannot be
            moved (for example because it does not exist).
    """
    with open(list_file) as f:
        for line in f:
            rel_path = line.rstrip()
            if not rel_path:
                # os.path.join(src_folder, '') refers to src_folder itself,
                # so a blank line would move the whole source tree.
                continue
            dest = os.path.join(to_folder, os.path.dirname(rel_path))
            # makedirs also covers nested relative directories, and
            # exist_ok removes the separate existence check.
            os.makedirs(dest, exist_ok=True)
            shutil.move(os.path.join(src_folder, rel_path), dest)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Splits the google speech commands into train, validation and test set.
    #
    # Expects <root>/audio to hold the extracted dataset together with its
    # validation_list.txt and testing_list.txt. Files named in those lists
    # are moved to <root>/valid and <root>/test; whatever remains in
    # <root>/audio becomes <root>/train.
    parser = argparse.ArgumentParser(
        description='Split google command dataset.')
    parser.add_argument(
        'root',
        type=str,
        help='the path to the root folder of the google commands dataset')
    args = parser.parse_args()

    audio_folder = os.path.join(args.root, 'audio')
    validation_path = os.path.join(audio_folder, 'validation_list.txt')
    test_path = os.path.join(audio_folder, 'testing_list.txt')

    valid_folder = os.path.join(args.root, 'valid')
    test_folder = os.path.join(args.root, 'test')
    train_folder = os.path.join(args.root, 'train')

    if not os.path.isdir(audio_folder) and os.path.isdir(train_folder):
        # A previous run already renamed audio/ to train/. Doing nothing
        # keeps a repeated invocation from crashing on the existing folders.
        print('{} already exists and {} is gone, skip splitting'.format(
            train_folder, audio_folder))
    else:
        os.makedirs(valid_folder, exist_ok=True)
        os.makedirs(test_folder, exist_ok=True)

        move_files(audio_folder, test_folder, test_path)
        move_files(audio_folder, valid_folder, validation_path)
        # Everything not listed for test/validation is training data.
        os.rename(audio_folder, train_folder)
|
||||||
5
examples/speechcommand_v1/s0/path.sh
Executable file
5
examples/speechcommand_v1/s0/path.sh
Executable file
@ -0,0 +1,5 @@
|
|||||||
|
# Environment setup, meant to be sourced by the scripts in this directory.

# Search the current working directory first when resolving commands.
PATH="${PWD}:${PATH}"
export PATH

# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
PYTHONIOENCODING=UTF-8
export PYTHONIOENCODING

# Prepend the directory three levels up to Python's module search path.
PYTHONPATH="../../../:${PYTHONPATH}"
export PYTHONPATH
|
||||||
37
examples/speechcommand_v1/s0/run.sh
Executable file
37
examples/speechcommand_v1/s0/run.sh
Executable file
@ -0,0 +1,37 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Copyright 2021 Binbin Zhang
|
||||||
|
# Jingyong Hou
|
||||||
|
|
||||||
|
# Data preparation recipe for Google Speech Commands v1.
#   stage -1: download, extract and split the dataset
#   stage  0: write wav.list, wav.scp and text for train/test/valid
# Every variable defined before tools/parse_options.sh is sourced can be
# overridden on the command line, e.g. --stage 0 --download_dir /path.

. ./path.sh

export CUDA_VISIBLE_DEVICES="0"

stage=-1
stop_stage=0

# your data dir
download_dir=/mnt/mnt-data-3/jingyong.hou/data
# Declared empty so that it stays overridable; the default is derived after
# option parsing so that it follows a --download_dir given on the command
# line.
speech_command_dir=
. tools/parse_options.sh || exit 1;

set -euo pipefail

if [ -z "$speech_command_dir" ]; then
  speech_command_dir=$download_dir/speech_commands_v1
fi

if [ "${stage}" -le -1 ] && [ "${stop_stage}" -ge -1 ]; then
  echo "Download and extract all datasets"
  local/data_download.sh --dl_dir "$download_dir"
  # data_download.sh always extracts beneath its --dl_dir, so the split
  # operates on that location.
  python local/split_dataset.py "$download_dir/speech_commands_v1"
fi


if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
  echo "Start preparing Kaldi format files"
  for x in train test valid; do
    data=data/$x
    mkdir -p "$data"
    # make wav.list, then wav.scp and text from it
    # The pattern is quoted so that find receives it verbatim; unquoted, the
    # shell expands it against .wav files in the current directory.
    find "$speech_command_dir/$x" -name '*.wav' \
      | grep -v "_background_noise_" > "$data/wav.list"
    python local/prepare_speech_command.py \
      --wav_list="$data/wav.list" --data_dir="$data"
  done
fi
|
||||||
|
|
||||||
1
examples/speechcommand_v1/s0/tools
Symbolic link
1
examples/speechcommand_v1/s0/tools
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../../tools
|
||||||
Loading…
x
Reference in New Issue
Block a user