[examples] speech command data prepare (#27)
* [examples] added speech command data preparation code * update * update path.sh
This commit is contained in:
parent
5241491e95
commit
8be4bef405
1
examples/speechcommand_v1/s0/kws
Symbolic link
1
examples/speechcommand_v1/s0/kws
Symbolic link
@ -0,0 +1 @@
|
||||
../../../kws
|
||||
43
examples/speechcommand_v1/s0/local/data_download.sh
Executable file
43
examples/speechcommand_v1/s0/local/data_download.sh
Executable file
@ -0,0 +1,43 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Downloads the Google Speech Commands v0.01 archive into <dl_dir> and
# extracts it into <dl_dir>/speech_commands_v1/audio.
# Usage: local/data_download.sh [--dl_dir <dir>]
# Both steps are skipped when their result is already present, so the script
# can be re-run safely.

[ -f ./path.sh ] && . ./path.sh

# Default download location; can be overridden with --dl_dir.
dl_dir=./data/local

. tools/parse_options.sh || exit 1;

data_dir=$dl_dir
file_name=speech_commands_v0.01.tar.gz
speech_command_dir=$data_dir/speech_commands_v1
audio_dir=$speech_command_dir/audio
url=http://download.tensorflow.org/data/$file_name

mkdir -p "$data_dir" || exit 1

if [ ! -f "$data_dir/$file_name" ]; then
  echo "downloading $url..."
  # Download to a temporary name first: wget -O creates/truncates its output
  # file even when the transfer fails, and a partial archive under the final
  # name would be mistaken for a finished download on the next run.
  if ! wget -O "$data_dir/$file_name.part" "$url"; then
    rm -f "$data_dir/$file_name.part"
    echo "failed to download $url" >&2
    exit 1
  fi
  mv "$data_dir/$file_name.part" "$data_dir/$file_name" || exit 1
else
  echo "$file_name exists in $data_dir, skipping download"
fi

if [ ! -f "$speech_command_dir/.extracted" ]; then
  mkdir -p "$audio_dir" || exit 1
  # Only write the .extracted marker after tar succeeded; otherwise a broken
  # extraction would be skipped forever.
  if ! tar -xzvf "$data_dir/$file_name" -C "$audio_dir"; then
    echo "failed to extract $data_dir/$file_name" >&2
    exit 1
  fi
  touch "$speech_command_dir/.extracted"
else
  echo "$speech_command_dir/.extracted exists, skipping extraction"
fi

exit 0
|
||||
49
examples/speechcommand_v1/s0/local/prepare_speech_command.py
Executable file
49
examples/speechcommand_v1/s0/local/prepare_speech_command.py
Executable file
@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import argparse
|
||||
|
||||
# Target classes; any keyword folder not listed here is labelled 'unknown'.
CLASSES = 'unknown, yes, no, up, down, left, right, on, off, stop, go'.split(
    ', ')
# Class name -> class index rendered as a string (the label written to `text`).
CLASS_TO_IDX = {name: str(idx) for idx, name in enumerate(CLASSES)}


def main(argv=None):
    """Write Kaldi-style `wav.scp` and `text` files for a list of wav paths.

    Each non-blank line of --wav_list is a path ending in
    `<keyword>/<file>.wav`. For every such path one line is written to
    `<data_dir>/wav.scp` (`<wav_id> <path>`) and one to `<data_dir>/text`
    (`<wav_id> <label>`), where `wav_id` is `<keyword>_<file stem>` and
    `label` is the index of the keyword in CLASSES (index of 'unknown' when
    the keyword is not in CLASSES).

    Args:
        argv: optional argument list; defaults to `sys.argv[1:]`.

    Raises:
        ValueError: if a listed path has no parent directory component.
    """
    parser = argparse.ArgumentParser(
        description='prepare kaldi format file for google speech command')
    parser.add_argument(
        '--wav_list',
        required=True,
        help='file listing full paths of wav files in google speech command '
        'dataset, one per line')
    parser.add_argument('--data_dir',
                        required=True,
                        help='folder to write kaldi format files')
    args = parser.parse_args(argv)

    data_dir = args.data_dir
    os.makedirs(data_dir, exist_ok=True)

    # Context managers guarantee all three files are closed even when a
    # malformed line raises in the middle of the loop.
    with open(args.wav_list) as f_list, \
            open(os.path.join(data_dir, 'wav.scp'), 'w') as f_wav_scp, \
            open(os.path.join(data_dir, 'text'), 'w') as f_text:
        for line in f_list:
            wav_path = line.strip()
            if not wav_path:
                # Tolerate blank lines instead of failing on unpacking.
                continue
            parts = wav_path.split('/')
            if len(parts) < 2:
                raise ValueError(
                    'expected a path ending in <keyword>/<file>.wav, got: ' +
                    wav_path)
            keyword, file_name = parts[-2:]
            # Everything before the first dot of the file name is the stem.
            wav_id = '_'.join([keyword, file_name.split('.')[0]])
            label = CLASS_TO_IDX.get(keyword, CLASS_TO_IDX['unknown'])
            f_wav_scp.write(wav_id + ' ' + wav_path + '\n')
            f_text.write(wav_id + ' ' + label + '\n')


if __name__ == '__main__':
    main()
|
||||
55
examples/speechcommand_v1/s0/local/split_dataset.py
Executable file
55
examples/speechcommand_v1/s0/local/split_dataset.py
Executable file
@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import argparse
|
||||
|
||||
|
||||
def move_files(src_folder, to_folder, list_file):
    """Move the files named in `list_file` from `src_folder` to `to_folder`.

    Args:
        src_folder: folder the listed relative paths are resolved against.
        to_folder: destination root; the directory part of each listed path
            is recreated underneath it (created on demand, parents included).
        list_file: text file with one relative file path per line.
    """
    with open(list_file) as f:
        for line in f:
            rel_path = line.rstrip()
            if not rel_path:
                # A blank line would resolve to src_folder itself and move
                # the whole source tree into to_folder, so skip it.
                continue
            dest = os.path.join(to_folder, os.path.dirname(rel_path))
            # makedirs also covers a missing to_folder and nested paths,
            # where a plain mkdir would fail.
            os.makedirs(dest, exist_ok=True)
            shutil.move(os.path.join(src_folder, rel_path), dest)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Splits the google speech commands into train, validation and test set.
    # Files named in <root>/audio/testing_list.txt and
    # <root>/audio/validation_list.txt are moved to <root>/test and
    # <root>/valid; whatever remains in <root>/audio becomes <root>/train.
    parser = argparse.ArgumentParser(
        description='Split google command dataset.')
    parser.add_argument(
        'root',
        type=str,
        help='the path to the root folder of the google commands dataset')
    args = parser.parse_args()

    audio_folder = os.path.join(args.root, 'audio')
    validation_path = os.path.join(audio_folder, 'validation_list.txt')
    test_path = os.path.join(audio_folder, 'testing_list.txt')

    valid_folder = os.path.join(args.root, 'valid')
    test_folder = os.path.join(args.root, 'test')
    train_folder = os.path.join(args.root, 'train')

    if not os.path.isdir(audio_folder) and os.path.isdir(train_folder):
        # The last step of a successful split renames audio/ to train/, so
        # this state means the split is already done; re-running must not
        # fail.
        print('%s already exists, skip splitting' % train_folder)
    else:
        # exist_ok lets an interrupted earlier run be resumed.
        os.makedirs(valid_folder, exist_ok=True)
        os.makedirs(test_folder, exist_ok=True)

        move_files(audio_folder, test_folder, test_path)
        move_files(audio_folder, valid_folder, validation_path)
        os.rename(audio_folder, train_folder)
|
||||
5
examples/speechcommand_v1/s0/path.sh
Executable file
5
examples/speechcommand_v1/s0/path.sh
Executable file
@ -0,0 +1,5 @@
|
||||
# Environment setup sourced by the recipe scripts in this directory.
export PATH="$PWD:$PATH"

# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
# Prepend the directory three levels up (relative to the current directory).
# The ':' separator is only added when PYTHONPATH already has a value, so an
# unset PYTHONPATH neither leaves a dangling empty entry nor trips `set -u`.
export PYTHONPATH="../../../${PYTHONPATH:+:$PYTHONPATH}"
|
||||
37
examples/speechcommand_v1/s0/run.sh
Executable file
37
examples/speechcommand_v1/s0/run.sh
Executable file
@ -0,0 +1,37 @@
|
||||
#!/bin/bash
|
||||
# Copyright 2021 Binbin Zhang
|
||||
# Jingyong Hou
|
||||
|
||||
# Recipe driver:
#   stage -1: download, extract and split the Speech Commands v1 dataset
#   stage  0: write wav.list, wav.scp and text for train/test/valid under data/
# Usage: ./run.sh [--stage N] [--stop_stage N] [--download_dir DIR]
#                 [--speech_command_dir DIR]

. ./path.sh

export CUDA_VISIBLE_DEVICES="0"

stage=-1
stop_stage=0

# your data dir
download_dir=/mnt/mnt-data-3/jingyong.hou/data
# Declared empty so it remains a known option name for parse_options.sh
# (assumes Kaldi-style parse_options.sh, which only accepts options for
# already-defined variables -- TODO confirm); the default is derived after
# parsing so that --download_dir is honoured.
speech_command_dir=
. tools/parse_options.sh || exit 1;

speech_command_dir=${speech_command_dir:-$download_dir/speech_commands_v1}

set -euo pipefail

if [ "${stage}" -le -1 ] && [ "${stop_stage}" -ge -1 ]; then
  echo "Download and extract all datasets"
  local/data_download.sh --dl_dir "$download_dir"
  # data_download.sh always extracts into <dl_dir>/speech_commands_v1.
  python local/split_dataset.py "$download_dir/speech_commands_v1"
fi


if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
  echo "Start preparing Kaldi format files"
  for x in train test valid; do
    data=data/$x
    mkdir -p "$data"
    # make wav.scp and text files
    # The pattern is quoted so that find, not the shell, expands it. grep
    # exits non-zero when nothing is left, which would otherwise abort the
    # script silently under `set -e -o pipefail`.
    if ! find "$speech_command_dir/$x" -name '*.wav' \
        | grep -v "_background_noise_" > "$data/wav.list"; then
      echo "no wav files found in $speech_command_dir/$x" >&2
      exit 1
    fi
    python local/prepare_speech_command.py --wav_list="$data/wav.list" --data_dir="$data"
  done
fi
|
||||
|
||||
1
examples/speechcommand_v1/s0/tools
Symbolic link
1
examples/speechcommand_v1/s0/tools
Symbolic link
@ -0,0 +1 @@
|
||||
../../../tools
|
||||
Loading…
x
Reference in New Issue
Block a user