[examples] speech command data prepare (#27)

* [examples] added speech command data preparation code

* update

* update path.sh
This commit is contained in:
xiaohou 2021-12-06 12:00:25 +08:00 committed by GitHub
parent 5241491e95
commit 8be4bef405
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 191 additions and 0 deletions

View File

@ -0,0 +1 @@
../../../kws

View File

@ -0,0 +1,43 @@
#!/bin/bash
# Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Download the Google Speech Commands v0.01 archive and extract it.
#
# Usage: local/data_download.sh [--dl_dir <dir>]
#
# The archive is stored as <dl_dir>/speech_commands_v0.01.tar.gz and extracted
# into <dl_dir>/speech_commands_v1/audio. Both steps are skipped when their
# result is already present, so the script can be re-run safely.

[ -f ./path.sh ] && . ./path.sh

# Directory that receives the archive; can be overridden with --dl_dir.
dl_dir=./data/local

. tools/parse_options.sh || exit 1;

data_dir=$dl_dir
file_name=speech_commands_v0.01.tar.gz
speech_command_dir=$data_dir/speech_commands_v1
audio_dir=$speech_command_dir/audio
url=http://download.tensorflow.org/data/$file_name

mkdir -p "$data_dir" || exit 1

if [ ! -f "$data_dir/$file_name" ]; then
  echo "downloading $url..."
  # Download under a temporary name first: "wget -O" creates the output file
  # even when the transfer fails, and a truncated archive left under the final
  # name would make every later run skip the download.
  if ! wget -O "$data_dir/$file_name.part" "$url"; then
    rm -f "$data_dir/$file_name.part"
    echo "$0: failed to download $url" >&2
    exit 1
  fi
  mv "$data_dir/$file_name.part" "$data_dir/$file_name" || exit 1
else
  echo "$file_name exist in $data_dir, skip download it"
fi

if [ ! -f "$speech_command_dir/.extracted" ]; then
  mkdir -p "$audio_dir" || exit 1
  # Only write the marker file when extraction actually succeeded.
  tar -xzvf "$data_dir/$file_name" -C "$audio_dir" || exit 1
  touch "$speech_command_dir/.extracted"
else
  echo "$speech_command_dir/.extracted exist, skip extraction"
fi

exit 0

View File

@ -0,0 +1,49 @@
#!/usr/bin/env python3
# Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import argparse
# Target labels of the task. Index 0 ('unknown') is the catch-all class used
# for every keyword that is not one of the ten commands listed after it.
CLASSES = [
    'unknown', 'yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off',
    'stop', 'go'
]
# Maps each class name to its position in CLASSES, stored as a string.
CLASS_TO_IDX = {name: str(idx) for idx, name in enumerate(CLASSES)}
if __name__ == '__main__':
    # Reads a list of wav paths (one per line, laid out as
    # .../<keyword>/<file>.wav) and writes two files into --data_dir:
    #   wav.scp : "<keyword>_<file> <path>"
    #   text    : "<keyword>_<file> <class index>"
    parser = argparse.ArgumentParser(
        description='prepare kaldi format file for google speech command')
    parser.add_argument(
        '--wav_list',
        required=True,
        help='text file listing the full path of each wav file in google '
        'speech command dataset, one path per line')
    parser.add_argument('--data_dir',
                        required=True,
                        help='folder to write kaldi format files')
    args = parser.parse_args()

    data_dir = args.data_dir
    wav_scp_path = os.path.join(data_dir, 'wav.scp')
    text_path = os.path.join(data_dir, 'text')

    # Context managers make sure all three files are closed even when a
    # malformed line raises part-way through.
    with open(args.wav_list) as f_list, \
            open(wav_scp_path, 'w') as f_wav_scp, \
            open(text_path, 'w') as f_text:
        for line in f_list:
            file_dir = line.strip()
            if not file_dir:
                # Blank lines carry no path; unpacking them below would fail.
                continue
            # The parent directory name is the spoken keyword.
            keyword, file_name = file_dir.split('/')[-2:]
            file_name_new = file_name.split('.')[0]
            wav_id = '_'.join([keyword, file_name_new])
            f_wav_scp.write(wav_id + ' ' + file_dir + '\n')
            # Keywords outside CLASSES fall back to the 'unknown' class.
            label = CLASS_TO_IDX.get(keyword, CLASS_TO_IDX['unknown'])
            f_text.write(wav_id + ' ' + label + '\n')

View File

@ -0,0 +1,55 @@
#!/usr/bin/env python3
# Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
import argparse
def move_files(src_folder, to_folder, list_file):
    """Move the files named in ``list_file`` from one tree to another.

    Args:
        src_folder: root folder the listed relative paths are resolved
            against.
        to_folder: root folder that receives the files; the relative
            directory of each entry is recreated below it.
        list_file: text file with one relative file path per line.

    Raises:
        OSError: if ``list_file`` cannot be read or a listed file cannot be
            moved (for example because it does not exist).
    """
    with open(list_file) as f:
        for line in f:
            rel_path = line.rstrip()
            if not rel_path:
                # An empty entry would resolve to src_folder itself and move
                # the whole source tree into to_folder, so skip it.
                continue
            dest = os.path.join(to_folder, os.path.dirname(rel_path))
            # makedirs also handles entries nested more than one level deep.
            os.makedirs(dest, exist_ok=True)
            shutil.move(os.path.join(src_folder, rel_path), dest)
if __name__ == '__main__':
    # Splits the google speech commands into train, validation and test set.
    #
    # Expects <root>/audio to hold the extracted dataset together with its
    # validation_list.txt and testing_list.txt. The listed files are moved to
    # <root>/valid and <root>/test, and what remains of <root>/audio is
    # renamed to <root>/train.
    parser = argparse.ArgumentParser(
        description='Split google command dataset.')
    parser.add_argument(
        'root',
        type=str,
        help='the path to the root folder of the google commands dataset')
    args = parser.parse_args()

    audio_folder = os.path.join(args.root, 'audio')
    validation_path = os.path.join(audio_folder, 'validation_list.txt')
    test_path = os.path.join(audio_folder, 'testing_list.txt')

    valid_folder = os.path.join(args.root, 'valid')
    test_folder = os.path.join(args.root, 'test')
    train_folder = os.path.join(args.root, 'train')

    if not os.path.isdir(audio_folder) and os.path.isdir(train_folder):
        # The final step of a split renames audio/ to train/, so this state
        # means an earlier run already finished. Skip instead of crashing on
        # the mkdir calls below when the script is invoked again.
        print('{} already exists, skip splitting'.format(train_folder))
    else:
        # Plain mkdir on purpose: it fails fast if a previous run was
        # interrupted and left a partially filled valid/ or test/ folder.
        os.mkdir(valid_folder)
        os.mkdir(test_folder)

        move_files(audio_folder, test_folder, test_path)
        move_files(audio_folder, valid_folder, validation_path)
        os.rename(audio_folder, train_folder)

View File

@ -0,0 +1,5 @@
# Environment setup sourced by the scripts of this example.

# Put the current directory first on the executable search path.
export PATH="$PWD:$PATH"
# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
# Prepend the directory three levels up (presumably the repository root that
# holds the kws package - confirm) to the Python module search path. The
# previous value is appended only when it is non-empty, which avoids a
# trailing ':' (an empty search-path entry) and works under "set -u".
export PYTHONPATH="../../../${PYTHONPATH:+:$PYTHONPATH}"

View File

@ -0,0 +1,37 @@
#!/bin/bash
# Copyright 2021 Binbin Zhang
# Jingyong Hou
# Data preparation recipe for the Google Speech Commands example.
#
# Stages (selected with --stage / --stop_stage):
#   -1  download and extract the dataset, then split it into
#       train / valid / test folders
#    0  write data/<set>/wav.list, wav.scp and text for each of the sets

. ./path.sh

export CUDA_VISIBLE_DEVICES="0"

stage=-1
stop_stage=0

# your data dir
download_dir=/mnt/mnt-data-3/jingyong.hou/data
# Declared empty so it remains a valid command line option; when it is not
# given explicitly it is derived from download_dir after option parsing, so
# that --download_dir also takes effect in stage 0.
speech_command_dir=

. tools/parse_options.sh || exit 1;

set -euo pipefail

if [ -z "$speech_command_dir" ]; then
  speech_command_dir=$download_dir/speech_commands_v1
fi

if [ "${stage}" -le -1 ] && [ "${stop_stage}" -ge -1 ]; then
  echo "Download and extract all datasets"
  local/data_download.sh --dl_dir "$download_dir"
  python local/split_dataset.py "$download_dir/speech_commands_v1"
fi

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
  echo "Start preparing Kaldi format files"
  for x in train test valid; do
    data=data/$x
    mkdir -p "$data"
    # make wav.scp and text files
    # The pattern is quoted so that find receives it verbatim; unquoted, the
    # shell would expand it first if a .wav file exists in the working
    # directory.
    find "$speech_command_dir/$x" -name '*.wav' \
      | grep -v "_background_noise_" > "$data/wav.list"
    python local/prepare_speech_command.py \
      --wav_list="$data/wav.list" --data_dir="$data"
  done
fi

View File

@ -0,0 +1 @@
../../../tools