[examples] speech command data prepare (#27)

* [examples] added speech command data preparation code * update * update path.sh
2021-12-06 12:00:25 +08:00 · 2021-12-06 12:00:25 +08:00 · 8be4bef405
commit 8be4bef405
parent 5241491e95
7 changed files with 191 additions and 0 deletions
--- a/examples/speechcommand_v1/s0/kws
+++ b/examples/speechcommand_v1/s0/kws
@ -0,0 +1 @@
 ../../../kws
--- a/examples/speechcommand_v1/s0/local/data_download.sh
+++ b/examples/speechcommand_v1/s0/local/data_download.sh
@ -0,0 +1,43 @@
 #!/bin/bash
 # Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Download the Google Speech Commands v0.01 archive and unpack it.
 #
 # Usage: local/data_download.sh [--dl_dir <dir>]
 #   --dl_dir  directory that receives the archive and the extracted audio
 #             (default: ./data/local)
 #
 # The archive is unpacked into <dl_dir>/speech_commands_v1/audio and a
 # .extracted marker file is written beside that folder so that reruns can
 # skip the extraction.
 [ -f ./path.sh ] && . ./path.sh
 dl_dir=./data/local
 . tools/parse_options.sh || exit 1;
 data_dir=$dl_dir
 file_name=speech_commands_v0.01.tar.gz
 speech_command_dir=$data_dir/speech_commands_v1
 audio_dir=$speech_command_dir/audio
 url=http://download.tensorflow.org/data/$file_name
 mkdir -p "$data_dir" || exit 1
 if [ ! -f "$data_dir/$file_name" ]; then
   echo "downloading $url..."
   # Fetch into a temporary name and rename only on success, so that an
   # interrupted download is never mistaken for a complete archive later.
   if ! wget -O "$data_dir/$file_name.part" "$url"; then
     echo "$0: failed to download $url" >&2
     rm -f "$data_dir/$file_name.part"
     exit 1
   fi
   mv "$data_dir/$file_name.part" "$data_dir/$file_name" || exit 1
 else
   echo "$file_name exist in $data_dir, skip download it"
 fi
 if [ ! -f "$speech_command_dir/.extracted" ]; then
   mkdir -p "$audio_dir" || exit 1
   # Write the marker only when tar succeeded; otherwise a broken extraction
   # would be skipped on every later run.
   if ! tar -xzvf "$data_dir/$file_name" -C "$audio_dir"; then
     echo "$0: failed to extract $data_dir/$file_name" >&2
     exit 1
   fi
   touch "$speech_command_dir/.extracted"
 else
   echo ".extracted exist in $speech_command_dir, skip extraction"
 fi
 exit 0
--- a/examples/speechcommand_v1/s0/local/prepare_speech_command.py
+++ b/examples/speechcommand_v1/s0/local/prepare_speech_command.py
@ -0,0 +1,49 @@
 #!/usr/bin/env python3
 # Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import argparse
 # Target keywords, preceded by the catch-all 'unknown' class at index 0.
 CLASSES = [
     'unknown', 'yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off',
     'stop', 'go'
 ]
 # Maps every class name to its position in CLASSES, rendered as a string.
 CLASS_TO_IDX = {name: str(idx) for idx, name in enumerate(CLASSES)}
 if __name__ == '__main__':
     # Command-line entry point: turns a list of wav paths into the two
     # Kaldi-style files wav.scp ("<utt_id> <wav_path>") and text
     # ("<utt_id> <label_index>") inside --data_dir.
     parser = argparse.ArgumentParser(
         description='prepare kaldi format file for google speech command')
     parser.add_argument(
         '--wav_list',
         required=True,
         help='text file listing one wav path per line, each of the form '
         '.../<keyword>/<name>.wav')
     parser.add_argument('--data_dir',
                         required=True,
                         help='folder to write kaldi format files')
     args = parser.parse_args()
     data_dir = args.data_dir
     # Create the output folder if the caller has not done so already.
     os.makedirs(data_dir, exist_ok=True)
     # Context managers close all three files even when a line fails to parse.
     with open(args.wav_list) as f_list, \
             open(os.path.join(data_dir, 'wav.scp'), 'w') as f_wav_scp, \
             open(os.path.join(data_dir, 'text'), 'w') as f_text:
         for line in f_list:
             wav_path = line.strip()
             if not wav_path:
                 # Blank lines carry no path to unpack; skip them.
                 continue
             # The last two path components are the keyword folder and the
             # wav file name.
             keyword, file_name = wav_path.split('/')[-2:]
             # Utterance id: "<keyword>_<file name up to its first dot>".
             wav_id = '_'.join([keyword, file_name.split('.')[0]])
             f_wav_scp.write(wav_id + ' ' + wav_path + '\n')
             # Keywords outside CLASSES all map to the 'unknown' label.
             label = CLASS_TO_IDX.get(keyword, CLASS_TO_IDX['unknown'])
             f_text.write(wav_id + ' ' + label + '\n')
--- a/examples/speechcommand_v1/s0/local/split_dataset.py
+++ b/examples/speechcommand_v1/s0/local/split_dataset.py
@ -0,0 +1,55 @@
 #!/usr/bin/env python3
 # Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import shutil
 import argparse
 def move_files(src_folder, to_folder, list_file):
     """Move the files named in ``list_file`` from one folder tree to another.

     Args:
         src_folder: folder that the relative paths in ``list_file`` are
             resolved against.
         to_folder: folder that receives the files; the sub-folder part of
             every entry is recreated below it.
         list_file: text file with one relative path per line.
     """
     with open(list_file) as f:
         for line in f:
             rel_path = line.rstrip()
             if not rel_path:
                 # An empty entry would resolve to src_folder itself and move
                 # the whole tree, so blank lines are skipped.
                 continue
             dest = os.path.join(to_folder, os.path.dirname(rel_path))
             # makedirs also covers nested sub-folders and a missing
             # to_folder, and is a no-op when dest already exists.
             os.makedirs(dest, exist_ok=True)
             shutil.move(os.path.join(src_folder, rel_path), dest)
 if __name__ == '__main__':
     # Splits the google speech commands into train, validation and test set.
     # Files listed in testing_list.txt and validation_list.txt are moved to
     # <root>/test and <root>/valid; whatever is left in <root>/audio becomes
     # <root>/train.
     parser = argparse.ArgumentParser(
         description='Split google command dataset.')
     parser.add_argument(
         'root',
         type=str,
         help='the path to the root folder of the google commands dataset')
     args = parser.parse_args()
     audio_folder = os.path.join(args.root, 'audio')
     validation_path = os.path.join(audio_folder, 'validation_list.txt')
     test_path = os.path.join(audio_folder, 'testing_list.txt')
     valid_folder = os.path.join(args.root, 'valid')
     test_folder = os.path.join(args.root, 'test')
     train_folder = os.path.join(args.root, 'train')
     if not os.path.isdir(audio_folder) and os.path.isdir(train_folder):
         # audio/ is renamed to train/ as the very last step, so this layout
         # means an earlier run already completed; rerunning must not crash.
         print('{} already exists, skip splitting'.format(train_folder))
     else:
         os.mkdir(valid_folder)
         os.mkdir(test_folder)
         move_files(audio_folder, test_folder, test_path)
         move_files(audio_folder, valid_folder, validation_path)
         os.rename(audio_folder, train_folder)
--- a/examples/speechcommand_v1/s0/path.sh
+++ b/examples/speechcommand_v1/s0/path.sh
@ -0,0 +1,5 @@
 # Environment shared by the recipe scripts; sourced from the recipe directory
 # (run.sh and local/data_download.sh both do `. ./path.sh`).
 export PATH="$PWD:$PATH"
 # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
 export PYTHONIOENCODING=UTF-8
 # Put the directory three levels up ahead of any existing entries.
 # ${PYTHONPATH:-} keeps this line working when PYTHONPATH is unset and the
 # sourcing shell runs with `set -u`.
 export PYTHONPATH="../../../:${PYTHONPATH:-}"
--- a/examples/speechcommand_v1/s0/run.sh
+++ b/examples/speechcommand_v1/s0/run.sh
@ -0,0 +1,37 @@
 #!/bin/bash
 # Copyright 2021  Binbin Zhang
 #                 Jingyong Hou
 #
 # Data preparation recipe for Google Speech Commands v1.
 #   stage -1: download and extract the corpus, then split it into
 #             train / valid / test folders
 #   stage  0: write wav.list, wav.scp and text under data/{train,test,valid}
 . ./path.sh
 export CUDA_VISIBLE_DEVICES="0"
 stage=-1
 stop_stage=0
 # your data dir
 download_dir=/mnt/mnt-data-3/jingyong.hou/data
 # Declared empty so it stays a recognised option; the default is derived
 # after option parsing so that it follows an overridden --download_dir.
 # NOTE(review): assumes tools/parse_options.sh only accepts --name for
 # variables that are already defined, as Kaldi's does - confirm.
 speech_command_dir=
 . tools/parse_options.sh || exit 1;
 if [ -z "$speech_command_dir" ]; then
   speech_command_dir=$download_dir/speech_commands_v1
 fi
 set -euo pipefail
 if [ "${stage}" -le -1 ] && [ "${stop_stage}" -ge -1 ]; then
   echo "Download and extract all datasets"
   local/data_download.sh --dl_dir "$download_dir"
   python local/split_dataset.py "$download_dir/speech_commands_v1"
 fi
 if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
   echo "Start preparing Kaldi format files"
   for x in train test valid; do
     data=data/$x
     mkdir -p "$data"
     # Collect the wav paths of this split, leaving out the background noise
     # clips, then derive wav.scp and text from that list.
     # The -name pattern is quoted so the shell hands it to find verbatim
     # instead of expanding it against wav files in the current directory.
     find "$speech_command_dir/$x" -name '*.wav' \
       | grep -v "_background_noise_" > "$data/wav.list"
     python local/prepare_speech_command.py \
       --wav_list="$data/wav.list" --data_dir="$data"
   done
 fi
--- a/examples/speechcommand_v1/s0/tools
+++ b/examples/speechcommand_v1/s0/tools
@ -0,0 +1 @@
 ../../../tools