#!/usr/bin/env python3
# Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
import os
|
|
import shutil
|
|
import argparse
|
|
|
|
|
|
def move_files(src_folder, to_folder, list_file):
    """Move the files named in *list_file* from *src_folder* to *to_folder*.

    Each non-blank line of *list_file* is treated as a path relative to
    *src_folder*. The file is moved into the same relative sub-directory
    beneath *to_folder*; that sub-directory is created, together with any
    missing intermediate directories, before the move.

    Args:
        src_folder: directory that currently holds the listed files.
        to_folder: directory that receives the listed files.
        list_file: text file containing one relative path per line.
    """
    with open(list_file) as f:
        # Iterate the file lazily instead of loading every line up front.
        for line in f:
            rel_path = line.rstrip()
            if not rel_path:
                # An empty entry would resolve to ``src_folder`` itself and
                # move the whole source tree, so blank lines are ignored.
                continue
            dest = os.path.join(to_folder, os.path.dirname(rel_path))
            # makedirs copes with nested sub-directories and with a
            # directory that already exists.
            os.makedirs(dest, exist_ok=True)
            shutil.move(os.path.join(src_folder, rel_path), dest)
|
|
|
|
|
|
if __name__ == '__main__':
    # Splits the google speech commands into train, validation and test set.
    parser = argparse.ArgumentParser(description='Split google command dataset.')
    parser.add_argument(
        'root',
        type=str,
        help='the path to the root folder of the google commands dataset')
    args = parser.parse_args()

    # The list files are read from inside the 'audio' folder.
    audio_folder = os.path.join(args.root, 'audio')
    validation_path = os.path.join(audio_folder, 'validation_list.txt')
    test_path = os.path.join(audio_folder, 'testing_list.txt')

    # The three split folders are siblings of 'audio' under the root.
    valid_folder = os.path.join(args.root, 'valid')
    test_folder = os.path.join(args.root, 'test')
    train_folder = os.path.join(args.root, 'train')

    # Create the held-out folders first; os.mkdir raises if one exists.
    for split_folder in (valid_folder, test_folder):
        os.mkdir(split_folder)

    # Pull the listed test files out first, then the validation files.
    for split_folder, split_list in ((test_folder, test_path),
                                     (valid_folder, validation_path)):
        move_files(audio_folder, split_folder, split_list)

    # Whatever is left in 'audio' becomes the training set.
    os.rename(audio_folder, train_folder)
|