From c7c5bd3edc41e11112b03b727e3961615bc91c39 Mon Sep 17 00:00:00 2001 From: Binbin Zhang Date: Mon, 6 Dec 2021 17:24:48 +0800 Subject: [PATCH 1/5] [kws] refine tcn and ds_tcn, add batchnorm (#31) * [kws] fix seed type * [kws] refine tcn and ds_tcn, add batch norm --- kws/model/tcn.py | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/kws/model/tcn.py b/kws/model/tcn.py index 6a002b1..255ea7d 100644 --- a/kws/model/tcn.py +++ b/kws/model/tcn.py @@ -29,12 +29,16 @@ class CnnBlock(nn.Module): super().__init__() # The CNN used here is causal convolution self.padding = (kernel_size - 1) * dilation - self.cnn = nn.Conv1d(channel, - channel, - kernel_size, - stride=1, - dilation=dilation) - self.dropout = nn.Dropout(dropout) + self.cnn = nn.Sequential( + nn.Conv1d(channel, + channel, + kernel_size, + stride=1, + dilation=dilation), + nn.BatchNorm1d(channel), + nn.ReLU(), + nn.Dropout(dropout), + ) def forward(self, x: torch.Tensor, cache: Optional[torch.Tensor] = None): """ @@ -51,8 +55,6 @@ class CnnBlock(nn.Module): new_cache = y[:, :, -self.padding:] y = self.cnn(y) - y = F.relu(y) - y = self.dropout(y) y = y + x # residual connection return y, new_cache @@ -68,17 +70,20 @@ class DsCnnBlock(nn.Module): super().__init__() # The CNN used here is causal convolution self.padding = (kernel_size - 1) * dilation - self.depthwise_cnn = nn.Conv1d(channel, - channel, - kernel_size, - stride=1, - dilation=dilation, - groups=channel) - self.pointwise_cnn = nn.Conv1d(channel, - channel, - kernel_size=1, - stride=1) - self.dropout = nn.Dropout(dropout) + self.cnn = nn.Sequential( + nn.Conv1d(channel, + channel, + kernel_size, + stride=1, + dilation=dilation, + groups=channel), + nn.BatchNorm1d(channel), + nn.ReLU(), + nn.Conv1d(channel, channel, kernel_size=1, stride=1), + nn.BatchNorm1d(channel), + nn.ReLU(), + nn.Dropout(dropout), + ) def forward(self, x: torch.Tensor, cache: Optional[torch.Tensor] = None): """ @@ 
-94,10 +99,7 @@ class DsCnnBlock(nn.Module): assert y.size(2) > self.padding new_cache = y[:, :, -self.padding:] - y = self.depthwise_cnn(y) - y = self.pointwise_cnn(y) - y = F.relu(y) - y = self.dropout(y) + y = self.cnn(y) y = y + x # residual connection return y, new_cache From 93bf93565e6e230f6c1ef44aa7320392d17f63ab Mon Sep 17 00:00:00 2001 From: Binbin Zhang Date: Mon, 6 Dec 2021 22:00:54 +0800 Subject: [PATCH 2/5] [doc] add wechat group discuss (#33) --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index e07cc6c..04b0f56 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,15 @@ We plan to support a variaty of hardwares and platforms, including: * Android * Raspberry Pi +## Discussion + +For Chinese users, you can scan the QR code on the left to follow our official account of WeNet. +We also created a WeChat group for better discussion and quicker response. +Please scan the QR code on the right to join the chat group. + +| | | +| ---- | ---- | + ## Reference * Mining Effective Negative Training Samples for Keyword Spotting From 92a4c19ffe9d3ec3af75dadbf051b18b86a8daec Mon Sep 17 00:00:00 2001 From: Binbin Zhang Date: Tue, 7 Dec 2021 10:36:38 +0800 Subject: [PATCH 3/5] [examples] use ds_tcn as default model (#34) * [examples] use ds_tcn as default model * fix scoring gpu id --- examples/hi_xiaowen/s0/README.md | 6 ++---- examples/hi_xiaowen/s0/run.sh | 13 +++++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/examples/hi_xiaowen/s0/README.md b/examples/hi_xiaowen/s0/README.md index b88ad49..87ff5c3 100644 --- a/examples/hi_xiaowen/s0/README.md +++ b/examples/hi_xiaowen/s0/README.md @@ -4,9 +4,7 @@ FRRs with FAR fixed at once per hour: |------------------|-----------|-----------|------------|--------------| | GRU | 203 | 80(avg30) | 0.088901 | 0.083827 | | TCN | 134 | 80(avg30) | 0.023494 | 0.029884 | -| DS_TCN | 21 | 60 | 0.011559 | 0.014190 | -| DS_TCN | 21 | 80 | 0.010807 | 0.014754 | -| 
DS_TCN | 21 | 80(avg30) | 0.009867 | 0.014472 | -| DS_TCN(spec_aug) | 21 | 80(avg30) | 0.029039 | 0.022648 | +| DS_TCN | 21 | 80(avg30) | 0.019641 | 0.018325 | +| DS_TCN(spec_aug) | 21 | 80(avg30) | 0.029509 | 0.008928 | | MDTC | 156 | 80(avg10) | 0.007142 | 0.005920 | | MDTC_Small | 31 | 80(avg10) | 0.005357 | 0.005920 | diff --git a/examples/hi_xiaowen/s0/run.sh b/examples/hi_xiaowen/s0/run.sh index 7c97999..3f7967b 100755 --- a/examples/hi_xiaowen/s0/run.sh +++ b/examples/hi_xiaowen/s0/run.sh @@ -9,15 +9,15 @@ stage=0 stop_stage=4 num_keywords=2 -config=conf/mdtc_small.yaml -norm_mean=false -norm_var=false +config=conf/ds_tcn.yaml +norm_mean=true +norm_var=true gpu_id=0 checkpoint= -dir=exp/mdtc_small +dir=exp/ds_tcn -num_average=10 +num_average=30 score_checkpoint=$dir/avg_${num_average}.pt download_dir=./data/local # your data dir @@ -82,6 +82,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --num_workers 8 \ --num_keywords $num_keywords \ --min_duration 50 \ + --seed 666 \ $cmvn_opts \ ${checkpoint:+--checkpoint $checkpoint} fi @@ -97,7 +98,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then # Compute posterior score result_dir=$dir/test_$(basename $score_checkpoint) mkdir -p $result_dir - python kws/bin/score.py --gpu 1 \ + python kws/bin/score.py --gpu $gpu_id \ --config $dir/config.yaml \ --test_data data/test/data.list \ --batch_size 256 \ From b55ae111ae31d824e0f74d1883d2ff82aeb927d3 Mon Sep 17 00:00:00 2001 From: Binbin Zhang Date: Tue, 7 Dec 2021 10:52:14 +0800 Subject: [PATCH 4/5] [model] refactor tcn and ds_tcn share the same base class (#35) --- kws/model/tcn.py | 74 +++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 42 deletions(-) diff --git a/kws/model/tcn.py b/kws/model/tcn.py index 255ea7d..958a951 100644 --- a/kws/model/tcn.py +++ b/kws/model/tcn.py @@ -20,15 +20,43 @@ import torch.nn as nn import torch.nn.functional as F -class CnnBlock(nn.Module): +class Block(nn.Module): def __init__(self, 
channel: int, kernel_size: int, dilation: int, dropout: float = 0.1): super().__init__() - # The CNN used here is causal convolution self.padding = (kernel_size - 1) * dilation + + def forward(self, x: torch.Tensor, cache: Optional[torch.Tensor] = None): + """ + Args: + x(torch.Tensor): Input tensor (B, D, T) + Returns: + torch.Tensor(B, D, T) + """ + # The CNN used here is causal convolution + if cache is None: + y = F.pad(x, (self.padding, 0), value=0.0) + else: + y = torch.cat((cache, x), dim=2) + assert y.size(2) > self.padding + new_cache = y[:, :, -self.padding:] + + # self.cnn is defined in the subclass of Block + y = self.cnn(y) + y = y + x # residual connection + return y, new_cache + + +class CnnBlock(Block): + def __init__(self, + channel: int, + kernel_size: int, + dilation: int, + dropout: float = 0.1): + super().__init__(channel, kernel_size, dilation, dropout) self.cnn = nn.Sequential( nn.Conv1d(channel, channel, @@ -40,26 +68,8 @@ class CnnBlock(nn.Module): nn.Dropout(dropout), ) - def forward(self, x: torch.Tensor, cache: Optional[torch.Tensor] = None): - """ - Args: - x(torch.Tensor): Input tensor (B, D, T) - Returns: - torch.Tensor(B, D, T) - """ - if cache is None: - y = F.pad(x, (self.padding, 0), value=0.0) - else: - y = torch.cat((cache, x), dim=2) - assert y.size(2) > self.padding - new_cache = y[:, :, -self.padding:] - y = self.cnn(y) - y = y + x # residual connection - return y, new_cache - - -class DsCnnBlock(nn.Module): +class DsCnnBlock(Block): """ Depthwise Separable Convolution """ def __init__(self, @@ -67,9 +77,7 @@ class DsCnnBlock(nn.Module): kernel_size: int, dilation: int, dropout: float = 0.1): - super().__init__() - # The CNN used here is causal convolution - self.padding = (kernel_size - 1) * dilation + super().__init__(channel, kernel_size, dilation, dropout) self.cnn = nn.Sequential( nn.Conv1d(channel, channel, @@ -85,24 +93,6 @@ class DsCnnBlock(nn.Module): nn.Dropout(dropout), ) - def forward(self, x: torch.Tensor, cache: 
Optional[torch.Tensor] = None): - """ - Args: - x(torch.Tensor): Input tensor (B, D, T) - Returns: - torch.Tensor(B, D, T) - """ - if cache is None: - y = F.pad(x, (self.padding, 0), value=0.0) - else: - y = torch.cat((cache, x), dim=2) - assert y.size(2) > self.padding - new_cache = y[:, :, -self.padding:] - - y = self.cnn(y) - y = y + x # residual connection - return y, new_cache - class TCN(nn.Module): def __init__(self, From afbc1d2960cdb703ba77dcce4d16663be46abcff Mon Sep 17 00:00:00 2001 From: xiaohou Date: Tue, 7 Dec 2021 10:56:30 +0800 Subject: [PATCH 5/5] [example] add testing code for speech command dataset (#32) * update run.sh * update run.sh * rename test.py to compute_accuracy.py * update run.sh --- examples/speechcommand_v1/s0/run.sh | 31 ++++++++- kws/bin/compute_accuracy.py | 102 ++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 kws/bin/compute_accuracy.py diff --git a/examples/speechcommand_v1/s0/run.sh b/examples/speechcommand_v1/s0/run.sh index 44bf258..b18a856 100755 --- a/examples/speechcommand_v1/s0/run.sh +++ b/examples/speechcommand_v1/s0/run.sh @@ -7,13 +7,13 @@ export CUDA_VISIBLE_DEVICES="0" stage=-1 -stop_stage=2 +stop_stage=4 num_keywords=11 config=conf/mdtc.yaml norm_mean=false norm_var=false -gpu_id=4 +gpu_id=0 checkpoint= dir=exp/mdtc @@ -79,3 +79,30 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then $cmvn_opts \ ${checkpoint:+--checkpoint $checkpoint} fi + +if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then + # Do model average + python kws/bin/average_model.py \ + --dst_model $score_checkpoint \ + --src_path $dir \ + --num ${num_average} \ + --val_best + + # Testing + result_dir=$dir/test_$(basename $score_checkpoint) + mkdir -p $result_dir + python kws/bin/compute_accuracy.py --gpu 3 \ + --config $dir/config.yaml \ + --test_data data/test/data.list \ + --batch_size 256 \ + --num_workers 8 \ + --checkpoint $score_checkpoint +fi + + +if [ ${stage} -le 4 ] && [ 
${stop_stage} -ge 4 ]; then + python kws/bin/export_jit.py --config $dir/config.yaml \ + --checkpoint $score_checkpoint \ + --output_file $dir/final.zip \ + --output_quant_file $dir/final.quant.zip +fi diff --git a/kws/bin/compute_accuracy.py b/kws/bin/compute_accuracy.py new file mode 100644 index 0000000..a6cde27 --- /dev/null +++ b/kws/bin/compute_accuracy.py @@ -0,0 +1,102 @@ +# Copyright (c) 2021 Binbin Zhang(binbzha@qq.com) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import argparse +import copy +import logging +import os + +import torch +import yaml +from torch.utils.data import DataLoader + +from kws.dataset.dataset import Dataset +from kws.model.kws_model import init_model +from kws.utils.checkpoint import load_checkpoint +from kws.utils.executor import Executor + + +def get_args(): + parser = argparse.ArgumentParser(description='recognize with your model') + parser.add_argument('--config', required=True, help='config file') + parser.add_argument('--test_data', required=True, help='test data file') + parser.add_argument('--gpu', + type=int, + default=-1, + help='gpu id for this rank, -1 for cpu') + parser.add_argument('--checkpoint', required=True, help='checkpoint model') + parser.add_argument('--batch_size', + default=16, + type=int, + help='batch size for inference') + parser.add_argument('--num_workers', + default=0, + type=int, + help='num of subprocess workers for reading') + 
parser.add_argument('--pin_memory', + action='store_true', + default=False, + help='Use pinned memory buffers used for reading') + parser.add_argument('--prefetch', + default=100, + type=int, + help='prefetch number') + args = parser.parse_args() + return args + + +def main(): + args = get_args() + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(levelname)s %(message)s') + os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu) + + with open(args.config, 'r') as fin: + configs = yaml.load(fin, Loader=yaml.FullLoader) + + test_conf = copy.deepcopy(configs['dataset_conf']) + test_conf['filter_conf']['max_length'] = 102400 + test_conf['filter_conf']['min_length'] = 0 + test_conf['speed_perturb'] = False + test_conf['spec_aug'] = False + test_conf['shuffle'] = False + test_conf['feature_extraction_conf']['dither'] = 0.0 + test_conf['batch_conf']['batch_size'] = args.batch_size + + test_dataset = Dataset(args.test_data, test_conf) + test_data_loader = DataLoader(test_dataset, + batch_size=None, + pin_memory=args.pin_memory, + num_workers=args.num_workers) + + # Init kws model from configs + model = init_model(configs['model']) + + load_checkpoint(model, args.checkpoint) + use_cuda = args.gpu >= 0 and torch.cuda.is_available() + device = torch.device('cuda' if use_cuda else 'cpu') + model = model.to(device) + executor = Executor() + model.eval() + training_config = configs['training_config'] + with torch.no_grad(): + test_loss, test_acc = executor.test(model, test_data_loader, device, + training_config) + logging.info('Test Loss {} Acc {}'.format(test_loss, test_acc)) + + +if __name__ == '__main__': + main()