Merge pull request #9 from wenet-e2e/dev-jingyonghou

add results of mdtc
2021-11-19 17:01:23 +08:00 · 2021-11-19 17:01:23 +08:00 · f7fd62db7d
commit f7fd62db7d
parent 2072abeb70 edfc6de743
4 changed files with 57 additions and 4 deletions
--- a/examples/hi_xiaowen/s0/README.md
+++ b/examples/hi_xiaowen/s0/README.md
@ -8,3 +8,5 @@ FRRs with FAR fixed at once per hour:
 | DS_TCN           | 21        | 80        | 0.010807   | 0.014754     |
 | DS_TCN           | 21        | 80(avg30) | 0.009867   | 0.014472     |
 | DS_TCN(spec_aug) | 21        | 80(avg30) | 0.029039   | 0.022648     |
 | MDTC             | 156       | 80(avg10) | 0.007142   | 0.005920     |
 | MDTC_Small       | 31        | 80(avg10) | 0.005357   | 0.005920     |
--- a/examples/hi_xiaowen/s0/conf/mdtc_small.yaml
+++ b/examples/hi_xiaowen/s0/conf/mdtc_small.yaml
@ -0,0 +1,51 @@
 dataset_conf:
    filter_conf:
        max_length: 2048
        min_length: 0
    resample_conf:
        resample_rate: 16000
    speed_perturb: false
    feature_extraction_conf:
        feature_type: 'mfcc'
        num_ceps: 80
        num_mel_bins: 80
        frame_shift: 10
        frame_length: 25
        dither: 1.0
    feature_dither: 0.0
    spec_aug: true
    spec_aug_conf:
        num_t_mask: 1
        num_f_mask: 1
        max_t: 20
        max_f: 40
    shuffle: true
    shuffle_conf:
        shuffle_size: 1500
    batch_conf:
        batch_size: 100
 model:
    hidden_dim: 32
    preprocessing:
        type: none
    backbone:
        type: mdtc
        num_stack: 3
        stack_size: 4
        kernel_size: 5
        hidden_dim: 32
    classifier:
        type: linear
 optim: adam
 optim_conf:
    lr: 0.001
    weight_decay: 5e-5
    warm_up_step: 2500
 training_config:
    grad_clip: 5
    max_epoch: 100
    log_interval: 10
    criterion: max_pooling
--- a/examples/hi_xiaowen/s0/run.sh
+++ b/examples/hi_xiaowen/s0/run.sh
@ -9,13 +9,13 @@ stage=0
 stop_stage=4
 num_keywords=2
-config=conf/mdtc.yaml
+config=conf/mdtc_small.yaml
 norm_mean=false
 norm_var=false
 gpu_id=0
 checkpoint=
-dir=exp/mdtc
+dir=exp/mdtc_small
 num_average=10
 score_checkpoint=$dir/avg_${num_average}.pt
--- a/kws/model/loss.py
+++ b/kws/model/loss.py
@ -21,7 +21,7 @@ def max_polling_loss(logits: torch.Tensor,
                     target: torch.Tensor,
                     lengths: torch.Tensor,
                     min_duration: int = 0):
-    """ Max-pooling loss
+    ''' Max-pooling loss
        For keyword, select the frame with the highest posterior.
            The keyword is triggered when any of the frames is triggered.
        For none keyword, select the hardest frame, namely the frame
@ -36,7 +36,7 @@ def max_polling_loss(logits: torch.Tensor,
    Returns:
        (float): loss of current batch
        (float): accuracy of current batch
-    """
+    '''
    mask = padding_mask(lengths)
    num_utts = logits.size(0)
    num_keywords = logits.size(2)