ASLP-lab committed
Commit ddfca26 · verified · 1 Parent(s): a997529

Upload train.yaml

Files changed (1)
  1. whisper_medium_yue/train.yaml  +121 -0
whisper_medium_yue/train.yaml ADDED
@@ -0,0 +1,121 @@
+ accum_grad: 4
+ cmvn: null
+ cmvn_conf:
+   cmvn_file: null
+   is_json_cmvn: null
+ ctc: ctc
+ ctc_conf:
+   ctc_blank_id: 50363
+ dataset: asr
+ dataset_conf:
+   batch_conf:
+     batch_type: dynamic
+     max_frames_in_batch: 8000
+   cycle: 6
+   feats_type: log_mel_spectrogram
+   filter_conf:
+     max_length: 2700
+     min_length: 0
+     token_max_length: 448
+     token_min_length: 1
+   log_mel_spectrogram_conf:
+     hop_length: 160
+     n_fft: 400
+     num_mel_bins: 80
+     padding: 0
+   resample_conf:
+     resample_rate: 16000
+   shuffle: true
+   shuffle_conf:
+     shuffle_size: 1500
+   sort: true
+   sort_conf:
+     sort_size: 500
+   spec_aug: true
+   spec_aug_conf:
+     max_f: 10
+     max_t: 50
+     num_f_mask: 2
+     num_t_mask: 2
+   spec_sub: true
+   spec_sub_conf:
+     max_t: 30
+     num_t_sub: 3
+   spec_trim: false
+   speed_perturb: true
+ decoder: transformer
+ decoder_conf:
+   activation_type: gelu
+   attention_heads: 16
+   dropout_rate: 0.1
+   gradient_checkpointing: true
+   input_layer: embed_learnable_pe
+   key_bias: false
+   linear_units: 4096
+   normalize_before: true
+   num_blocks: 24
+   positional_dropout_rate: 0.1
+   self_attention_dropout_rate: 0.0
+   src_attention: true
+   src_attention_dropout_rate: 0.0
+   src_key_bias: false
+   tie_word_embedding: true
+   use_output_layer: true
+ dtype: fp32
+ encoder: transformer
+ encoder_conf:
+   activation_type: gelu
+   attention_dropout_rate: 0.0
+   attention_heads: 16
+   dropout_rate: 0.0
+   gradient_checkpointing: true
+   input_layer: conv1d2
+   key_bias: false
+   linear_units: 4096
+   normalize_before: true
+   num_blocks: 24
+   output_size: 1024
+   pos_enc_layer_type: abs_pos_whisper
+   positional_dropout_rate: 0.1
+   static_chunk_size: -1
+   use_dynamic_chunk: false
+   use_dynamic_left_chunk: false
+ grad_clip: 5
+ input_dim: 80
+ log_interval: 100
+ max_epoch: 6
+ model: whisper
+ model_conf:
+   ctc_weight: 0.0
+   length_normalized_loss: false
+   lsm_weight: 0.1
+ model_dir: exp/whisper_medium_fintune_wenetspeech_yue_opensource
+ optim: adam
+ optim_conf:
+   lr: 1.0e-05
+ output_dim: 51865
+ save_interval: 2000
+ save_states: model_only
+ scheduler: warmuplr
+ scheduler_conf:
+   warmup_steps: 12000
+ tokenizer: whisper
+ tokenizer_conf:
+   bpe_path: null
+   is_multilingual: true
+   non_lang_syms_path: null
+   num_languages: 99
+   special_tokens:
+     eot: 50257
+     no_speech: 50362
+     no_timestamps: 50363
+     sot: 50258
+     sot_prev: 50361
+     timestamp_begin: 50364
+     transcribe: 50359
+     translate: 50358
+   split_with_space: false
+   symbol_table_path: null
+ train_engine: torch_ddp
+ use_amp: false
+ vocab_size: 51865
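
For reference, a minimal sketch (not part of the commit) of how the relationships encoded in this config can be checked after loading it with PyYAML. The relative path mirrors the file added here; the assertions and duration arithmetic only restate values that already appear in the diff above.

import yaml

# Load the training config added in this commit.
with open("whisper_medium_yue/train.yaml") as f:
    cfg = yaml.safe_load(f)

# Multilingual Whisper vocabulary: output_dim and vocab_size should agree.
assert cfg["output_dim"] == cfg["vocab_size"] == 51865

# The CTC branch is configured but weighted 0.0 in model_conf, so training uses
# the attention loss only; the CTC blank id reuses the <|notimestamps|> token id.
assert cfg["ctc_conf"]["ctc_blank_id"] == cfg["tokenizer_conf"]["special_tokens"]["no_timestamps"]

# Feature geometry: 80 log-mel bins feed an encoder expecting input_dim == 80.
mel = cfg["dataset_conf"]["log_mel_spectrogram_conf"]
assert mel["num_mel_bins"] == cfg["input_dim"]

# A 160-sample hop at 16 kHz gives 10 ms per frame, so filter_conf.max_length
# caps utterances at 27 s and each dynamic batch holds at most ~80 s of audio.
frame_ms = 1000 * mel["hop_length"] / cfg["dataset_conf"]["resample_conf"]["resample_rate"]
max_utt_s = cfg["dataset_conf"]["filter_conf"]["max_length"] * frame_ms / 1000
max_batch_s = cfg["dataset_conf"]["batch_conf"]["max_frames_in_batch"] * frame_ms / 1000
print(f"frame = {frame_ms:.0f} ms, max utterance = {max_utt_s:.0f} s, max batch = {max_batch_s:.0f} s")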
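A hypothetical single-node launch of WeNet training with this config, written as a Python wrapper around torchrun. The script path wenet/bin/train.py and its flags follow the public WeNet recipes, but the GPU count, data lists, and initial checkpoint are placeholders, and flag names can differ between WeNet versions.

import subprocess

cmd = [
    "torchrun", "--standalone", "--nnodes=1", "--nproc_per_node=4",  # 4 GPUs assumed
    "wenet/bin/train.py",
    "--config", "whisper_medium_yue/train.yaml",
    "--data_type", "shard",                        # or "raw", depending on how the lists were prepared
    "--train_data", "data/train/data.list",        # placeholder path
    "--cv_data", "data/dev/data.list",             # placeholder path
    "--checkpoint", "exp/whisper_medium/init.pt",  # placeholder: converted Whisper-medium weights
    "--model_dir", "exp/whisper_medium_fintune_wenetspeech_yue_opensource",
]
subprocess.run(cmd, check=True)

With accum_grad: 4, gradients from four dynamic batches are accumulated before each optimizer step, and model_only checkpoints are written every save_interval: 2000 steps into model_dir.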