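# NOTE: the three lines below appear to be hosting-page residue (avatar alt
# text, commit message, commit id) rather than configuration; they are kept
# as comments so the file parses as YAML.
# ASLP-lab's picture
# Upload train.yaml
# ddfca26 verified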
# Top-level scalar; not part of any *_conf mapping.
accum_grad: 4

# cmvn is null and both cmvn_conf entries are null in this config.
# The two entries are children of cmvn_conf (nesting restored; the flattened
# form left cmvn_conf itself null and leaked its keys to the top level).
cmvn: null
cmvn_conf:
  cmvn_file: null
  is_json_cmvn: null
ctc: ctc
ctc_conf:
  # NOTE(review): 50363 is the same id this file lists as
  # special_tokens.no_timestamps (no_speech is 50362) -- confirm that this
  # is the intended blank id. model_conf.ctc_weight is 0.0 in this file.
  ctc_blank_id: 50363
# Dataset pipeline settings.
# Nesting below is reconstructed from the alphabetical key order of the dump
# (each *_conf mapping owns the sorted run of keys that follows it) --
# confirm against the original train.yaml if available.
dataset: asr
dataset_conf:
  batch_conf:
    batch_type: dynamic
    max_frames_in_batch: 8000
  # cycle sorts after batch_conf and before feats_type, so it is a direct
  # child of dataset_conf, not of batch_conf.
  cycle: 6
  feats_type: log_mel_spectrogram
  filter_conf:
    max_length: 2700
    min_length: 0
    token_max_length: 448
    token_min_length: 1
  log_mel_spectrogram_conf:
    hop_length: 160
    n_fft: 400
    num_mel_bins: 80
    padding: 0
  resample_conf:
    resample_rate: 16000
  shuffle: true
  shuffle_conf:
    shuffle_size: 1500
  sort: true
  sort_conf:
    sort_size: 500
  spec_aug: true
  spec_aug_conf:
    max_f: 10
    # max_t appears in both spec_aug_conf (50) and spec_sub_conf (30); when
    # flattened the two collide as duplicate keys and one value is lost.
    max_t: 50
    num_f_mask: 2
    num_t_mask: 2
  spec_sub: true
  spec_sub_conf:
    max_t: 30
    num_t_sub: 3
  spec_trim: false
  speed_perturb: true
# Decoder architecture settings.
# All keys below belong to decoder_conf (nesting restored). Several key names
# (activation_type, attention_heads, dropout_rate, gradient_checkpointing,
# input_layer, key_bias, linear_units, normalize_before, num_blocks,
# positional_dropout_rate) also occur in encoder_conf, so they must stay
# nested to avoid duplicate-key clobbering.
decoder: transformer
decoder_conf:
  activation_type: gelu
  attention_heads: 16
  dropout_rate: 0.1
  gradient_checkpointing: true
  input_layer: embed_learnable_pe
  key_bias: false
  linear_units: 4096
  normalize_before: true
  num_blocks: 24
  positional_dropout_rate: 0.1
  self_attention_dropout_rate: 0.0
  src_attention: true
  src_attention_dropout_rate: 0.0
  src_key_bias: false
  tie_word_embedding: true
  use_output_layer: true
# Top-level scalar (sorts between decoder_conf and encoder).
dtype: fp32

# Encoder architecture settings.
# All keys below belong to encoder_conf (nesting restored); many of these
# names are shared with decoder_conf and would collide if left flat.
encoder: transformer
encoder_conf:
  activation_type: gelu
  attention_dropout_rate: 0.0
  attention_heads: 16
  dropout_rate: 0.0
  gradient_checkpointing: true
  input_layer: conv1d2
  key_bias: false
  linear_units: 4096
  normalize_before: true
  num_blocks: 24
  output_size: 1024
  pos_enc_layer_type: abs_pos_whisper
  positional_dropout_rate: 0.1
  static_chunk_size: -1
  use_dynamic_chunk: false
  use_dynamic_left_chunk: false
# Top-level scalars.
grad_clip: 5
# input_dim equals dataset_conf.log_mel_spectrogram_conf.num_mel_bins (80)
# in this file.
input_dim: 80
log_interval: 100
max_epoch: 6

model: whisper
model_conf:
  # ctc_weight is 0.0 in this config.
  ctc_weight: 0.0
  length_normalized_loss: false
  lsm_weight: 0.1
# NOTE(review): model_dir is kept top-level; alphabetical order alone cannot
# distinguish it from a child of model_conf -- confirm against the original.
# The path is reproduced verbatim (including the "fintune" spelling) because
# it is a runtime value.
model_dir: exp/whisper_medium_fintune_wenetspeech_yue_opensource
# Optimizer settings; lr is a child of optim_conf (nesting restored).
optim: adam
optim_conf:
  lr: 1.0e-05
# NOTE(review): output_dim is kept top-level; key order alone cannot rule out
# optim_conf as its parent -- confirm against the original. Its value equals
# vocab_size (51865) in this file.
output_dim: 51865
save_interval: 2000
save_states: model_only

# Scheduler settings; warmup_steps is a child of scheduler_conf.
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 12000
# Tokenizer settings.
# Nesting is reconstructed from the alphabetical key order: split_with_space
# and symbol_table_path sort after special_tokens inside tokenizer_conf (they
# would sort before "tokenizer" if they were top-level keys).
tokenizer: whisper
tokenizer_conf:
  bpe_path: null
  is_multilingual: true
  non_lang_syms_path: null
  num_languages: 99
  # Special-token name -> id table.
  special_tokens:
    eot: 50257
    no_speech: 50362
    no_timestamps: 50363
    sot: 50258
    sot_prev: 50361
    timestamp_begin: 50364
    transcribe: 50359
    translate: 50358
  split_with_space: false
  symbol_table_path: null

# Top-level scalars.
train_engine: torch_ddp
use_amp: false
# vocab_size equals output_dim (51865) in this file.
vocab_size: 51865