|
accum_grad: 4 |
|
cmvn: null |
|
cmvn_conf: |
|
cmvn_file: null |
|
is_json_cmvn: null |
|
ctc: ctc |
|
ctc_conf: |
|
ctc_blank_id: 50363 |
|
dataset: asr |
|
dataset_conf: |
|
batch_conf: |
|
batch_type: dynamic |
|
max_frames_in_batch: 8000 |
|
cycle: 6 |
|
feats_type: log_mel_spectrogram |
|
filter_conf: |
|
max_length: 2700 |
|
min_length: 0 |
|
token_max_length: 448 |
|
token_min_length: 1 |
|
log_mel_spectrogram_conf: |
|
hop_length: 160 |
|
n_fft: 400 |
|
num_mel_bins: 80 |
|
padding: 0 |
|
resample_conf: |
|
resample_rate: 16000 |
|
shuffle: true |
|
shuffle_conf: |
|
shuffle_size: 1500 |
|
sort: true |
|
sort_conf: |
|
sort_size: 500 |
|
spec_aug: true |
|
spec_aug_conf: |
|
max_f: 10 |
|
max_t: 50 |
|
num_f_mask: 2 |
|
num_t_mask: 2 |
|
spec_sub: true |
|
spec_sub_conf: |
|
max_t: 30 |
|
num_t_sub: 3 |
|
spec_trim: false |
|
speed_perturb: true |
|
decoder: transformer |
|
decoder_conf: |
|
activation_type: gelu |
|
attention_heads: 16 |
|
dropout_rate: 0.1 |
|
gradient_checkpointing: true |
|
input_layer: embed_learnable_pe |
|
key_bias: false |
|
linear_units: 4096 |
|
normalize_before: true |
|
num_blocks: 24 |
|
positional_dropout_rate: 0.1 |
|
self_attention_dropout_rate: 0.0 |
|
src_attention: true |
|
src_attention_dropout_rate: 0.0 |
|
src_key_bias: false |
|
tie_word_embedding: true |
|
use_output_layer: true |
|
dtype: fp32 |
|
encoder: transformer |
|
encoder_conf: |
|
activation_type: gelu |
|
attention_dropout_rate: 0.0 |
|
attention_heads: 16 |
|
dropout_rate: 0.0 |
|
gradient_checkpointing: true |
|
input_layer: conv1d2 |
|
key_bias: false |
|
linear_units: 4096 |
|
normalize_before: true |
|
num_blocks: 24 |
|
output_size: 1024 |
|
pos_enc_layer_type: abs_pos_whisper |
|
positional_dropout_rate: 0.1 |
|
static_chunk_size: -1 |
|
use_dynamic_chunk: false |
|
use_dynamic_left_chunk: false |
|
grad_clip: 5 |
|
input_dim: 80 |
|
log_interval: 100 |
|
max_epoch: 6 |
|
model: whisper |
|
model_conf: |
|
ctc_weight: 0.0 |
|
length_normalized_loss: false |
|
lsm_weight: 0.1 |
|
model_dir: exp/whisper_medium_fintune_wenetspeech_yue_opensource |
|
optim: adam |
|
optim_conf: |
|
lr: 1.0e-05 |
|
output_dim: 51865 |
|
save_interval: 2000 |
|
save_states: model_only |
|
scheduler: warmuplr |
|
scheduler_conf: |
|
warmup_steps: 12000 |
|
tokenizer: whisper |
|
tokenizer_conf: |
|
bpe_path: null |
|
is_multilingual: true |
|
non_lang_syms_path: null |
|
num_languages: 99 |
|
special_tokens: |
|
eot: 50257 |
|
no_speech: 50362 |
|
no_timestamps: 50363 |
|
sot: 50258 |
|
sot_prev: 50361 |
|
timestamp_begin: 50364 |
|
transcribe: 50359 |
|
translate: 50358 |
|
split_with_space: false |
|
symbol_table_path: null |
|
train_engine: torch_ddp |
|
use_amp: false |
|
vocab_size: 51865 |
|
|