File size: 1,334 Bytes
03be9c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env bash
#
# Launcher for ./run_flax_speech_recognition_seq2seq.py.
#
# The Python script is resolved relative to the current working directory, so
# run this launcher from the directory that contains it. Every option below is
# forwarded verbatim and in the listed order; what each flag means is defined
# by the Python script, not by this file.
#
# The options are kept in a Bash array so that each one sits on its own line
# without backslash continuations. The exit status of this launcher is the
# exit status of the python command.

train_args=(
  --dataset_name=mozilla-foundation/common_voice_9_0
  --model_name_or_path=sanchit-gandhi/flax-wav2vec2-2-bart-large-scan
  --dataset_config_name=en
  --train_split_name=train
  --eval_split_name=validation
  --test_split_name=test
  # NOTE(review): absolute, user-specific path; adjust when running on a
  # different machine or account.
  --dataset_cache_dir=/home/sanchitgandhi/cache/huggingface/datasets
  --output_dir=./flax-wav2vec2-2-bart-large-cv9-feature-encoder
  --preprocessing_num_workers=1
  --id_column_name=client_id
  --length_column_name=input_length
  --text_column_name=sentence
  --overwrite_output_dir
  --per_device_train_batch_size=8
  --per_device_eval_batch_size=4
  --logging_steps=25
  --max_steps=50000
  --eval_steps=10000
  --save_steps=10000
  --gradient_checkpointing
  --max_duration_in_seconds=20
  --max_target_length=128
  --generation_max_length=40
  --generation_num_beams=1
  --generation_length_penalty=1.2
  --final_generation_max_length=200
  --final_generation_num_beams=5
  --learning_rate=1e-4
  --warmup_steps=500
  --save_total_limit=1
  --freeze_feature_encoder=False
  --predict_with_generate
  --do_lower_case
  --do_eval
  --do_train
  --do_predict
  --push_to_hub
  --use_auth_token
  --wandb_project=commonvoice_9_0
  --wandb_name=flax-wav2vec2-2-bart-large-cv9-feature-encoder
)

# Quoted [@] expansion hands each array element to python as one argument.
python ./run_flax_speech_recognition_seq2seq.py "${train_args[@]}"