# medllm-10m / training_config.yaml
# raihan-js's picture
# Upload MedLLM-10M medical language model
# 9afef3b verified
# Dataset preparation settings.
data:
  # Same value as model.max_seq_len (512).
  max_length: 512
  # Presumably documents shorter than this are dropped; unit (chars vs.
  # tokens) is not visible here -- TODO confirm against the data loader.
  min_doc_length: 100
  # Half of max_length; presumably the step of a sliding window over long
  # documents (i.e. 50% overlap) -- TODO confirm.
  stride: 256
# Hugging Face Hub publishing settings.
huggingface:
  license: apache-2.0
  # Hub repository id in "<owner>/<repo>" form.
  model_name: raihan-js/medllm-10m-v2
  private: false
# Model architecture hyperparameters.
# NOTE(review): if this is a standard transformer stack, 8 layers at
# d_model=512 / d_ff=2048 come to roughly 25M weights before embeddings,
# which is well above the "10M" in the name -- confirm the intended size.
model:
  activation: gelu
  # 4 x d_model.
  d_ff: 2048
  d_model: 512
  dropout: 0.1
  # Same value as data.max_length (512).
  max_seq_len: 512
  # d_model / n_heads = 64 per head.
  n_heads: 8
  n_layers: 8
  name: MedLLM-10M-v2
  # Presumably must match the tokenizer stored under paths.tokenizer_dir --
  # TODO confirm.
  vocab_size: 5000
# Filesystem locations; all are relative paths (leading "./").
paths:
  data_dir: ./data
  logs_dir: ./logs
  # NOTE(review): directory name has no "-v2" suffix although model.name is
  # MedLLM-10M-v2 -- confirm this is intentional and will not overwrite
  # earlier checkpoints.
  model_dir: ./checkpoints/medllm-10m
  tokenizer_dir: ./tokenizer/vocab
# Settings for the data scraper.
scraping:
  # Presumably seconds -- TODO confirm the unit against the scraper code.
  delay_between_requests: 0.5
  max_retries: 3
  max_workers: 8
  # Presumably seconds -- TODO confirm.
  timeout: 30
# Optimisation and training-loop settings.
training:
  # Presumably the per-step batch; with gradient_accumulation_steps: 8 the
  # effective batch would be 4 x 8 = 32 -- TODO confirm how the trainer
  # combines the two.
  batch_size: 4
  eval_steps: 50
  fp16: true
  grad_clip: 1.0
  gradient_accumulation_steps: 8
  # Keep the plain decimal form: exponent notation without a dot (e.g. 3e-4)
  # is read as a string by some YAML 1.1 loaders.
  learning_rate: 0.0003
  num_epochs: 10
  save_steps: 100
  warmup_steps: 200
  weight_decay: 0.01