```yaml
data:
  max_length: 512
  min_doc_length: 100
  stride: 256

huggingface:
  license: apache-2.0
  model_name: raihan-js/medllm-10m-v2
  private: false

model:
  activation: gelu
  d_ff: 2048
  d_model: 512
  dropout: 0.1
  max_seq_len: 512
  n_heads: 8
  n_layers: 8
  name: MedLLM-10M-v2
  vocab_size: 5000

paths:
  data_dir: ./data
  logs_dir: ./logs
  model_dir: ./checkpoints/medllm-10m
  tokenizer_dir: ./tokenizer/vocab

scraping:
  delay_between_requests: 0.5
  max_retries: 3
  max_workers: 8
  timeout: 30

training:
  batch_size: 4
  eval_steps: 50
  fp16: true
  grad_clip: 1.0
  gradient_accumulation_steps: 8
  learning_rate: 0.0003
  num_epochs: 10
  save_steps: 100
  warmup_steps: 200
  weight_decay: 0.01
```
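
Below is a minimal sketch of how a config like this might be consumed, assuming it lives in a file named `config.yaml` and is read with PyYAML; the filename and loader are illustrative assumptions, not part of the project itself. It also shows how the effective batch size falls out of the training section: `batch_size` of 4 with `gradient_accumulation_steps` of 8 gives 32 sequences per optimizer step.

```python
# Sketch only: load the config with PyYAML and derive a few training values.
# The filename "config.yaml" is an assumption about the project layout.
import yaml

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

model_cfg = cfg["model"]
train_cfg = cfg["training"]

# Effective batch size per optimizer step: micro-batch size times the
# number of gradient-accumulation steps (4 * 8 = 32 sequences here).
effective_batch = train_cfg["batch_size"] * train_cfg["gradient_accumulation_steps"]

print(f"{model_cfg['name']}: d_model={model_cfg['d_model']}, "
      f"layers={model_cfg['n_layers']}, heads={model_cfg['n_heads']}")
print(f"Effective batch size: {effective_batch}")
```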