dataset:
  align_stage_components:
  - download/llava-laion-cc-sbu-558k/chat.json
  - download/llava-laion-cc-sbu-558k
  dataset_id: obelics
  dataset_resampled: true
  dataset_root_dir: /share/edc/home/weizhiwang/data/datacomp/datacomp_hq_single_pkl_pil:/share/edc/home/weizhiwang/data/ccs/ccs_single_pkl_pil/:/share/edc/home/weizhiwang/data/laion/laion_single_pkl_pil/
  finetune_stage_components:
  - /fsx-training/shopqa-training-fsx-prod-us-east-1/home/wzwang/data/llava/llava_v1_5_mix665k.json
  - /fsx-training/shopqa-training-fsx-prod-us-east-1/home/wzwang/data/llava/data
  max_num_images: 6
  min_num_images: 1
  train_num_samples: 1
  type: obelics
  workers: 4
model:
  align_epochs: 1
  align_global_batch_size: 256
  align_learning_rate: 0.001
  align_lr_scheduler_type: linear-warmup+cosine-decay
  align_max_grad_norm: 1.0
  align_max_steps: null
  align_per_device_batch_size: 16
  align_train_strategy: fsdp-shard-grad-op
  align_warmup_ratio: 0.03
  align_weight_decay: 0.0
  arch_specifier: no-align+avgpool
  enable_gradient_checkpointing: true
  enable_mixed_precision_training: true
  finetune_epochs: 1
  finetune_global_batch_size: 128
  finetune_learning_rate: 2.0e-05
  finetune_lr_scheduler_type: linear-warmup+cosine-decay
  finetune_max_grad_norm: 1.0
  finetune_max_steps: null
  finetune_per_device_batch_size: 16
  finetune_train_strategy: fsdp-full-shard
  finetune_warmup_ratio: 0.03
  finetune_weight_decay: 0.1
  image_resize_strategy: resize-naive
  llm_backbone_id: qwen2.5-1.5b-instruct
  llm_max_length: 4096
  model_id: qwen2.5-1.5b-instruct-continue-training-ccs-datacomp-mlm-filter
  pretrain_epochs: 1
  pretrain_global_batch_size: 256
  pretrain_learning_rate: 5.0e-05
  pretrain_lr_scheduler_type: linear-warmup+cosine-decay
  pretrain_max_grad_norm: 1.0
  pretrain_max_steps: null
  pretrain_per_device_batch_size: 2
  pretrain_train_strategy: fsdp-full-shard
  pretrain_warmup_ratio: 0.03
  pretrain_weight_decay: 0.01
  reduce_in_full_precision: false
  type: one-stage+7b
  vision_backbone_id: siglip-vit-so400m-384px
mount_path: Qwen
pretrained_checkpoint: null
run_id: obelics+qwen2.5-1.5b-instruct-continue-training-ccs-datacomp-mlm-filter+stage-pretrain+x7
run_root_dir: /share/edc/home/weizhiwang/checkpoints
seed: 7
stage: pretrain
trackers:
- jsonl
wandb_entity: null
wandb_project: mmpretrain