Upload training_config.yml with huggingface_hub
Browse files
- training_config.yml +84 -0
    	
        training_config.yml
    ADDED
    
    | @@ -0,0 +1,84 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            model:
         | 
| 2 | 
            +
              _component_: models.lora_mmllama3_8b
         | 
| 3 | 
            +
              lora_attn_modules:
         | 
| 4 | 
            +
              - q_proj
         | 
| 5 | 
            +
              - v_proj
         | 
| 6 | 
            +
              apply_lora_to_mlp: false
         | 
| 7 | 
            +
              apply_lora_to_output: false
         | 
| 8 | 
            +
              lora_rank: 32
         | 
| 9 | 
            +
              lora_alpha: 64
         | 
| 10 | 
            +
              perception_tokens: 2
         | 
| 11 | 
            +
              use_clip: false
         | 
| 12 | 
            +
            tokenizer:
         | 
| 13 | 
            +
              _component_: models.a2a_tokenizer
         | 
| 14 | 
            +
              path: models/tokenizer.model
         | 
| 15 | 
            +
            checkpointer:
         | 
| 16 | 
            +
              _component_: torchtune.utils.FullModelMetaCheckpointer
         | 
| 17 | 
            +
              checkpoint_dir: 
         | 
| 18 | 
            +
              checkpoint_files:
         | 
| 19 | 
            +
              - 
         | 
| 20 | 
            +
              adapter_checkpoint: null
         | 
| 21 | 
            +
              recipe_checkpoint: null
         | 
| 22 | 
            +
              output_dir: output_checkpoints/experiment_1
         | 
| 23 | 
            +
              model_type: LLAMA3
         | 
| 24 | 
            +
            resume_from_checkpoint: false
         | 
| 25 | 
            +
            interim_checkpoint_steps: 15000
         | 
| 26 | 
            +
            interim_gen_steps: null
         | 
| 27 | 
            +
            max_new_tokens: 77
         | 
| 28 | 
            +
            temperature: 0.6
         | 
| 29 | 
            +
            top_k: 231
         | 
| 30 | 
            +
            dataset:
         | 
| 31 | 
            +
              _component_: ds.EvenBatcher
         | 
| 32 | 
            +
              buffer_size: 72
         | 
| 33 | 
            +
              dataset:
         | 
| 34 | 
            +
                _component_: ds.RoundRobinDataset
         | 
| 35 | 
            +
                datasets:
         | 
| 36 | 
            +
                - _component_: ds.OmegaVideoCaptionDataset
         | 
| 37 | 
            +
                  length: 500000
         | 
| 38 | 
            +
                - _component_: ds.LlavaInstructDataset
         | 
| 39 | 
            +
                  dataset_path: ds/coco_llava_instruct/output.parquet
         | 
| 40 | 
            +
                  train_on_input: false
         | 
| 41 | 
            +
                - _component_: ds.LlavaInstructDataset
         | 
| 42 | 
            +
                  dataset_path: ds/vision_flan/output.parquet
         | 
| 43 | 
            +
                  train_on_input: false
         | 
| 44 | 
            +
                - _component_: ds.CaptionInstructDataset
         | 
| 45 | 
            +
                  dataset_path: ds/sam_llava/output.parquet
         | 
| 46 | 
            +
                  train_on_input: false
         | 
| 47 | 
            +
            seed: null
         | 
| 48 | 
            +
            shuffle: true
         | 
| 49 | 
            +
            batch_size: 6
         | 
| 50 | 
            +
            optimizer:
         | 
| 51 | 
            +
              _component_: torch.optim.AdamW
         | 
| 52 | 
            +
              weight_decay: 0.99
         | 
| 53 | 
            +
              lr: 20.0e-05
         | 
| 54 | 
            +
            lr_scheduler:
         | 
| 55 | 
            +
              _component_: torchtune.modules.get_cosine_schedule_with_warmup
         | 
| 56 | 
            +
              num_warmup_steps: 4
         | 
| 57 | 
            +
            loss:
         | 
| 58 | 
            +
              _component_: torch.nn.CrossEntropyLoss
         | 
| 59 | 
            +
            epochs: 60
         | 
| 60 | 
            +
            max_steps_per_epoch: null
         | 
| 61 | 
            +
            gradient_accumulation_steps: 260
         | 
| 62 | 
            +
            compile: false
         | 
| 63 | 
            +
            output_dir: /tmp/lora_finetune_output
         | 
| 64 | 
            +
            metric_logger:
         | 
| 65 | 
            +
              _component_: torchtune.utils.metric_logging.DiskLogger
         | 
| 66 | 
            +
              log_dir: ${output_dir}
         | 
| 67 | 
            +
            log_every_n_steps: null
         | 
| 68 | 
            +
            device: cuda
         | 
| 69 | 
            +
            dtype: bf16
         | 
| 70 | 
            +
            enable_activation_checkpointing: false
         | 
| 71 | 
            +
            profiler:
         | 
| 72 | 
            +
              _component_: torchtune.utils.profiler
         | 
| 73 | 
            +
              enabled: false
         | 
| 74 | 
            +
            inference:
         | 
| 75 | 
            +
              prompt_template: 'Video:
         | 
| 76 | 
            +
             | 
| 77 | 
            +
                {video}
         | 
| 78 | 
            +
             | 
| 79 | 
            +
                Caption the previous video.'
         | 
| 80 | 
            +
              max_new_tokens: 231
         | 
| 81 | 
            +
              temperature: 0.8
         | 
| 82 | 
            +
              top_k: 231
         | 
| 83 | 
            +
              quantizer: null
         | 
| 84 | 
            +
            gradient-accumulation-steps: 32
         |