{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 185,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005405405405405406,
      "grad_norm": 2.6678006649017334,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 1.4457,
      "step": 1
    },
    {
      "epoch": 0.02702702702702703,
      "grad_norm": 2.4659605026245117,
      "learning_rate": 5.2631578947368424e-05,
      "loss": 1.4311,
      "step": 5
    },
    {
      "epoch": 0.05405405405405406,
      "grad_norm": 2.074678659439087,
      "learning_rate": 0.00010526315789473685,
      "loss": 1.376,
      "step": 10
    },
    {
      "epoch": 0.08108108108108109,
      "grad_norm": 1.9776719808578491,
      "learning_rate": 0.00015789473684210527,
      "loss": 1.2676,
      "step": 15
    },
    {
      "epoch": 0.10810810810810811,
      "grad_norm": 1.8173789978027344,
      "learning_rate": 0.00019998209226697376,
      "loss": 1.1517,
      "step": 20
    },
    {
      "epoch": 0.13513513513513514,
      "grad_norm": 1.0727437734603882,
      "learning_rate": 0.0001993559947963185,
      "loss": 1.0512,
      "step": 25
    },
    {
      "epoch": 0.16216216216216217,
      "grad_norm": 0.8763325214385986,
      "learning_rate": 0.00019784091409455728,
      "loss": 0.9971,
      "step": 30
    },
    {
      "epoch": 0.1891891891891892,
      "grad_norm": 0.6051992774009705,
      "learning_rate": 0.0001954504062771555,
      "loss": 0.9685,
      "step": 35
    },
    {
      "epoch": 0.21621621621621623,
      "grad_norm": 0.6363440752029419,
      "learning_rate": 0.00019220586030376134,
      "loss": 0.9609,
      "step": 40
    },
    {
      "epoch": 0.24324324324324326,
      "grad_norm": 0.7076846957206726,
      "learning_rate": 0.00018813630660146488,
      "loss": 0.938,
      "step": 45
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 0.5835558772087097,
      "learning_rate": 0.00018327815731637612,
      "loss": 0.9376,
      "step": 50
    },
    {
      "epoch": 0.2972972972972973,
      "grad_norm": 0.6069886088371277,
      "learning_rate": 0.00017767488051760857,
      "loss": 0.9303,
      "step": 55
    },
    {
      "epoch": 0.32432432432432434,
      "grad_norm": 0.7000340819358826,
      "learning_rate": 0.0001713766112687139,
      "loss": 0.9276,
      "step": 60
    },
    {
      "epoch": 0.35135135135135137,
      "grad_norm": 0.6510019898414612,
      "learning_rate": 0.0001644397030464877,
      "loss": 0.9282,
      "step": 65
    },
    {
      "epoch": 0.3783783783783784,
      "grad_norm": 0.6287819147109985,
      "learning_rate": 0.00015692622352080662,
      "loss": 0.9125,
      "step": 70
    },
    {
      "epoch": 0.40540540540540543,
      "grad_norm": 0.6086368560791016,
      "learning_rate": 0.00014890339920698334,
      "loss": 0.9291,
      "step": 75
    },
    {
      "epoch": 0.43243243243243246,
      "grad_norm": 0.5720112919807434,
      "learning_rate": 0.0001404430139595877,
      "loss": 0.9152,
      "step": 80
    },
    {
      "epoch": 0.4594594594594595,
      "grad_norm": 0.6146399974822998,
      "learning_rate": 0.0001316207666896824,
      "loss": 0.9201,
      "step": 85
    },
    {
      "epoch": 0.4864864864864865,
      "grad_norm": 0.5874430537223816,
      "learning_rate": 0.00012251559405226941,
      "loss": 0.9071,
      "step": 90
    },
    {
      "epoch": 0.5135135135135135,
      "grad_norm": 0.609653115272522,
      "learning_rate": 0.00011320896416417026,
      "loss": 0.9111,
      "step": 95
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 0.5806834101676941,
      "learning_rate": 0.00010378414767176705,
      "loss": 0.9008,
      "step": 100
    },
    {
      "epoch": 0.5675675675675675,
      "grad_norm": 0.5760082602500916,
      "learning_rate": 9.432547269069261e-05,
      "loss": 0.9053,
      "step": 105
    },
    {
      "epoch": 0.5945945945945946,
      "grad_norm": 0.6656131148338318,
      "learning_rate": 8.491757028386263e-05,
      "loss": 0.9029,
      "step": 110
    },
    {
      "epoch": 0.6216216216216216,
      "grad_norm": 0.6559913754463196,
      "learning_rate": 7.564461722890081e-05,
      "loss": 0.9103,
      "step": 115
    },
    {
      "epoch": 0.6486486486486487,
      "grad_norm": 0.5813584327697754,
      "learning_rate": 6.658958285026102e-05,
      "loss": 0.9033,
      "step": 120
    },
    {
      "epoch": 0.6756756756756757,
      "grad_norm": 0.538943350315094,
      "learning_rate": 5.7833486654981606e-05,
      "loss": 0.9059,
      "step": 125
    },
    {
      "epoch": 0.7027027027027027,
      "grad_norm": 0.5927494764328003,
      "learning_rate": 4.945467341434195e-05,
      "loss": 0.9031,
      "step": 130
    },
    {
      "epoch": 0.7297297297297297,
      "grad_norm": 0.6069759726524353,
      "learning_rate": 4.152811217759529e-05,
      "loss": 0.9027,
      "step": 135
    },
    {
      "epoch": 0.7567567567567568,
      "grad_norm": 0.5718995332717896,
      "learning_rate": 3.4124725489820645e-05,
      "loss": 0.8891,
      "step": 140
    },
    {
      "epoch": 0.7837837837837838,
      "grad_norm": 0.6052721738815308,
      "learning_rate": 2.7310754815685624e-05,
      "loss": 0.8972,
      "step": 145
    },
    {
      "epoch": 0.8108108108108109,
      "grad_norm": 0.6083750128746033,
      "learning_rate": 2.1147167846963422e-05,
      "loss": 0.8948,
      "step": 150
    },
    {
      "epoch": 0.8378378378378378,
      "grad_norm": 0.5737211108207703,
      "learning_rate": 1.5689112996891576e-05,
      "loss": 0.9016,
      "step": 155
    },
    {
      "epoch": 0.8648648648648649,
      "grad_norm": 0.6462253332138062,
      "learning_rate": 1.0985425962260343e-05,
      "loss": 0.8989,
      "step": 160
    },
    {
      "epoch": 0.8918918918918919,
      "grad_norm": 0.6070159077644348,
      "learning_rate": 7.078192768243486e-06,
      "loss": 0.8913,
      "step": 165
    },
    {
      "epoch": 0.918918918918919,
      "grad_norm": 0.5784199833869934,
      "learning_rate": 4.002373205607723e-06,
      "loss": 0.8962,
      "step": 170
    },
    {
      "epoch": 0.9459459459459459,
      "grad_norm": 0.6253275871276855,
      "learning_rate": 1.7854880295797405e-06,
      "loss": 0.8815,
      "step": 175
    },
    {
      "epoch": 0.972972972972973,
      "grad_norm": 0.5931078195571899,
      "learning_rate": 4.4737271914411236e-07,
      "loss": 0.8919,
      "step": 180
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6198851466178894,
      "learning_rate": 0.0,
      "loss": 0.893,
      "step": 185
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.4024296998977661,
      "eval_runtime": 0.674,
      "eval_samples_per_second": 16.321,
      "eval_steps_per_second": 1.484,
      "step": 185
    },
    {
      "epoch": 1.0,
      "step": 185,
      "total_flos": 9.060485625492275e+17,
      "train_loss": 0.9603648733448338,
      "train_runtime": 724.3175,
      "train_samples_per_second": 57.2,
      "train_steps_per_second": 0.255
    }
  ],
  "logging_steps": 5,
  "max_steps": 185,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.060485625492275e+17,
  "train_batch_size": 14,
  "trial_name": null,
  "trial_params": null
}