| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 1757, | |
| "global_step": 35140, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 23.659637451171875, | |
| "learning_rate": 9.97723392145703e-07, | |
| "loss": 3.8346, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_nli-pairs_loss": 2.8535287380218506, | |
| "eval_nli-pairs_runtime": 23.1384, | |
| "eval_nli-pairs_samples_per_second": 294.229, | |
| "eval_nli-pairs_steps_per_second": 18.411, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_scitail-pairs-pos_loss": 2.323117256164551, | |
| "eval_scitail-pairs-pos_runtime": 5.1803, | |
| "eval_scitail-pairs-pos_samples_per_second": 251.722, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.829, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_qnli-contrastive_loss": 3.09728741645813, | |
| "eval_qnli-contrastive_runtime": 15.5151, | |
| "eval_qnli-contrastive_samples_per_second": 352.109, | |
| "eval_qnli-contrastive_steps_per_second": 22.043, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 13.776155471801758, | |
| "learning_rate": 1.9965850882185546e-06, | |
| "loss": 1.8532, | |
| "step": 3514 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_nli-pairs_loss": 1.3508331775665283, | |
| "eval_nli-pairs_runtime": 22.8642, | |
| "eval_nli-pairs_samples_per_second": 297.758, | |
| "eval_nli-pairs_steps_per_second": 18.632, | |
| "step": 3514 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_scitail-pairs-pos_loss": 0.9754649996757507, | |
| "eval_scitail-pairs-pos_runtime": 5.1924, | |
| "eval_scitail-pairs-pos_samples_per_second": 251.136, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.792, | |
| "step": 3514 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_qnli-contrastive_loss": 2.0602548122406006, | |
| "eval_qnli-contrastive_runtime": 15.6036, | |
| "eval_qnli-contrastive_samples_per_second": 350.111, | |
| "eval_qnli-contrastive_steps_per_second": 21.918, | |
| "step": 3514 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.6704504489898682, | |
| "learning_rate": 2.99601593625498e-06, | |
| "loss": 1.2185, | |
| "step": 5271 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_nli-pairs_loss": 0.9380640983581543, | |
| "eval_nli-pairs_runtime": 23.1102, | |
| "eval_nli-pairs_samples_per_second": 294.589, | |
| "eval_nli-pairs_steps_per_second": 18.433, | |
| "step": 5271 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_scitail-pairs-pos_loss": 0.7407301664352417, | |
| "eval_scitail-pairs-pos_runtime": 5.2512, | |
| "eval_scitail-pairs-pos_samples_per_second": 248.322, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.615, | |
| "step": 5271 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_qnli-contrastive_loss": 1.2534083127975464, | |
| "eval_qnli-contrastive_runtime": 15.6475, | |
| "eval_qnli-contrastive_samples_per_second": 349.129, | |
| "eval_qnli-contrastive_steps_per_second": 21.857, | |
| "step": 5271 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 26.072860717773438, | |
| "learning_rate": 3.99601593625498e-06, | |
| "loss": 0.9584, | |
| "step": 7028 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_nli-pairs_loss": 0.749484658241272, | |
| "eval_nli-pairs_runtime": 23.0514, | |
| "eval_nli-pairs_samples_per_second": 295.34, | |
| "eval_nli-pairs_steps_per_second": 18.48, | |
| "step": 7028 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_scitail-pairs-pos_loss": 0.661561131477356, | |
| "eval_scitail-pairs-pos_runtime": 5.2207, | |
| "eval_scitail-pairs-pos_samples_per_second": 249.774, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.707, | |
| "step": 7028 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_qnli-contrastive_loss": 0.5139556527137756, | |
| "eval_qnli-contrastive_runtime": 15.681, | |
| "eval_qnli-contrastive_samples_per_second": 348.384, | |
| "eval_qnli-contrastive_steps_per_second": 21.81, | |
| "step": 7028 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 24.09697914123535, | |
| "learning_rate": 4.995446784291406e-06, | |
| "loss": 0.8157, | |
| "step": 8785 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_nli-pairs_loss": 0.6549726724624634, | |
| "eval_nli-pairs_runtime": 23.2274, | |
| "eval_nli-pairs_samples_per_second": 293.102, | |
| "eval_nli-pairs_steps_per_second": 18.34, | |
| "step": 8785 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_scitail-pairs-pos_loss": 0.6056841611862183, | |
| "eval_scitail-pairs-pos_runtime": 5.2473, | |
| "eval_scitail-pairs-pos_samples_per_second": 248.508, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.627, | |
| "step": 8785 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_qnli-contrastive_loss": 0.3295331299304962, | |
| "eval_qnli-contrastive_runtime": 15.7204, | |
| "eval_qnli-contrastive_samples_per_second": 347.511, | |
| "eval_qnli-contrastive_steps_per_second": 21.755, | |
| "step": 8785 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 9.664803504943848, | |
| "learning_rate": 5.994877632327832e-06, | |
| "loss": 0.6698, | |
| "step": 10542 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_nli-pairs_loss": 0.5809468626976013, | |
| "eval_nli-pairs_runtime": 22.9525, | |
| "eval_nli-pairs_samples_per_second": 296.612, | |
| "eval_nli-pairs_steps_per_second": 18.56, | |
| "step": 10542 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_scitail-pairs-pos_loss": 0.5820835828781128, | |
| "eval_scitail-pairs-pos_runtime": 5.1829, | |
| "eval_scitail-pairs-pos_samples_per_second": 251.599, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.821, | |
| "step": 10542 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_qnli-contrastive_loss": 0.24226614832878113, | |
| "eval_qnli-contrastive_runtime": 15.6321, | |
| "eval_qnli-contrastive_samples_per_second": 349.473, | |
| "eval_qnli-contrastive_steps_per_second": 21.878, | |
| "step": 10542 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 66.77753448486328, | |
| "learning_rate": 6.994877632327832e-06, | |
| "loss": 0.6497, | |
| "step": 12299 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_nli-pairs_loss": 0.5178281664848328, | |
| "eval_nli-pairs_runtime": 23.0673, | |
| "eval_nli-pairs_samples_per_second": 295.136, | |
| "eval_nli-pairs_steps_per_second": 18.468, | |
| "step": 12299 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_scitail-pairs-pos_loss": 0.504002571105957, | |
| "eval_scitail-pairs-pos_runtime": 5.1845, | |
| "eval_scitail-pairs-pos_samples_per_second": 251.52, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.816, | |
| "step": 12299 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_qnli-contrastive_loss": 0.24089547991752625, | |
| "eval_qnli-contrastive_runtime": 15.5228, | |
| "eval_qnli-contrastive_samples_per_second": 351.933, | |
| "eval_qnli-contrastive_steps_per_second": 22.032, | |
| "step": 12299 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.6044542193412781, | |
| "learning_rate": 7.994308480364257e-06, | |
| "loss": 0.5737, | |
| "step": 14056 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_nli-pairs_loss": 0.5019380450248718, | |
| "eval_nli-pairs_runtime": 23.0659, | |
| "eval_nli-pairs_samples_per_second": 295.154, | |
| "eval_nli-pairs_steps_per_second": 18.469, | |
| "step": 14056 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_scitail-pairs-pos_loss": 0.49418017268180847, | |
| "eval_scitail-pairs-pos_runtime": 5.2457, | |
| "eval_scitail-pairs-pos_samples_per_second": 248.585, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.632, | |
| "step": 14056 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_qnli-contrastive_loss": 0.14995019137859344, | |
| "eval_qnli-contrastive_runtime": 15.7177, | |
| "eval_qnli-contrastive_samples_per_second": 347.57, | |
| "eval_qnli-contrastive_steps_per_second": 21.759, | |
| "step": 14056 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.4454790949821472, | |
| "learning_rate": 8.993739328400684e-06, | |
| "loss": 0.5896, | |
| "step": 15813 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_nli-pairs_loss": 0.4803747236728668, | |
| "eval_nli-pairs_runtime": 23.0746, | |
| "eval_nli-pairs_samples_per_second": 295.043, | |
| "eval_nli-pairs_steps_per_second": 18.462, | |
| "step": 15813 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_scitail-pairs-pos_loss": 0.47568026185035706, | |
| "eval_scitail-pairs-pos_runtime": 5.2076, | |
| "eval_scitail-pairs-pos_samples_per_second": 250.402, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.746, | |
| "step": 15813 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_qnli-contrastive_loss": 0.14648529887199402, | |
| "eval_qnli-contrastive_runtime": 15.5997, | |
| "eval_qnli-contrastive_samples_per_second": 350.199, | |
| "eval_qnli-contrastive_steps_per_second": 21.924, | |
| "step": 15813 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 196.14842224121094, | |
| "learning_rate": 9.993739328400683e-06, | |
| "loss": 0.5174, | |
| "step": 17570 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_nli-pairs_loss": 0.4586646258831024, | |
| "eval_nli-pairs_runtime": 22.8967, | |
| "eval_nli-pairs_samples_per_second": 297.336, | |
| "eval_nli-pairs_steps_per_second": 18.605, | |
| "step": 17570 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_scitail-pairs-pos_loss": 0.5253121256828308, | |
| "eval_scitail-pairs-pos_runtime": 5.1603, | |
| "eval_scitail-pairs-pos_samples_per_second": 252.699, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.891, | |
| "step": 17570 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_qnli-contrastive_loss": 0.0533733032643795, | |
| "eval_qnli-contrastive_runtime": 15.5083, | |
| "eval_qnli-contrastive_samples_per_second": 352.263, | |
| "eval_qnli-contrastive_steps_per_second": 22.053, | |
| "step": 17570 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 18.839372634887695, | |
| "learning_rate": 9.75831232890717e-06, | |
| "loss": 0.5059, | |
| "step": 19327 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_nli-pairs_loss": 0.45871272683143616, | |
| "eval_nli-pairs_runtime": 22.8984, | |
| "eval_nli-pairs_samples_per_second": 297.313, | |
| "eval_nli-pairs_steps_per_second": 18.604, | |
| "step": 19327 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_scitail-pairs-pos_loss": 0.5492986440658569, | |
| "eval_scitail-pairs-pos_runtime": 5.1782, | |
| "eval_scitail-pairs-pos_samples_per_second": 251.824, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.836, | |
| "step": 19327 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_qnli-contrastive_loss": 0.027841920033097267, | |
| "eval_qnli-contrastive_runtime": 15.522, | |
| "eval_qnli-contrastive_samples_per_second": 351.952, | |
| "eval_qnli-contrastive_steps_per_second": 22.033, | |
| "step": 19327 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 6.800241947174072, | |
| "learning_rate": 9.051905444616243e-06, | |
| "loss": 0.4654, | |
| "step": 21084 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_nli-pairs_loss": 0.44151321053504944, | |
| "eval_nli-pairs_runtime": 23.1311, | |
| "eval_nli-pairs_samples_per_second": 294.323, | |
| "eval_nli-pairs_steps_per_second": 18.417, | |
| "step": 21084 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_scitail-pairs-pos_loss": 0.4850437045097351, | |
| "eval_scitail-pairs-pos_runtime": 5.2939, | |
| "eval_scitail-pairs-pos_samples_per_second": 246.321, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.49, | |
| "step": 21084 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_qnli-contrastive_loss": 0.05170624330639839, | |
| "eval_qnli-contrastive_runtime": 15.7737, | |
| "eval_qnli-contrastive_samples_per_second": 346.336, | |
| "eval_qnli-contrastive_steps_per_second": 21.682, | |
| "step": 21084 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 0.41899746656417847, | |
| "learning_rate": 7.948320938272786e-06, | |
| "loss": 0.4224, | |
| "step": 22841 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_nli-pairs_loss": 0.39569494128227234, | |
| "eval_nli-pairs_runtime": 23.2638, | |
| "eval_nli-pairs_samples_per_second": 292.643, | |
| "eval_nli-pairs_steps_per_second": 18.312, | |
| "step": 22841 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_scitail-pairs-pos_loss": 0.42922988533973694, | |
| "eval_scitail-pairs-pos_runtime": 5.2769, | |
| "eval_scitail-pairs-pos_samples_per_second": 247.114, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.539, | |
| "step": 22841 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_qnli-contrastive_loss": 0.0938122496008873, | |
| "eval_qnli-contrastive_runtime": 15.6681, | |
| "eval_qnli-contrastive_samples_per_second": 348.67, | |
| "eval_qnli-contrastive_steps_per_second": 21.828, | |
| "step": 22841 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 3.0029168128967285, | |
| "learning_rate": 6.556983832253587e-06, | |
| "loss": 0.4125, | |
| "step": 24598 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_nli-pairs_loss": 0.3794442415237427, | |
| "eval_nli-pairs_runtime": 23.2107, | |
| "eval_nli-pairs_samples_per_second": 293.313, | |
| "eval_nli-pairs_steps_per_second": 18.354, | |
| "step": 24598 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_scitail-pairs-pos_loss": 0.4623956084251404, | |
| "eval_scitail-pairs-pos_runtime": 5.2884, | |
| "eval_scitail-pairs-pos_samples_per_second": 246.577, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.506, | |
| "step": 24598 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_qnli-contrastive_loss": 0.0838843286037445, | |
| "eval_qnli-contrastive_runtime": 15.7017, | |
| "eval_qnli-contrastive_samples_per_second": 347.924, | |
| "eval_qnli-contrastive_steps_per_second": 21.781, | |
| "step": 24598 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 10.91913890838623, | |
| "learning_rate": 5.012516292320938e-06, | |
| "loss": 0.4072, | |
| "step": 26355 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_nli-pairs_loss": 0.3877629041671753, | |
| "eval_nli-pairs_runtime": 23.1072, | |
| "eval_nli-pairs_samples_per_second": 294.627, | |
| "eval_nli-pairs_steps_per_second": 18.436, | |
| "step": 26355 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_scitail-pairs-pos_loss": 0.4480924606323242, | |
| "eval_scitail-pairs-pos_runtime": 5.2741, | |
| "eval_scitail-pairs-pos_samples_per_second": 247.244, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.548, | |
| "step": 26355 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_qnli-contrastive_loss": 0.06811495870351791, | |
| "eval_qnli-contrastive_runtime": 15.7641, | |
| "eval_qnli-contrastive_samples_per_second": 346.546, | |
| "eval_qnli-contrastive_steps_per_second": 21.695, | |
| "step": 26355 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 3.676146984100342, | |
| "learning_rate": 3.4668235704897813e-06, | |
| "loss": 0.3572, | |
| "step": 28112 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_nli-pairs_loss": 0.3715905547142029, | |
| "eval_nli-pairs_runtime": 23.1744, | |
| "eval_nli-pairs_samples_per_second": 293.773, | |
| "eval_nli-pairs_steps_per_second": 18.382, | |
| "step": 28112 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_scitail-pairs-pos_loss": 0.49534013867378235, | |
| "eval_scitail-pairs-pos_runtime": 5.2856, | |
| "eval_scitail-pairs-pos_samples_per_second": 246.708, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.514, | |
| "step": 28112 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_qnli-contrastive_loss": 0.06735851615667343, | |
| "eval_qnli-contrastive_runtime": 15.7308, | |
| "eval_qnli-contrastive_samples_per_second": 347.281, | |
| "eval_qnli-contrastive_steps_per_second": 21.741, | |
| "step": 28112 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 229.6580047607422, | |
| "learning_rate": 2.072658211127134e-06, | |
| "loss": 0.371, | |
| "step": 29869 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_nli-pairs_loss": 0.36217835545539856, | |
| "eval_nli-pairs_runtime": 23.1495, | |
| "eval_nli-pairs_samples_per_second": 294.089, | |
| "eval_nli-pairs_steps_per_second": 18.402, | |
| "step": 29869 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_scitail-pairs-pos_loss": 0.47673526406288147, | |
| "eval_scitail-pairs-pos_runtime": 5.2158, | |
| "eval_scitail-pairs-pos_samples_per_second": 250.008, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.721, | |
| "step": 29869 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_qnli-contrastive_loss": 0.06000087782740593, | |
| "eval_qnli-contrastive_runtime": 15.6328, | |
| "eval_qnli-contrastive_samples_per_second": 349.458, | |
| "eval_qnli-contrastive_steps_per_second": 21.877, | |
| "step": 29869 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.6022229194641113, | |
| "learning_rate": 9.638670801112644e-07, | |
| "loss": 0.3332, | |
| "step": 31626 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_nli-pairs_loss": 0.3600439131259918, | |
| "eval_nli-pairs_runtime": 23.0874, | |
| "eval_nli-pairs_samples_per_second": 294.879, | |
| "eval_nli-pairs_steps_per_second": 18.452, | |
| "step": 31626 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_scitail-pairs-pos_loss": 0.465911865234375, | |
| "eval_scitail-pairs-pos_runtime": 5.3369, | |
| "eval_scitail-pairs-pos_samples_per_second": 244.338, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.365, | |
| "step": 31626 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_qnli-contrastive_loss": 0.05613844096660614, | |
| "eval_qnli-contrastive_runtime": 15.7089, | |
| "eval_qnli-contrastive_samples_per_second": 347.764, | |
| "eval_qnli-contrastive_steps_per_second": 21.771, | |
| "step": 31626 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 0.23106251657009125, | |
| "learning_rate": 2.4943593464921476e-07, | |
| "loss": 0.3695, | |
| "step": 33383 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_nli-pairs_loss": 0.35667526721954346, | |
| "eval_nli-pairs_runtime": 23.1588, | |
| "eval_nli-pairs_samples_per_second": 293.971, | |
| "eval_nli-pairs_steps_per_second": 18.395, | |
| "step": 33383 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_scitail-pairs-pos_loss": 0.4603894352912903, | |
| "eval_scitail-pairs-pos_runtime": 5.248, | |
| "eval_scitail-pairs-pos_samples_per_second": 248.476, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.625, | |
| "step": 33383 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_qnli-contrastive_loss": 0.06141861155629158, | |
| "eval_qnli-contrastive_runtime": 15.6709, | |
| "eval_qnli-contrastive_samples_per_second": 348.608, | |
| "eval_qnli-contrastive_steps_per_second": 21.824, | |
| "step": 33383 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": Infinity, | |
| "learning_rate": 2.5896487759191624e-11, | |
| "loss": 0.3315, | |
| "step": 35140 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_nli-pairs_loss": 0.3597075045108795, | |
| "eval_nli-pairs_runtime": 23.1058, | |
| "eval_nli-pairs_samples_per_second": 294.645, | |
| "eval_nli-pairs_steps_per_second": 18.437, | |
| "step": 35140 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_scitail-pairs-pos_loss": 0.47120198607444763, | |
| "eval_scitail-pairs-pos_runtime": 5.2532, | |
| "eval_scitail-pairs-pos_samples_per_second": 248.23, | |
| "eval_scitail-pairs-pos_steps_per_second": 15.61, | |
| "step": 35140 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_qnli-contrastive_loss": 0.05398999899625778, | |
| "eval_qnli-contrastive_runtime": 15.7099, | |
| "eval_qnli-contrastive_samples_per_second": 347.743, | |
| "eval_qnli-contrastive_steps_per_second": 21.77, | |
| "step": 35140 | |
| } | |
| ], | |
| "logging_steps": 1757, | |
| "max_steps": 35140, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 17570, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 30, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |