diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.8565068257937839, + "epoch": 0.8726673319408363, "eval_steps": 500, - "global_step": 424000, + "global_step": 432000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -296807,6 +296807,5606 @@ "learning_rate": 6.874316539637127e-07, "loss": 25.6927, "step": 424000 + }, + { + "epoch": 0.8565270264264677, + "grad_norm": 395.2640686035156, + "learning_rate": 6.872550253671207e-07, + "loss": 23.6256, + "step": 424010 + }, + { + "epoch": 0.8565472270591515, + "grad_norm": 1355.140625, + "learning_rate": 6.870784177903244e-07, + "loss": 32.2784, + "step": 424020 + }, + { + "epoch": 0.8565674276918354, + "grad_norm": 127.0737533569336, + "learning_rate": 6.869018312341841e-07, + "loss": 13.1837, + "step": 424030 + }, + { + "epoch": 0.8565876283245192, + "grad_norm": 36.54533767700195, + "learning_rate": 6.86725265699561e-07, + "loss": 16.7496, + "step": 424040 + }, + { + "epoch": 0.8566078289572029, + "grad_norm": 471.26873779296875, + "learning_rate": 6.865487211873167e-07, + "loss": 19.949, + "step": 424050 + }, + { + "epoch": 0.8566280295898867, + "grad_norm": 342.75933837890625, + "learning_rate": 6.863721976983112e-07, + "loss": 24.4788, + "step": 424060 + }, + { + "epoch": 0.8566482302225705, + "grad_norm": 656.39453125, + "learning_rate": 6.861956952334031e-07, + "loss": 19.6866, + "step": 424070 + }, + { + "epoch": 0.8566684308552543, + "grad_norm": 198.0663299560547, + "learning_rate": 6.860192137934552e-07, + "loss": 17.8723, + "step": 424080 + }, + { + "epoch": 0.8566886314879382, + "grad_norm": 217.781982421875, + "learning_rate": 6.858427533793261e-07, + "loss": 12.8337, + "step": 424090 + }, + { + "epoch": 0.856708832120622, + "grad_norm": 436.1506652832031, + "learning_rate": 6.856663139918751e-07, + "loss": 11.4889, + "step": 424100 + }, + { + "epoch": 0.8567290327533058, + "grad_norm": 229.63490295410156, + "learning_rate": 6.854898956319644e-07, + "loss": 25.8869, + "step": 424110 + }, + { + "epoch": 0.8567492333859896, + "grad_norm": 290.39178466796875, + "learning_rate": 6.853134983004517e-07, + "loss": 8.3607, + "step": 424120 + }, + { + "epoch": 0.8567694340186734, + "grad_norm": 339.2444152832031, + "learning_rate": 6.851371219981989e-07, + "loss": 16.9643, + "step": 424130 + }, + { + "epoch": 0.8567896346513573, + "grad_norm": 270.17657470703125, + "learning_rate": 6.849607667260643e-07, + "loss": 26.7416, + "step": 424140 + }, + { + "epoch": 0.8568098352840411, + "grad_norm": 309.4647216796875, + "learning_rate": 6.847844324849062e-07, + "loss": 36.7673, + "step": 424150 + }, + { + "epoch": 0.8568300359167249, + "grad_norm": 27.725566864013672, + "learning_rate": 6.846081192755871e-07, + "loss": 14.3943, + "step": 424160 + }, + { + "epoch": 0.8568502365494087, + "grad_norm": 513.9178466796875, + "learning_rate": 6.844318270989631e-07, + "loss": 17.2254, + "step": 424170 + }, + { + "epoch": 0.8568704371820925, + "grad_norm": 280.4992980957031, + "learning_rate": 6.842555559558961e-07, + "loss": 11.4802, + "step": 424180 + }, + { + "epoch": 0.8568906378147764, + "grad_norm": 255.67034912109375, + "learning_rate": 6.840793058472434e-07, + "loss": 12.5791, + "step": 424190 + }, + { + "epoch": 0.8569108384474602, + "grad_norm": 0.0, + "learning_rate": 6.839030767738653e-07, + "loss": 13.0745, + "step": 424200 + }, + { + "epoch": 0.856931039080144, + "grad_norm": 570.7583618164062, + "learning_rate": 6.837268687366199e-07, + "loss": 24.4529, + "step": 424210 + }, + { + "epoch": 0.8569512397128278, + "grad_norm": 310.4892883300781, + "learning_rate": 6.835506817363657e-07, + "loss": 13.3413, + "step": 424220 + }, + { + "epoch": 0.8569714403455116, + "grad_norm": 21.324195861816406, + "learning_rate": 6.83374515773963e-07, + "loss": 23.01, + "step": 424230 + }, + { + "epoch": 0.8569916409781955, + "grad_norm": 235.50726318359375, + "learning_rate": 6.831983708502693e-07, + "loss": 12.8942, + "step": 424240 + }, + { + "epoch": 0.8570118416108793, + "grad_norm": 315.3659973144531, + "learning_rate": 6.830222469661419e-07, + "loss": 12.0015, + "step": 424250 + }, + { + "epoch": 0.8570320422435631, + "grad_norm": 279.39208984375, + "learning_rate": 6.828461441224405e-07, + "loss": 14.5823, + "step": 424260 + }, + { + "epoch": 0.8570522428762469, + "grad_norm": 141.75877380371094, + "learning_rate": 6.826700623200255e-07, + "loss": 13.2339, + "step": 424270 + }, + { + "epoch": 0.8570724435089307, + "grad_norm": 268.73138427734375, + "learning_rate": 6.824940015597514e-07, + "loss": 15.8064, + "step": 424280 + }, + { + "epoch": 0.8570926441416146, + "grad_norm": 66.41516876220703, + "learning_rate": 6.823179618424774e-07, + "loss": 7.1832, + "step": 424290 + }, + { + "epoch": 0.8571128447742983, + "grad_norm": 131.48736572265625, + "learning_rate": 6.821419431690629e-07, + "loss": 12.0447, + "step": 424300 + }, + { + "epoch": 0.8571330454069821, + "grad_norm": 110.49434661865234, + "learning_rate": 6.819659455403654e-07, + "loss": 12.5332, + "step": 424310 + }, + { + "epoch": 0.8571532460396659, + "grad_norm": 460.3204345703125, + "learning_rate": 6.817899689572405e-07, + "loss": 17.4025, + "step": 424320 + }, + { + "epoch": 0.8571734466723497, + "grad_norm": 298.1443176269531, + "learning_rate": 6.816140134205479e-07, + "loss": 15.143, + "step": 424330 + }, + { + "epoch": 0.8571936473050336, + "grad_norm": 313.5130920410156, + "learning_rate": 6.81438078931147e-07, + "loss": 13.8155, + "step": 424340 + }, + { + "epoch": 0.8572138479377174, + "grad_norm": 8.115134239196777, + "learning_rate": 6.81262165489891e-07, + "loss": 13.0447, + "step": 424350 + }, + { + "epoch": 0.8572340485704012, + "grad_norm": 13.400871276855469, + "learning_rate": 6.810862730976392e-07, + "loss": 12.5523, + "step": 424360 + }, + { + "epoch": 0.857254249203085, + "grad_norm": 300.0811462402344, + "learning_rate": 6.809104017552503e-07, + "loss": 10.1747, + "step": 424370 + }, + { + "epoch": 0.8572744498357688, + "grad_norm": 104.60641479492188, + "learning_rate": 6.807345514635805e-07, + "loss": 16.5223, + "step": 424380 + }, + { + "epoch": 0.8572946504684527, + "grad_norm": 284.20074462890625, + "learning_rate": 6.80558722223485e-07, + "loss": 13.5617, + "step": 424390 + }, + { + "epoch": 0.8573148511011365, + "grad_norm": 176.078857421875, + "learning_rate": 6.803829140358237e-07, + "loss": 16.442, + "step": 424400 + }, + { + "epoch": 0.8573350517338203, + "grad_norm": 495.5717468261719, + "learning_rate": 6.802071269014527e-07, + "loss": 20.9467, + "step": 424410 + }, + { + "epoch": 0.8573552523665041, + "grad_norm": 232.489501953125, + "learning_rate": 6.800313608212261e-07, + "loss": 17.0568, + "step": 424420 + }, + { + "epoch": 0.8573754529991879, + "grad_norm": 238.38931274414062, + "learning_rate": 6.798556157960046e-07, + "loss": 11.8175, + "step": 424430 + }, + { + "epoch": 0.8573956536318718, + "grad_norm": 196.57803344726562, + "learning_rate": 6.796798918266417e-07, + "loss": 20.7263, + "step": 424440 + }, + { + "epoch": 0.8574158542645556, + "grad_norm": 15.518020629882812, + "learning_rate": 6.795041889139958e-07, + "loss": 15.2344, + "step": 424450 + }, + { + "epoch": 0.8574360548972394, + "grad_norm": 256.23760986328125, + "learning_rate": 6.793285070589229e-07, + "loss": 17.4303, + "step": 424460 + }, + { + "epoch": 0.8574562555299232, + "grad_norm": 298.38067626953125, + "learning_rate": 6.79152846262277e-07, + "loss": 27.6497, + "step": 424470 + }, + { + "epoch": 0.857476456162607, + "grad_norm": 383.74127197265625, + "learning_rate": 6.789772065249178e-07, + "loss": 12.2496, + "step": 424480 + }, + { + "epoch": 0.8574966567952909, + "grad_norm": 257.5712585449219, + "learning_rate": 6.788015878476983e-07, + "loss": 13.5638, + "step": 424490 + }, + { + "epoch": 0.8575168574279747, + "grad_norm": 113.62663269042969, + "learning_rate": 6.786259902314768e-07, + "loss": 11.9673, + "step": 424500 + }, + { + "epoch": 0.8575370580606585, + "grad_norm": 265.4411315917969, + "learning_rate": 6.784504136771075e-07, + "loss": 25.6099, + "step": 424510 + }, + { + "epoch": 0.8575572586933423, + "grad_norm": 281.8363952636719, + "learning_rate": 6.782748581854471e-07, + "loss": 14.7695, + "step": 424520 + }, + { + "epoch": 0.8575774593260261, + "grad_norm": 15.098465919494629, + "learning_rate": 6.780993237573513e-07, + "loss": 9.7567, + "step": 424530 + }, + { + "epoch": 0.85759765995871, + "grad_norm": 284.7987365722656, + "learning_rate": 6.779238103936742e-07, + "loss": 15.353, + "step": 424540 + }, + { + "epoch": 0.8576178605913938, + "grad_norm": 154.20411682128906, + "learning_rate": 6.777483180952732e-07, + "loss": 14.5259, + "step": 424550 + }, + { + "epoch": 0.8576380612240775, + "grad_norm": 78.2432632446289, + "learning_rate": 6.775728468630027e-07, + "loss": 20.6444, + "step": 424560 + }, + { + "epoch": 0.8576582618567613, + "grad_norm": 298.0225830078125, + "learning_rate": 6.773973966977165e-07, + "loss": 15.1713, + "step": 424570 + }, + { + "epoch": 0.8576784624894451, + "grad_norm": 301.41937255859375, + "learning_rate": 6.772219676002717e-07, + "loss": 17.4354, + "step": 424580 + }, + { + "epoch": 0.857698663122129, + "grad_norm": 208.0994110107422, + "learning_rate": 6.770465595715231e-07, + "loss": 14.8364, + "step": 424590 + }, + { + "epoch": 0.8577188637548128, + "grad_norm": 186.12237548828125, + "learning_rate": 6.768711726123261e-07, + "loss": 10.0088, + "step": 424600 + }, + { + "epoch": 0.8577390643874966, + "grad_norm": 385.00579833984375, + "learning_rate": 6.76695806723533e-07, + "loss": 14.7199, + "step": 424610 + }, + { + "epoch": 0.8577592650201804, + "grad_norm": 253.53329467773438, + "learning_rate": 6.765204619060012e-07, + "loss": 19.1844, + "step": 424620 + }, + { + "epoch": 0.8577794656528642, + "grad_norm": 269.28094482421875, + "learning_rate": 6.763451381605846e-07, + "loss": 15.1143, + "step": 424630 + }, + { + "epoch": 0.857799666285548, + "grad_norm": 25.068782806396484, + "learning_rate": 6.761698354881363e-07, + "loss": 27.9622, + "step": 424640 + }, + { + "epoch": 0.8578198669182319, + "grad_norm": 477.1641845703125, + "learning_rate": 6.759945538895119e-07, + "loss": 22.6392, + "step": 424650 + }, + { + "epoch": 0.8578400675509157, + "grad_norm": 243.30755615234375, + "learning_rate": 6.758192933655667e-07, + "loss": 23.6183, + "step": 424660 + }, + { + "epoch": 0.8578602681835995, + "grad_norm": 260.8081970214844, + "learning_rate": 6.756440539171533e-07, + "loss": 17.6033, + "step": 424670 + }, + { + "epoch": 0.8578804688162833, + "grad_norm": 212.6053009033203, + "learning_rate": 6.754688355451256e-07, + "loss": 16.9901, + "step": 424680 + }, + { + "epoch": 0.8579006694489671, + "grad_norm": 306.0473327636719, + "learning_rate": 6.752936382503394e-07, + "loss": 14.9527, + "step": 424690 + }, + { + "epoch": 0.857920870081651, + "grad_norm": 279.94818115234375, + "learning_rate": 6.751184620336471e-07, + "loss": 19.3849, + "step": 424700 + }, + { + "epoch": 0.8579410707143348, + "grad_norm": 225.95159912109375, + "learning_rate": 6.749433068959022e-07, + "loss": 8.6361, + "step": 424710 + }, + { + "epoch": 0.8579612713470186, + "grad_norm": 215.16275024414062, + "learning_rate": 6.747681728379601e-07, + "loss": 18.9797, + "step": 424720 + }, + { + "epoch": 0.8579814719797024, + "grad_norm": 318.85797119140625, + "learning_rate": 6.745930598606721e-07, + "loss": 18.8878, + "step": 424730 + }, + { + "epoch": 0.8580016726123862, + "grad_norm": 293.9589538574219, + "learning_rate": 6.744179679648943e-07, + "loss": 14.6065, + "step": 424740 + }, + { + "epoch": 0.8580218732450701, + "grad_norm": 232.8202667236328, + "learning_rate": 6.742428971514786e-07, + "loss": 13.9461, + "step": 424750 + }, + { + "epoch": 0.8580420738777539, + "grad_norm": 308.7608947753906, + "learning_rate": 6.74067847421277e-07, + "loss": 31.7157, + "step": 424760 + }, + { + "epoch": 0.8580622745104377, + "grad_norm": 279.5561218261719, + "learning_rate": 6.738928187751454e-07, + "loss": 17.1414, + "step": 424770 + }, + { + "epoch": 0.8580824751431215, + "grad_norm": 471.01397705078125, + "learning_rate": 6.737178112139342e-07, + "loss": 17.0854, + "step": 424780 + }, + { + "epoch": 0.8581026757758053, + "grad_norm": 155.662841796875, + "learning_rate": 6.735428247384989e-07, + "loss": 26.9572, + "step": 424790 + }, + { + "epoch": 0.8581228764084892, + "grad_norm": 217.6192169189453, + "learning_rate": 6.733678593496901e-07, + "loss": 14.6221, + "step": 424800 + }, + { + "epoch": 0.8581430770411729, + "grad_norm": 259.729248046875, + "learning_rate": 6.731929150483624e-07, + "loss": 13.059, + "step": 424810 + }, + { + "epoch": 0.8581632776738567, + "grad_norm": 312.6268310546875, + "learning_rate": 6.73017991835368e-07, + "loss": 12.951, + "step": 424820 + }, + { + "epoch": 0.8581834783065405, + "grad_norm": 282.1890563964844, + "learning_rate": 6.728430897115578e-07, + "loss": 19.1124, + "step": 424830 + }, + { + "epoch": 0.8582036789392243, + "grad_norm": 273.0890808105469, + "learning_rate": 6.726682086777869e-07, + "loss": 14.1467, + "step": 424840 + }, + { + "epoch": 0.8582238795719082, + "grad_norm": 122.47418212890625, + "learning_rate": 6.724933487349061e-07, + "loss": 15.8845, + "step": 424850 + }, + { + "epoch": 0.858244080204592, + "grad_norm": 112.8675765991211, + "learning_rate": 6.723185098837665e-07, + "loss": 8.5903, + "step": 424860 + }, + { + "epoch": 0.8582642808372758, + "grad_norm": 146.3318634033203, + "learning_rate": 6.721436921252223e-07, + "loss": 26.0564, + "step": 424870 + }, + { + "epoch": 0.8582844814699596, + "grad_norm": 86.2370834350586, + "learning_rate": 6.719688954601266e-07, + "loss": 15.4216, + "step": 424880 + }, + { + "epoch": 0.8583046821026434, + "grad_norm": 254.19744873046875, + "learning_rate": 6.717941198893274e-07, + "loss": 14.0053, + "step": 424890 + }, + { + "epoch": 0.8583248827353273, + "grad_norm": 297.9875183105469, + "learning_rate": 6.716193654136788e-07, + "loss": 25.1894, + "step": 424900 + }, + { + "epoch": 0.8583450833680111, + "grad_norm": 64.05819702148438, + "learning_rate": 6.714446320340334e-07, + "loss": 11.539, + "step": 424910 + }, + { + "epoch": 0.8583652840006949, + "grad_norm": 592.8305053710938, + "learning_rate": 6.712699197512418e-07, + "loss": 10.5414, + "step": 424920 + }, + { + "epoch": 0.8583854846333787, + "grad_norm": 5.57110595703125, + "learning_rate": 6.710952285661549e-07, + "loss": 22.1617, + "step": 424930 + }, + { + "epoch": 0.8584056852660625, + "grad_norm": 379.78790283203125, + "learning_rate": 6.709205584796241e-07, + "loss": 14.0794, + "step": 424940 + }, + { + "epoch": 0.8584258858987464, + "grad_norm": 131.6482696533203, + "learning_rate": 6.707459094925045e-07, + "loss": 10.1108, + "step": 424950 + }, + { + "epoch": 0.8584460865314302, + "grad_norm": 216.09938049316406, + "learning_rate": 6.705712816056415e-07, + "loss": 15.0898, + "step": 424960 + }, + { + "epoch": 0.858466287164114, + "grad_norm": 278.9071350097656, + "learning_rate": 6.703966748198892e-07, + "loss": 12.0141, + "step": 424970 + }, + { + "epoch": 0.8584864877967978, + "grad_norm": 110.3006820678711, + "learning_rate": 6.702220891360994e-07, + "loss": 9.9564, + "step": 424980 + }, + { + "epoch": 0.8585066884294816, + "grad_norm": 48.94906997680664, + "learning_rate": 6.700475245551218e-07, + "loss": 9.6111, + "step": 424990 + }, + { + "epoch": 0.8585268890621655, + "grad_norm": 309.4346618652344, + "learning_rate": 6.698729810778065e-07, + "loss": 20.069, + "step": 425000 + }, + { + "epoch": 0.8585470896948493, + "grad_norm": 392.30596923828125, + "learning_rate": 6.696984587050065e-07, + "loss": 11.179, + "step": 425010 + }, + { + "epoch": 0.8585672903275331, + "grad_norm": 245.6202850341797, + "learning_rate": 6.695239574375706e-07, + "loss": 8.6832, + "step": 425020 + }, + { + "epoch": 0.8585874909602169, + "grad_norm": 107.75462341308594, + "learning_rate": 6.693494772763487e-07, + "loss": 12.9651, + "step": 425030 + }, + { + "epoch": 0.8586076915929007, + "grad_norm": 174.42953491210938, + "learning_rate": 6.691750182221935e-07, + "loss": 22.0103, + "step": 425040 + }, + { + "epoch": 0.8586278922255846, + "grad_norm": 257.4122009277344, + "learning_rate": 6.69000580275953e-07, + "loss": 13.8525, + "step": 425050 + }, + { + "epoch": 0.8586480928582684, + "grad_norm": 3.2712879180908203, + "learning_rate": 6.688261634384791e-07, + "loss": 18.0985, + "step": 425060 + }, + { + "epoch": 0.8586682934909521, + "grad_norm": 55.044979095458984, + "learning_rate": 6.686517677106214e-07, + "loss": 13.2182, + "step": 425070 + }, + { + "epoch": 0.8586884941236359, + "grad_norm": 83.34304809570312, + "learning_rate": 6.684773930932281e-07, + "loss": 15.1368, + "step": 425080 + }, + { + "epoch": 0.8587086947563197, + "grad_norm": 51.55891418457031, + "learning_rate": 6.683030395871526e-07, + "loss": 32.3659, + "step": 425090 + }, + { + "epoch": 0.8587288953890035, + "grad_norm": 140.5757293701172, + "learning_rate": 6.681287071932408e-07, + "loss": 22.7444, + "step": 425100 + }, + { + "epoch": 0.8587490960216874, + "grad_norm": 348.9292907714844, + "learning_rate": 6.679543959123458e-07, + "loss": 19.0408, + "step": 425110 + }, + { + "epoch": 0.8587692966543712, + "grad_norm": 264.0926818847656, + "learning_rate": 6.677801057453143e-07, + "loss": 17.0035, + "step": 425120 + }, + { + "epoch": 0.858789497287055, + "grad_norm": 312.43890380859375, + "learning_rate": 6.676058366929988e-07, + "loss": 12.3295, + "step": 425130 + }, + { + "epoch": 0.8588096979197388, + "grad_norm": 260.50286865234375, + "learning_rate": 6.674315887562466e-07, + "loss": 16.7676, + "step": 425140 + }, + { + "epoch": 0.8588298985524226, + "grad_norm": 246.723388671875, + "learning_rate": 6.672573619359063e-07, + "loss": 20.2221, + "step": 425150 + }, + { + "epoch": 0.8588500991851065, + "grad_norm": 150.3035430908203, + "learning_rate": 6.67083156232829e-07, + "loss": 14.6619, + "step": 425160 + }, + { + "epoch": 0.8588702998177903, + "grad_norm": 295.06048583984375, + "learning_rate": 6.669089716478627e-07, + "loss": 24.0341, + "step": 425170 + }, + { + "epoch": 0.8588905004504741, + "grad_norm": 138.03895568847656, + "learning_rate": 6.667348081818559e-07, + "loss": 10.1707, + "step": 425180 + }, + { + "epoch": 0.8589107010831579, + "grad_norm": 520.5072631835938, + "learning_rate": 6.665606658356583e-07, + "loss": 17.2871, + "step": 425190 + }, + { + "epoch": 0.8589309017158417, + "grad_norm": 30.284364700317383, + "learning_rate": 6.663865446101192e-07, + "loss": 11.9157, + "step": 425200 + }, + { + "epoch": 0.8589511023485256, + "grad_norm": 169.39254760742188, + "learning_rate": 6.662124445060863e-07, + "loss": 15.764, + "step": 425210 + }, + { + "epoch": 0.8589713029812094, + "grad_norm": 1701.76171875, + "learning_rate": 6.660383655244074e-07, + "loss": 11.6654, + "step": 425220 + }, + { + "epoch": 0.8589915036138932, + "grad_norm": 62.773502349853516, + "learning_rate": 6.658643076659327e-07, + "loss": 18.329, + "step": 425230 + }, + { + "epoch": 0.859011704246577, + "grad_norm": 424.9864807128906, + "learning_rate": 6.6569027093151e-07, + "loss": 22.7725, + "step": 425240 + }, + { + "epoch": 0.8590319048792608, + "grad_norm": 532.7898559570312, + "learning_rate": 6.655162553219862e-07, + "loss": 31.5603, + "step": 425250 + }, + { + "epoch": 0.8590521055119447, + "grad_norm": 411.4581298828125, + "learning_rate": 6.653422608382105e-07, + "loss": 28.9217, + "step": 425260 + }, + { + "epoch": 0.8590723061446285, + "grad_norm": 137.09744262695312, + "learning_rate": 6.651682874810317e-07, + "loss": 10.206, + "step": 425270 + }, + { + "epoch": 0.8590925067773123, + "grad_norm": 362.9930419921875, + "learning_rate": 6.649943352512972e-07, + "loss": 19.2364, + "step": 425280 + }, + { + "epoch": 0.8591127074099961, + "grad_norm": 313.9099426269531, + "learning_rate": 6.648204041498534e-07, + "loss": 19.1666, + "step": 425290 + }, + { + "epoch": 0.8591329080426799, + "grad_norm": 218.5279083251953, + "learning_rate": 6.646464941775499e-07, + "loss": 11.514, + "step": 425300 + }, + { + "epoch": 0.8591531086753638, + "grad_norm": 265.6346435546875, + "learning_rate": 6.64472605335234e-07, + "loss": 14.4691, + "step": 425310 + }, + { + "epoch": 0.8591733093080475, + "grad_norm": 84.3724365234375, + "learning_rate": 6.642987376237514e-07, + "loss": 30.2023, + "step": 425320 + }, + { + "epoch": 0.8591935099407313, + "grad_norm": 241.3916473388672, + "learning_rate": 6.641248910439518e-07, + "loss": 12.6862, + "step": 425330 + }, + { + "epoch": 0.8592137105734151, + "grad_norm": 330.7503662109375, + "learning_rate": 6.639510655966813e-07, + "loss": 14.9626, + "step": 425340 + }, + { + "epoch": 0.8592339112060989, + "grad_norm": 192.02622985839844, + "learning_rate": 6.637772612827881e-07, + "loss": 12.9559, + "step": 425350 + }, + { + "epoch": 0.8592541118387828, + "grad_norm": 24.877470016479492, + "learning_rate": 6.636034781031181e-07, + "loss": 13.4675, + "step": 425360 + }, + { + "epoch": 0.8592743124714666, + "grad_norm": 316.19451904296875, + "learning_rate": 6.634297160585184e-07, + "loss": 17.2891, + "step": 425370 + }, + { + "epoch": 0.8592945131041504, + "grad_norm": 216.85694885253906, + "learning_rate": 6.632559751498369e-07, + "loss": 15.8184, + "step": 425380 + }, + { + "epoch": 0.8593147137368342, + "grad_norm": 355.86871337890625, + "learning_rate": 6.630822553779193e-07, + "loss": 27.9583, + "step": 425390 + }, + { + "epoch": 0.859334914369518, + "grad_norm": 207.43653869628906, + "learning_rate": 6.629085567436133e-07, + "loss": 26.3952, + "step": 425400 + }, + { + "epoch": 0.8593551150022019, + "grad_norm": 277.5365295410156, + "learning_rate": 6.627348792477639e-07, + "loss": 19.8084, + "step": 425410 + }, + { + "epoch": 0.8593753156348857, + "grad_norm": 15.553412437438965, + "learning_rate": 6.625612228912199e-07, + "loss": 29.9787, + "step": 425420 + }, + { + "epoch": 0.8593955162675695, + "grad_norm": 202.20901489257812, + "learning_rate": 6.623875876748265e-07, + "loss": 19.5246, + "step": 425430 + }, + { + "epoch": 0.8594157169002533, + "grad_norm": 419.8519592285156, + "learning_rate": 6.622139735994288e-07, + "loss": 17.6903, + "step": 425440 + }, + { + "epoch": 0.8594359175329371, + "grad_norm": 301.2924499511719, + "learning_rate": 6.620403806658754e-07, + "loss": 9.176, + "step": 425450 + }, + { + "epoch": 0.859456118165621, + "grad_norm": 241.92251586914062, + "learning_rate": 6.618668088750107e-07, + "loss": 21.5056, + "step": 425460 + }, + { + "epoch": 0.8594763187983048, + "grad_norm": 151.03933715820312, + "learning_rate": 6.616932582276798e-07, + "loss": 12.2095, + "step": 425470 + }, + { + "epoch": 0.8594965194309886, + "grad_norm": 128.7923583984375, + "learning_rate": 6.615197287247299e-07, + "loss": 13.9771, + "step": 425480 + }, + { + "epoch": 0.8595167200636724, + "grad_norm": 849.5505981445312, + "learning_rate": 6.61346220367009e-07, + "loss": 9.1107, + "step": 425490 + }, + { + "epoch": 0.8595369206963562, + "grad_norm": 566.014892578125, + "learning_rate": 6.611727331553585e-07, + "loss": 22.3019, + "step": 425500 + }, + { + "epoch": 0.85955712132904, + "grad_norm": 401.216796875, + "learning_rate": 6.609992670906251e-07, + "loss": 13.9003, + "step": 425510 + }, + { + "epoch": 0.8595773219617239, + "grad_norm": 368.768798828125, + "learning_rate": 6.608258221736568e-07, + "loss": 16.6687, + "step": 425520 + }, + { + "epoch": 0.8595975225944077, + "grad_norm": 299.59320068359375, + "learning_rate": 6.60652398405297e-07, + "loss": 26.3777, + "step": 425530 + }, + { + "epoch": 0.8596177232270915, + "grad_norm": 263.8229675292969, + "learning_rate": 6.604789957863899e-07, + "loss": 14.5886, + "step": 425540 + }, + { + "epoch": 0.8596379238597753, + "grad_norm": 432.37298583984375, + "learning_rate": 6.603056143177817e-07, + "loss": 15.311, + "step": 425550 + }, + { + "epoch": 0.8596581244924592, + "grad_norm": 0.0, + "learning_rate": 6.601322540003202e-07, + "loss": 26.5307, + "step": 425560 + }, + { + "epoch": 0.859678325125143, + "grad_norm": 212.65748596191406, + "learning_rate": 6.599589148348451e-07, + "loss": 18.4903, + "step": 425570 + }, + { + "epoch": 0.8596985257578267, + "grad_norm": 483.0402526855469, + "learning_rate": 6.597855968222038e-07, + "loss": 13.8314, + "step": 425580 + }, + { + "epoch": 0.8597187263905105, + "grad_norm": 102.29916381835938, + "learning_rate": 6.596122999632426e-07, + "loss": 21.5249, + "step": 425590 + }, + { + "epoch": 0.8597389270231943, + "grad_norm": 93.99577331542969, + "learning_rate": 6.594390242588044e-07, + "loss": 12.53, + "step": 425600 + }, + { + "epoch": 0.8597591276558781, + "grad_norm": 3.3775837421417236, + "learning_rate": 6.592657697097333e-07, + "loss": 20.3366, + "step": 425610 + }, + { + "epoch": 0.859779328288562, + "grad_norm": 512.7830200195312, + "learning_rate": 6.590925363168749e-07, + "loss": 23.7357, + "step": 425620 + }, + { + "epoch": 0.8597995289212458, + "grad_norm": 384.3551025390625, + "learning_rate": 6.589193240810732e-07, + "loss": 30.2439, + "step": 425630 + }, + { + "epoch": 0.8598197295539296, + "grad_norm": 1.1212437152862549, + "learning_rate": 6.587461330031714e-07, + "loss": 15.3178, + "step": 425640 + }, + { + "epoch": 0.8598399301866134, + "grad_norm": 226.32542419433594, + "learning_rate": 6.585729630840149e-07, + "loss": 17.0284, + "step": 425650 + }, + { + "epoch": 0.8598601308192972, + "grad_norm": 174.52976989746094, + "learning_rate": 6.583998143244463e-07, + "loss": 13.3965, + "step": 425660 + }, + { + "epoch": 0.8598803314519811, + "grad_norm": 115.52770233154297, + "learning_rate": 6.582266867253118e-07, + "loss": 8.3107, + "step": 425670 + }, + { + "epoch": 0.8599005320846649, + "grad_norm": 177.9041290283203, + "learning_rate": 6.580535802874538e-07, + "loss": 15.6579, + "step": 425680 + }, + { + "epoch": 0.8599207327173487, + "grad_norm": 277.89324951171875, + "learning_rate": 6.578804950117146e-07, + "loss": 14.2583, + "step": 425690 + }, + { + "epoch": 0.8599409333500325, + "grad_norm": 272.5760192871094, + "learning_rate": 6.577074308989406e-07, + "loss": 20.3047, + "step": 425700 + }, + { + "epoch": 0.8599611339827163, + "grad_norm": 21.752309799194336, + "learning_rate": 6.575343879499729e-07, + "loss": 16.8871, + "step": 425710 + }, + { + "epoch": 0.8599813346154002, + "grad_norm": 290.4754333496094, + "learning_rate": 6.57361366165657e-07, + "loss": 18.9391, + "step": 425720 + }, + { + "epoch": 0.860001535248084, + "grad_norm": 284.32489013671875, + "learning_rate": 6.571883655468336e-07, + "loss": 22.1051, + "step": 425730 + }, + { + "epoch": 0.8600217358807678, + "grad_norm": 203.83856201171875, + "learning_rate": 6.57015386094349e-07, + "loss": 18.6123, + "step": 425740 + }, + { + "epoch": 0.8600419365134516, + "grad_norm": 173.1763153076172, + "learning_rate": 6.568424278090446e-07, + "loss": 17.9551, + "step": 425750 + }, + { + "epoch": 0.8600621371461354, + "grad_norm": 356.8979797363281, + "learning_rate": 6.56669490691762e-07, + "loss": 17.9615, + "step": 425760 + }, + { + "epoch": 0.8600823377788193, + "grad_norm": 468.1793212890625, + "learning_rate": 6.564965747433472e-07, + "loss": 28.7568, + "step": 425770 + }, + { + "epoch": 0.8601025384115031, + "grad_norm": 1004.8388061523438, + "learning_rate": 6.563236799646405e-07, + "loss": 16.6156, + "step": 425780 + }, + { + "epoch": 0.8601227390441869, + "grad_norm": 442.2641296386719, + "learning_rate": 6.561508063564847e-07, + "loss": 14.7327, + "step": 425790 + }, + { + "epoch": 0.8601429396768707, + "grad_norm": 308.0071716308594, + "learning_rate": 6.559779539197231e-07, + "loss": 28.2071, + "step": 425800 + }, + { + "epoch": 0.8601631403095545, + "grad_norm": 878.6277465820312, + "learning_rate": 6.558051226551992e-07, + "loss": 19.1349, + "step": 425810 + }, + { + "epoch": 0.8601833409422384, + "grad_norm": 252.41970825195312, + "learning_rate": 6.556323125637542e-07, + "loss": 11.9119, + "step": 425820 + }, + { + "epoch": 0.8602035415749222, + "grad_norm": 3.001262903213501, + "learning_rate": 6.554595236462291e-07, + "loss": 16.0101, + "step": 425830 + }, + { + "epoch": 0.8602237422076059, + "grad_norm": 280.01397705078125, + "learning_rate": 6.552867559034687e-07, + "loss": 37.686, + "step": 425840 + }, + { + "epoch": 0.8602439428402897, + "grad_norm": 202.0115203857422, + "learning_rate": 6.551140093363135e-07, + "loss": 17.1321, + "step": 425850 + }, + { + "epoch": 0.8602641434729735, + "grad_norm": 294.38616943359375, + "learning_rate": 6.549412839456048e-07, + "loss": 14.5224, + "step": 425860 + }, + { + "epoch": 0.8602843441056574, + "grad_norm": 407.4664611816406, + "learning_rate": 6.547685797321851e-07, + "loss": 20.1108, + "step": 425870 + }, + { + "epoch": 0.8603045447383412, + "grad_norm": 20.88553810119629, + "learning_rate": 6.545958966968974e-07, + "loss": 9.0207, + "step": 425880 + }, + { + "epoch": 0.860324745371025, + "grad_norm": 390.5597229003906, + "learning_rate": 6.544232348405821e-07, + "loss": 18.4068, + "step": 425890 + }, + { + "epoch": 0.8603449460037088, + "grad_norm": 128.73533630371094, + "learning_rate": 6.542505941640803e-07, + "loss": 26.7374, + "step": 425900 + }, + { + "epoch": 0.8603651466363926, + "grad_norm": 414.9536437988281, + "learning_rate": 6.540779746682346e-07, + "loss": 28.0502, + "step": 425910 + }, + { + "epoch": 0.8603853472690765, + "grad_norm": 280.2080993652344, + "learning_rate": 6.53905376353886e-07, + "loss": 18.9999, + "step": 425920 + }, + { + "epoch": 0.8604055479017603, + "grad_norm": 367.2652893066406, + "learning_rate": 6.537327992218745e-07, + "loss": 15.2771, + "step": 425930 + }, + { + "epoch": 0.8604257485344441, + "grad_norm": 66.76766967773438, + "learning_rate": 6.535602432730432e-07, + "loss": 14.1451, + "step": 425940 + }, + { + "epoch": 0.8604459491671279, + "grad_norm": 112.82831573486328, + "learning_rate": 6.533877085082307e-07, + "loss": 12.8695, + "step": 425950 + }, + { + "epoch": 0.8604661497998117, + "grad_norm": 205.36558532714844, + "learning_rate": 6.532151949282811e-07, + "loss": 16.2951, + "step": 425960 + }, + { + "epoch": 0.8604863504324956, + "grad_norm": 121.46744537353516, + "learning_rate": 6.53042702534033e-07, + "loss": 11.2585, + "step": 425970 + }, + { + "epoch": 0.8605065510651794, + "grad_norm": 277.3069763183594, + "learning_rate": 6.528702313263264e-07, + "loss": 17.367, + "step": 425980 + }, + { + "epoch": 0.8605267516978632, + "grad_norm": 322.7912292480469, + "learning_rate": 6.526977813060042e-07, + "loss": 13.0788, + "step": 425990 + }, + { + "epoch": 0.860546952330547, + "grad_norm": 188.09046936035156, + "learning_rate": 6.52525352473905e-07, + "loss": 12.747, + "step": 426000 + }, + { + "epoch": 0.8605671529632308, + "grad_norm": 577.46630859375, + "learning_rate": 6.523529448308708e-07, + "loss": 18.9068, + "step": 426010 + }, + { + "epoch": 0.8605873535959147, + "grad_norm": 419.2146911621094, + "learning_rate": 6.521805583777396e-07, + "loss": 16.0069, + "step": 426020 + }, + { + "epoch": 0.8606075542285985, + "grad_norm": 259.5483703613281, + "learning_rate": 6.520081931153544e-07, + "loss": 17.9167, + "step": 426030 + }, + { + "epoch": 0.8606277548612823, + "grad_norm": 564.1424560546875, + "learning_rate": 6.518358490445542e-07, + "loss": 20.2891, + "step": 426040 + }, + { + "epoch": 0.8606479554939661, + "grad_norm": 196.95143127441406, + "learning_rate": 6.516635261661775e-07, + "loss": 16.5217, + "step": 426050 + }, + { + "epoch": 0.8606681561266499, + "grad_norm": 206.94544982910156, + "learning_rate": 6.514912244810662e-07, + "loss": 9.7611, + "step": 426060 + }, + { + "epoch": 0.8606883567593338, + "grad_norm": 224.210205078125, + "learning_rate": 6.513189439900591e-07, + "loss": 16.0879, + "step": 426070 + }, + { + "epoch": 0.8607085573920176, + "grad_norm": 307.1372985839844, + "learning_rate": 6.511466846939956e-07, + "loss": 17.7233, + "step": 426080 + }, + { + "epoch": 0.8607287580247013, + "grad_norm": 32.34778594970703, + "learning_rate": 6.509744465937151e-07, + "loss": 17.4611, + "step": 426090 + }, + { + "epoch": 0.8607489586573851, + "grad_norm": 294.2017517089844, + "learning_rate": 6.508022296900601e-07, + "loss": 23.336, + "step": 426100 + }, + { + "epoch": 0.8607691592900689, + "grad_norm": 32.162803649902344, + "learning_rate": 6.506300339838656e-07, + "loss": 6.6925, + "step": 426110 + }, + { + "epoch": 0.8607893599227527, + "grad_norm": 122.28897857666016, + "learning_rate": 6.504578594759725e-07, + "loss": 13.2891, + "step": 426120 + }, + { + "epoch": 0.8608095605554366, + "grad_norm": 172.11935424804688, + "learning_rate": 6.502857061672213e-07, + "loss": 24.6641, + "step": 426130 + }, + { + "epoch": 0.8608297611881204, + "grad_norm": 99.5322036743164, + "learning_rate": 6.501135740584502e-07, + "loss": 18.3852, + "step": 426140 + }, + { + "epoch": 0.8608499618208042, + "grad_norm": 181.11062622070312, + "learning_rate": 6.499414631504969e-07, + "loss": 26.8739, + "step": 426150 + }, + { + "epoch": 0.860870162453488, + "grad_norm": 196.2686004638672, + "learning_rate": 6.497693734442007e-07, + "loss": 16.5601, + "step": 426160 + }, + { + "epoch": 0.8608903630861718, + "grad_norm": 454.75830078125, + "learning_rate": 6.495973049404037e-07, + "loss": 14.189, + "step": 426170 + }, + { + "epoch": 0.8609105637188557, + "grad_norm": 85.71923065185547, + "learning_rate": 6.494252576399395e-07, + "loss": 15.6298, + "step": 426180 + }, + { + "epoch": 0.8609307643515395, + "grad_norm": 129.8979034423828, + "learning_rate": 6.49253231543649e-07, + "loss": 20.9725, + "step": 426190 + }, + { + "epoch": 0.8609509649842233, + "grad_norm": 42.13568878173828, + "learning_rate": 6.490812266523716e-07, + "loss": 15.4088, + "step": 426200 + }, + { + "epoch": 0.8609711656169071, + "grad_norm": 147.41806030273438, + "learning_rate": 6.489092429669447e-07, + "loss": 12.642, + "step": 426210 + }, + { + "epoch": 0.8609913662495909, + "grad_norm": 535.2359619140625, + "learning_rate": 6.487372804882053e-07, + "loss": 17.0153, + "step": 426220 + }, + { + "epoch": 0.8610115668822748, + "grad_norm": 418.01416015625, + "learning_rate": 6.485653392169938e-07, + "loss": 22.6266, + "step": 426230 + }, + { + "epoch": 0.8610317675149586, + "grad_norm": 167.09933471679688, + "learning_rate": 6.483934191541469e-07, + "loss": 14.8782, + "step": 426240 + }, + { + "epoch": 0.8610519681476424, + "grad_norm": 227.04527282714844, + "learning_rate": 6.482215203005016e-07, + "loss": 17.4681, + "step": 426250 + }, + { + "epoch": 0.8610721687803262, + "grad_norm": 281.4587707519531, + "learning_rate": 6.480496426568983e-07, + "loss": 34.5975, + "step": 426260 + }, + { + "epoch": 0.86109236941301, + "grad_norm": 180.57151794433594, + "learning_rate": 6.478777862241714e-07, + "loss": 12.0356, + "step": 426270 + }, + { + "epoch": 0.8611125700456939, + "grad_norm": 51.872047424316406, + "learning_rate": 6.477059510031619e-07, + "loss": 18.1245, + "step": 426280 + }, + { + "epoch": 0.8611327706783777, + "grad_norm": 286.2590637207031, + "learning_rate": 6.475341369947047e-07, + "loss": 17.6567, + "step": 426290 + }, + { + "epoch": 0.8611529713110615, + "grad_norm": 283.1455078125, + "learning_rate": 6.47362344199639e-07, + "loss": 10.539, + "step": 426300 + }, + { + "epoch": 0.8611731719437453, + "grad_norm": 261.40655517578125, + "learning_rate": 6.471905726188015e-07, + "loss": 16.6252, + "step": 426310 + }, + { + "epoch": 0.8611933725764291, + "grad_norm": 343.0833740234375, + "learning_rate": 6.470188222530282e-07, + "loss": 16.7142, + "step": 426320 + }, + { + "epoch": 0.861213573209113, + "grad_norm": 400.0595703125, + "learning_rate": 6.468470931031584e-07, + "loss": 27.5833, + "step": 426330 + }, + { + "epoch": 0.8612337738417968, + "grad_norm": 279.7911071777344, + "learning_rate": 6.466753851700264e-07, + "loss": 15.3073, + "step": 426340 + }, + { + "epoch": 0.8612539744744805, + "grad_norm": 348.87896728515625, + "learning_rate": 6.465036984544721e-07, + "loss": 27.4831, + "step": 426350 + }, + { + "epoch": 0.8612741751071643, + "grad_norm": 185.6573486328125, + "learning_rate": 6.463320329573303e-07, + "loss": 10.596, + "step": 426360 + }, + { + "epoch": 0.8612943757398481, + "grad_norm": 179.58221435546875, + "learning_rate": 6.46160388679437e-07, + "loss": 13.5588, + "step": 426370 + }, + { + "epoch": 0.861314576372532, + "grad_norm": 174.3289031982422, + "learning_rate": 6.459887656216318e-07, + "loss": 19.3493, + "step": 426380 + }, + { + "epoch": 0.8613347770052158, + "grad_norm": 380.564208984375, + "learning_rate": 6.458171637847488e-07, + "loss": 12.3408, + "step": 426390 + }, + { + "epoch": 0.8613549776378996, + "grad_norm": 140.37326049804688, + "learning_rate": 6.456455831696234e-07, + "loss": 15.1851, + "step": 426400 + }, + { + "epoch": 0.8613751782705834, + "grad_norm": 23.614601135253906, + "learning_rate": 6.454740237770934e-07, + "loss": 20.7154, + "step": 426410 + }, + { + "epoch": 0.8613953789032672, + "grad_norm": 117.17767333984375, + "learning_rate": 6.453024856079976e-07, + "loss": 10.1878, + "step": 426420 + }, + { + "epoch": 0.861415579535951, + "grad_norm": 66.0915756225586, + "learning_rate": 6.451309686631668e-07, + "loss": 14.5932, + "step": 426430 + }, + { + "epoch": 0.8614357801686349, + "grad_norm": 115.52134704589844, + "learning_rate": 6.449594729434394e-07, + "loss": 23.6677, + "step": 426440 + }, + { + "epoch": 0.8614559808013187, + "grad_norm": 200.53895568847656, + "learning_rate": 6.447879984496525e-07, + "loss": 22.4611, + "step": 426450 + }, + { + "epoch": 0.8614761814340025, + "grad_norm": 55.7606201171875, + "learning_rate": 6.446165451826409e-07, + "loss": 14.9015, + "step": 426460 + }, + { + "epoch": 0.8614963820666863, + "grad_norm": 367.1582946777344, + "learning_rate": 6.444451131432383e-07, + "loss": 25.4361, + "step": 426470 + }, + { + "epoch": 0.8615165826993701, + "grad_norm": 477.4083557128906, + "learning_rate": 6.442737023322826e-07, + "loss": 16.5428, + "step": 426480 + }, + { + "epoch": 0.861536783332054, + "grad_norm": 640.9025268554688, + "learning_rate": 6.441023127506096e-07, + "loss": 22.848, + "step": 426490 + }, + { + "epoch": 0.8615569839647378, + "grad_norm": 593.6867065429688, + "learning_rate": 6.439309443990532e-07, + "loss": 17.403, + "step": 426500 + }, + { + "epoch": 0.8615771845974216, + "grad_norm": 128.9621124267578, + "learning_rate": 6.437595972784483e-07, + "loss": 14.719, + "step": 426510 + }, + { + "epoch": 0.8615973852301054, + "grad_norm": 102.8722915649414, + "learning_rate": 6.435882713896319e-07, + "loss": 21.571, + "step": 426520 + }, + { + "epoch": 0.8616175858627892, + "grad_norm": 324.9844970703125, + "learning_rate": 6.434169667334378e-07, + "loss": 13.7523, + "step": 426530 + }, + { + "epoch": 0.8616377864954731, + "grad_norm": 164.3453826904297, + "learning_rate": 6.432456833106998e-07, + "loss": 29.9197, + "step": 426540 + }, + { + "epoch": 0.8616579871281569, + "grad_norm": 290.2485046386719, + "learning_rate": 6.43074421122255e-07, + "loss": 13.7085, + "step": 426550 + }, + { + "epoch": 0.8616781877608407, + "grad_norm": 162.91885375976562, + "learning_rate": 6.429031801689362e-07, + "loss": 23.4219, + "step": 426560 + }, + { + "epoch": 0.8616983883935245, + "grad_norm": 152.35107421875, + "learning_rate": 6.427319604515797e-07, + "loss": 11.5542, + "step": 426570 + }, + { + "epoch": 0.8617185890262083, + "grad_norm": 72.03697204589844, + "learning_rate": 6.425607619710195e-07, + "loss": 13.8766, + "step": 426580 + }, + { + "epoch": 0.8617387896588922, + "grad_norm": 410.8104553222656, + "learning_rate": 6.423895847280881e-07, + "loss": 16.488, + "step": 426590 + }, + { + "epoch": 0.8617589902915759, + "grad_norm": 369.3254699707031, + "learning_rate": 6.422184287236227e-07, + "loss": 16.7072, + "step": 426600 + }, + { + "epoch": 0.8617791909242597, + "grad_norm": 258.9056701660156, + "learning_rate": 6.420472939584549e-07, + "loss": 16.8887, + "step": 426610 + }, + { + "epoch": 0.8617993915569435, + "grad_norm": 283.20623779296875, + "learning_rate": 6.418761804334212e-07, + "loss": 23.0235, + "step": 426620 + }, + { + "epoch": 0.8618195921896273, + "grad_norm": 229.67816162109375, + "learning_rate": 6.417050881493536e-07, + "loss": 23.8269, + "step": 426630 + }, + { + "epoch": 0.8618397928223112, + "grad_norm": 298.65057373046875, + "learning_rate": 6.415340171070877e-07, + "loss": 13.5707, + "step": 426640 + }, + { + "epoch": 0.861859993454995, + "grad_norm": 189.23648071289062, + "learning_rate": 6.413629673074562e-07, + "loss": 10.3198, + "step": 426650 + }, + { + "epoch": 0.8618801940876788, + "grad_norm": 482.4182434082031, + "learning_rate": 6.411919387512922e-07, + "loss": 15.7056, + "step": 426660 + }, + { + "epoch": 0.8619003947203626, + "grad_norm": 316.34271240234375, + "learning_rate": 6.410209314394305e-07, + "loss": 36.8376, + "step": 426670 + }, + { + "epoch": 0.8619205953530464, + "grad_norm": 269.3982849121094, + "learning_rate": 6.408499453727046e-07, + "loss": 33.9866, + "step": 426680 + }, + { + "epoch": 0.8619407959857303, + "grad_norm": 109.6086196899414, + "learning_rate": 6.406789805519464e-07, + "loss": 12.0312, + "step": 426690 + }, + { + "epoch": 0.8619609966184141, + "grad_norm": 271.09686279296875, + "learning_rate": 6.405080369779898e-07, + "loss": 15.8506, + "step": 426700 + }, + { + "epoch": 0.8619811972510979, + "grad_norm": 332.6595764160156, + "learning_rate": 6.403371146516707e-07, + "loss": 20.6489, + "step": 426710 + }, + { + "epoch": 0.8620013978837817, + "grad_norm": 20.008983612060547, + "learning_rate": 6.401662135738174e-07, + "loss": 14.4732, + "step": 426720 + }, + { + "epoch": 0.8620215985164655, + "grad_norm": 477.0677185058594, + "learning_rate": 6.399953337452652e-07, + "loss": 20.5175, + "step": 426730 + }, + { + "epoch": 0.8620417991491494, + "grad_norm": 240.05218505859375, + "learning_rate": 6.398244751668481e-07, + "loss": 14.0642, + "step": 426740 + }, + { + "epoch": 0.8620619997818332, + "grad_norm": 179.6294403076172, + "learning_rate": 6.396536378393975e-07, + "loss": 29.609, + "step": 426750 + }, + { + "epoch": 0.862082200414517, + "grad_norm": 256.0860900878906, + "learning_rate": 6.394828217637455e-07, + "loss": 13.8209, + "step": 426760 + }, + { + "epoch": 0.8621024010472008, + "grad_norm": 427.2187805175781, + "learning_rate": 6.393120269407249e-07, + "loss": 12.9257, + "step": 426770 + }, + { + "epoch": 0.8621226016798846, + "grad_norm": 734.8992309570312, + "learning_rate": 6.391412533711711e-07, + "loss": 25.0278, + "step": 426780 + }, + { + "epoch": 0.8621428023125685, + "grad_norm": 194.74415588378906, + "learning_rate": 6.389705010559117e-07, + "loss": 17.5674, + "step": 426790 + }, + { + "epoch": 0.8621630029452523, + "grad_norm": 78.94375610351562, + "learning_rate": 6.387997699957815e-07, + "loss": 15.628, + "step": 426800 + }, + { + "epoch": 0.8621832035779361, + "grad_norm": 178.01707458496094, + "learning_rate": 6.386290601916129e-07, + "loss": 11.1852, + "step": 426810 + }, + { + "epoch": 0.8622034042106199, + "grad_norm": 331.4090881347656, + "learning_rate": 6.384583716442371e-07, + "loss": 28.6777, + "step": 426820 + }, + { + "epoch": 0.8622236048433037, + "grad_norm": 314.58819580078125, + "learning_rate": 6.382877043544855e-07, + "loss": 7.6431, + "step": 426830 + }, + { + "epoch": 0.8622438054759876, + "grad_norm": 733.0621337890625, + "learning_rate": 6.381170583231916e-07, + "loss": 22.413, + "step": 426840 + }, + { + "epoch": 0.8622640061086714, + "grad_norm": 0.0, + "learning_rate": 6.379464335511859e-07, + "loss": 27.8916, + "step": 426850 + }, + { + "epoch": 0.8622842067413551, + "grad_norm": 646.348388671875, + "learning_rate": 6.377758300392994e-07, + "loss": 21.2316, + "step": 426860 + }, + { + "epoch": 0.8623044073740389, + "grad_norm": 218.392822265625, + "learning_rate": 6.376052477883655e-07, + "loss": 18.2486, + "step": 426870 + }, + { + "epoch": 0.8623246080067227, + "grad_norm": 199.77964782714844, + "learning_rate": 6.374346867992138e-07, + "loss": 10.8684, + "step": 426880 + }, + { + "epoch": 0.8623448086394065, + "grad_norm": 355.8582763671875, + "learning_rate": 6.372641470726765e-07, + "loss": 20.5506, + "step": 426890 + }, + { + "epoch": 0.8623650092720904, + "grad_norm": 223.13693237304688, + "learning_rate": 6.370936286095842e-07, + "loss": 10.9489, + "step": 426900 + }, + { + "epoch": 0.8623852099047742, + "grad_norm": 261.5447998046875, + "learning_rate": 6.369231314107693e-07, + "loss": 21.3465, + "step": 426910 + }, + { + "epoch": 0.862405410537458, + "grad_norm": 166.69236755371094, + "learning_rate": 6.36752655477062e-07, + "loss": 21.7841, + "step": 426920 + }, + { + "epoch": 0.8624256111701418, + "grad_norm": 1603.099853515625, + "learning_rate": 6.36582200809292e-07, + "loss": 30.7529, + "step": 426930 + }, + { + "epoch": 0.8624458118028256, + "grad_norm": 296.5548400878906, + "learning_rate": 6.36411767408292e-07, + "loss": 18.8484, + "step": 426940 + }, + { + "epoch": 0.8624660124355095, + "grad_norm": 248.10475158691406, + "learning_rate": 6.362413552748908e-07, + "loss": 34.0839, + "step": 426950 + }, + { + "epoch": 0.8624862130681933, + "grad_norm": 375.36016845703125, + "learning_rate": 6.360709644099211e-07, + "loss": 19.8768, + "step": 426960 + }, + { + "epoch": 0.8625064137008771, + "grad_norm": 316.27777099609375, + "learning_rate": 6.359005948142122e-07, + "loss": 14.5303, + "step": 426970 + }, + { + "epoch": 0.8625266143335609, + "grad_norm": 248.04403686523438, + "learning_rate": 6.357302464885934e-07, + "loss": 14.8746, + "step": 426980 + }, + { + "epoch": 0.8625468149662447, + "grad_norm": 516.7435913085938, + "learning_rate": 6.355599194338974e-07, + "loss": 28.9377, + "step": 426990 + }, + { + "epoch": 0.8625670155989286, + "grad_norm": 120.26335144042969, + "learning_rate": 6.353896136509524e-07, + "loss": 11.3951, + "step": 427000 + }, + { + "epoch": 0.8625872162316124, + "grad_norm": 42.786903381347656, + "learning_rate": 6.352193291405884e-07, + "loss": 7.7517, + "step": 427010 + }, + { + "epoch": 0.8626074168642962, + "grad_norm": 533.2800903320312, + "learning_rate": 6.350490659036362e-07, + "loss": 25.1994, + "step": 427020 + }, + { + "epoch": 0.86262761749698, + "grad_norm": 232.50086975097656, + "learning_rate": 6.348788239409271e-07, + "loss": 10.8617, + "step": 427030 + }, + { + "epoch": 0.8626478181296638, + "grad_norm": 56.26165008544922, + "learning_rate": 6.347086032532873e-07, + "loss": 12.3419, + "step": 427040 + }, + { + "epoch": 0.8626680187623477, + "grad_norm": 276.5984191894531, + "learning_rate": 6.345384038415486e-07, + "loss": 13.2447, + "step": 427050 + }, + { + "epoch": 0.8626882193950315, + "grad_norm": 138.6144256591797, + "learning_rate": 6.343682257065408e-07, + "loss": 15.331, + "step": 427060 + }, + { + "epoch": 0.8627084200277153, + "grad_norm": 546.62939453125, + "learning_rate": 6.341980688490934e-07, + "loss": 36.8719, + "step": 427070 + }, + { + "epoch": 0.8627286206603991, + "grad_norm": 750.87451171875, + "learning_rate": 6.340279332700333e-07, + "loss": 23.4612, + "step": 427080 + }, + { + "epoch": 0.862748821293083, + "grad_norm": 248.04440307617188, + "learning_rate": 6.338578189701921e-07, + "loss": 11.5574, + "step": 427090 + }, + { + "epoch": 0.8627690219257668, + "grad_norm": 429.3407287597656, + "learning_rate": 6.336877259504004e-07, + "loss": 14.9368, + "step": 427100 + }, + { + "epoch": 0.8627892225584506, + "grad_norm": 296.7677001953125, + "learning_rate": 6.335176542114829e-07, + "loss": 20.3626, + "step": 427110 + }, + { + "epoch": 0.8628094231911343, + "grad_norm": 383.21343994140625, + "learning_rate": 6.333476037542707e-07, + "loss": 20.7349, + "step": 427120 + }, + { + "epoch": 0.8628296238238181, + "grad_norm": 161.70468139648438, + "learning_rate": 6.331775745795937e-07, + "loss": 24.1944, + "step": 427130 + }, + { + "epoch": 0.8628498244565019, + "grad_norm": 558.531005859375, + "learning_rate": 6.330075666882795e-07, + "loss": 21.6414, + "step": 427140 + }, + { + "epoch": 0.8628700250891858, + "grad_norm": 429.7414855957031, + "learning_rate": 6.328375800811559e-07, + "loss": 19.0061, + "step": 427150 + }, + { + "epoch": 0.8628902257218696, + "grad_norm": 613.4002075195312, + "learning_rate": 6.326676147590533e-07, + "loss": 20.4218, + "step": 427160 + }, + { + "epoch": 0.8629104263545534, + "grad_norm": 324.2467041015625, + "learning_rate": 6.324976707227993e-07, + "loss": 6.6113, + "step": 427170 + }, + { + "epoch": 0.8629306269872372, + "grad_norm": 0.0, + "learning_rate": 6.323277479732203e-07, + "loss": 13.0625, + "step": 427180 + }, + { + "epoch": 0.862950827619921, + "grad_norm": 705.1793212890625, + "learning_rate": 6.321578465111478e-07, + "loss": 24.1333, + "step": 427190 + }, + { + "epoch": 0.8629710282526049, + "grad_norm": 453.9203186035156, + "learning_rate": 6.319879663374068e-07, + "loss": 22.468, + "step": 427200 + }, + { + "epoch": 0.8629912288852887, + "grad_norm": 239.30142211914062, + "learning_rate": 6.318181074528279e-07, + "loss": 25.7906, + "step": 427210 + }, + { + "epoch": 0.8630114295179725, + "grad_norm": 391.6506652832031, + "learning_rate": 6.316482698582365e-07, + "loss": 9.0698, + "step": 427220 + }, + { + "epoch": 0.8630316301506563, + "grad_norm": 57.62110137939453, + "learning_rate": 6.314784535544627e-07, + "loss": 12.9475, + "step": 427230 + }, + { + "epoch": 0.8630518307833401, + "grad_norm": 92.63350677490234, + "learning_rate": 6.313086585423316e-07, + "loss": 14.8664, + "step": 427240 + }, + { + "epoch": 0.863072031416024, + "grad_norm": 255.9056854248047, + "learning_rate": 6.311388848226741e-07, + "loss": 20.5877, + "step": 427250 + }, + { + "epoch": 0.8630922320487078, + "grad_norm": 173.49542236328125, + "learning_rate": 6.309691323963152e-07, + "loss": 21.9859, + "step": 427260 + }, + { + "epoch": 0.8631124326813916, + "grad_norm": 1932.9476318359375, + "learning_rate": 6.307994012640822e-07, + "loss": 27.2474, + "step": 427270 + }, + { + "epoch": 0.8631326333140754, + "grad_norm": 410.6073303222656, + "learning_rate": 6.30629691426804e-07, + "loss": 17.2769, + "step": 427280 + }, + { + "epoch": 0.8631528339467592, + "grad_norm": 466.3726501464844, + "learning_rate": 6.304600028853065e-07, + "loss": 16.4853, + "step": 427290 + }, + { + "epoch": 0.863173034579443, + "grad_norm": 1031.7923583984375, + "learning_rate": 6.302903356404161e-07, + "loss": 19.3864, + "step": 427300 + }, + { + "epoch": 0.8631932352121269, + "grad_norm": 114.4725341796875, + "learning_rate": 6.301206896929607e-07, + "loss": 11.8854, + "step": 427310 + }, + { + "epoch": 0.8632134358448107, + "grad_norm": 333.98724365234375, + "learning_rate": 6.29951065043769e-07, + "loss": 17.1401, + "step": 427320 + }, + { + "epoch": 0.8632336364774945, + "grad_norm": 187.3678436279297, + "learning_rate": 6.297814616936637e-07, + "loss": 18.1092, + "step": 427330 + }, + { + "epoch": 0.8632538371101783, + "grad_norm": 267.3026428222656, + "learning_rate": 6.296118796434735e-07, + "loss": 14.566, + "step": 427340 + }, + { + "epoch": 0.8632740377428622, + "grad_norm": 216.7257537841797, + "learning_rate": 6.294423188940263e-07, + "loss": 9.6769, + "step": 427350 + }, + { + "epoch": 0.863294238375546, + "grad_norm": 264.22955322265625, + "learning_rate": 6.292727794461468e-07, + "loss": 21.8646, + "step": 427360 + }, + { + "epoch": 0.8633144390082297, + "grad_norm": 341.1600341796875, + "learning_rate": 6.291032613006604e-07, + "loss": 23.4711, + "step": 427370 + }, + { + "epoch": 0.8633346396409135, + "grad_norm": 241.09535217285156, + "learning_rate": 6.289337644583949e-07, + "loss": 11.735, + "step": 427380 + }, + { + "epoch": 0.8633548402735973, + "grad_norm": 340.7505187988281, + "learning_rate": 6.287642889201783e-07, + "loss": 16.4057, + "step": 427390 + }, + { + "epoch": 0.8633750409062811, + "grad_norm": 267.3469543457031, + "learning_rate": 6.28594834686832e-07, + "loss": 17.5358, + "step": 427400 + }, + { + "epoch": 0.863395241538965, + "grad_norm": 103.78594970703125, + "learning_rate": 6.284254017591845e-07, + "loss": 13.5318, + "step": 427410 + }, + { + "epoch": 0.8634154421716488, + "grad_norm": 490.9798583984375, + "learning_rate": 6.282559901380625e-07, + "loss": 16.8245, + "step": 427420 + }, + { + "epoch": 0.8634356428043326, + "grad_norm": 297.1769104003906, + "learning_rate": 6.280865998242908e-07, + "loss": 16.6822, + "step": 427430 + }, + { + "epoch": 0.8634558434370164, + "grad_norm": 249.63978576660156, + "learning_rate": 6.279172308186931e-07, + "loss": 17.7045, + "step": 427440 + }, + { + "epoch": 0.8634760440697002, + "grad_norm": 219.16079711914062, + "learning_rate": 6.277478831220979e-07, + "loss": 20.2264, + "step": 427450 + }, + { + "epoch": 0.8634962447023841, + "grad_norm": 120.80721282958984, + "learning_rate": 6.275785567353293e-07, + "loss": 14.207, + "step": 427460 + }, + { + "epoch": 0.8635164453350679, + "grad_norm": 152.2388458251953, + "learning_rate": 6.274092516592111e-07, + "loss": 9.7045, + "step": 427470 + }, + { + "epoch": 0.8635366459677517, + "grad_norm": 282.41778564453125, + "learning_rate": 6.272399678945712e-07, + "loss": 10.4501, + "step": 427480 + }, + { + "epoch": 0.8635568466004355, + "grad_norm": 539.1661376953125, + "learning_rate": 6.27070705442232e-07, + "loss": 20.9257, + "step": 427490 + }, + { + "epoch": 0.8635770472331193, + "grad_norm": 397.8507385253906, + "learning_rate": 6.269014643030214e-07, + "loss": 20.8538, + "step": 427500 + }, + { + "epoch": 0.8635972478658032, + "grad_norm": 242.8325653076172, + "learning_rate": 6.267322444777612e-07, + "loss": 14.1207, + "step": 427510 + }, + { + "epoch": 0.863617448498487, + "grad_norm": 347.0693359375, + "learning_rate": 6.265630459672789e-07, + "loss": 10.4487, + "step": 427520 + }, + { + "epoch": 0.8636376491311708, + "grad_norm": 114.38526153564453, + "learning_rate": 6.263938687723981e-07, + "loss": 19.362, + "step": 427530 + }, + { + "epoch": 0.8636578497638546, + "grad_norm": 223.4969482421875, + "learning_rate": 6.262247128939414e-07, + "loss": 10.003, + "step": 427540 + }, + { + "epoch": 0.8636780503965384, + "grad_norm": 290.72357177734375, + "learning_rate": 6.260555783327366e-07, + "loss": 20.083, + "step": 427550 + }, + { + "epoch": 0.8636982510292223, + "grad_norm": 2.7025504112243652, + "learning_rate": 6.258864650896051e-07, + "loss": 10.6744, + "step": 427560 + }, + { + "epoch": 0.8637184516619061, + "grad_norm": 568.3017578125, + "learning_rate": 6.257173731653738e-07, + "loss": 22.8239, + "step": 427570 + }, + { + "epoch": 0.8637386522945899, + "grad_norm": 264.5793151855469, + "learning_rate": 6.25548302560865e-07, + "loss": 20.8718, + "step": 427580 + }, + { + "epoch": 0.8637588529272737, + "grad_norm": 425.9575500488281, + "learning_rate": 6.253792532769026e-07, + "loss": 13.1263, + "step": 427590 + }, + { + "epoch": 0.8637790535599575, + "grad_norm": 428.3517761230469, + "learning_rate": 6.252102253143122e-07, + "loss": 24.8398, + "step": 427600 + }, + { + "epoch": 0.8637992541926414, + "grad_norm": 350.8314208984375, + "learning_rate": 6.250412186739163e-07, + "loss": 15.2756, + "step": 427610 + }, + { + "epoch": 0.8638194548253252, + "grad_norm": 480.5556945800781, + "learning_rate": 6.248722333565377e-07, + "loss": 19.9842, + "step": 427620 + }, + { + "epoch": 0.8638396554580089, + "grad_norm": 45.072906494140625, + "learning_rate": 6.247032693630012e-07, + "loss": 22.4469, + "step": 427630 + }, + { + "epoch": 0.8638598560906927, + "grad_norm": 325.568359375, + "learning_rate": 6.245343266941328e-07, + "loss": 17.0111, + "step": 427640 + }, + { + "epoch": 0.8638800567233765, + "grad_norm": 29.840423583984375, + "learning_rate": 6.243654053507515e-07, + "loss": 10.7177, + "step": 427650 + }, + { + "epoch": 0.8639002573560604, + "grad_norm": 824.6751708984375, + "learning_rate": 6.241965053336818e-07, + "loss": 21.3112, + "step": 427660 + }, + { + "epoch": 0.8639204579887442, + "grad_norm": 420.8976745605469, + "learning_rate": 6.24027626643749e-07, + "loss": 20.7418, + "step": 427670 + }, + { + "epoch": 0.863940658621428, + "grad_norm": 387.7466735839844, + "learning_rate": 6.238587692817749e-07, + "loss": 11.6445, + "step": 427680 + }, + { + "epoch": 0.8639608592541118, + "grad_norm": 350.6593933105469, + "learning_rate": 6.236899332485813e-07, + "loss": 16.1901, + "step": 427690 + }, + { + "epoch": 0.8639810598867956, + "grad_norm": 340.4710693359375, + "learning_rate": 6.235211185449919e-07, + "loss": 12.8927, + "step": 427700 + }, + { + "epoch": 0.8640012605194795, + "grad_norm": 278.5534973144531, + "learning_rate": 6.233523251718321e-07, + "loss": 15.8561, + "step": 427710 + }, + { + "epoch": 0.8640214611521633, + "grad_norm": 253.22122192382812, + "learning_rate": 6.231835531299202e-07, + "loss": 21.5683, + "step": 427720 + }, + { + "epoch": 0.8640416617848471, + "grad_norm": 315.7308654785156, + "learning_rate": 6.23014802420081e-07, + "loss": 18.8528, + "step": 427730 + }, + { + "epoch": 0.8640618624175309, + "grad_norm": 118.27812957763672, + "learning_rate": 6.228460730431374e-07, + "loss": 11.3726, + "step": 427740 + }, + { + "epoch": 0.8640820630502147, + "grad_norm": 78.91473388671875, + "learning_rate": 6.226773649999113e-07, + "loss": 11.204, + "step": 427750 + }, + { + "epoch": 0.8641022636828986, + "grad_norm": 241.45413208007812, + "learning_rate": 6.225086782912237e-07, + "loss": 20.8489, + "step": 427760 + }, + { + "epoch": 0.8641224643155824, + "grad_norm": 147.634765625, + "learning_rate": 6.223400129178992e-07, + "loss": 10.5424, + "step": 427770 + }, + { + "epoch": 0.8641426649482662, + "grad_norm": 196.81192016601562, + "learning_rate": 6.221713688807585e-07, + "loss": 30.416, + "step": 427780 + }, + { + "epoch": 0.86416286558095, + "grad_norm": 49.05225372314453, + "learning_rate": 6.220027461806222e-07, + "loss": 8.995, + "step": 427790 + }, + { + "epoch": 0.8641830662136338, + "grad_norm": 287.02032470703125, + "learning_rate": 6.218341448183141e-07, + "loss": 24.6338, + "step": 427800 + }, + { + "epoch": 0.8642032668463177, + "grad_norm": 224.36782836914062, + "learning_rate": 6.216655647946556e-07, + "loss": 14.2381, + "step": 427810 + }, + { + "epoch": 0.8642234674790015, + "grad_norm": 148.0165557861328, + "learning_rate": 6.214970061104686e-07, + "loss": 29.8094, + "step": 427820 + }, + { + "epoch": 0.8642436681116853, + "grad_norm": 323.8502502441406, + "learning_rate": 6.213284687665733e-07, + "loss": 24.0944, + "step": 427830 + }, + { + "epoch": 0.8642638687443691, + "grad_norm": 350.33251953125, + "learning_rate": 6.21159952763793e-07, + "loss": 13.3549, + "step": 427840 + }, + { + "epoch": 0.8642840693770529, + "grad_norm": 196.468017578125, + "learning_rate": 6.209914581029474e-07, + "loss": 24.0399, + "step": 427850 + }, + { + "epoch": 0.8643042700097368, + "grad_norm": 234.9713592529297, + "learning_rate": 6.20822984784858e-07, + "loss": 14.5471, + "step": 427860 + }, + { + "epoch": 0.8643244706424206, + "grad_norm": 250.5386199951172, + "learning_rate": 6.20654532810347e-07, + "loss": 18.132, + "step": 427870 + }, + { + "epoch": 0.8643446712751043, + "grad_norm": 156.40628051757812, + "learning_rate": 6.204861021802333e-07, + "loss": 9.2212, + "step": 427880 + }, + { + "epoch": 0.8643648719077881, + "grad_norm": 282.8708190917969, + "learning_rate": 6.203176928953403e-07, + "loss": 16.3524, + "step": 427890 + }, + { + "epoch": 0.8643850725404719, + "grad_norm": 752.982421875, + "learning_rate": 6.201493049564883e-07, + "loss": 14.544, + "step": 427900 + }, + { + "epoch": 0.8644052731731557, + "grad_norm": 416.86932373046875, + "learning_rate": 6.199809383644956e-07, + "loss": 11.0486, + "step": 427910 + }, + { + "epoch": 0.8644254738058396, + "grad_norm": 222.2848358154297, + "learning_rate": 6.198125931201848e-07, + "loss": 19.7606, + "step": 427920 + }, + { + "epoch": 0.8644456744385234, + "grad_norm": 133.27310180664062, + "learning_rate": 6.196442692243787e-07, + "loss": 21.9536, + "step": 427930 + }, + { + "epoch": 0.8644658750712072, + "grad_norm": 599.850830078125, + "learning_rate": 6.194759666778927e-07, + "loss": 20.2955, + "step": 427940 + }, + { + "epoch": 0.864486075703891, + "grad_norm": 325.1663513183594, + "learning_rate": 6.193076854815494e-07, + "loss": 18.8125, + "step": 427950 + }, + { + "epoch": 0.8645062763365748, + "grad_norm": 276.27618408203125, + "learning_rate": 6.191394256361699e-07, + "loss": 21.4611, + "step": 427960 + }, + { + "epoch": 0.8645264769692587, + "grad_norm": 255.7522430419922, + "learning_rate": 6.189711871425741e-07, + "loss": 14.2042, + "step": 427970 + }, + { + "epoch": 0.8645466776019425, + "grad_norm": 314.2364501953125, + "learning_rate": 6.188029700015802e-07, + "loss": 13.2705, + "step": 427980 + }, + { + "epoch": 0.8645668782346263, + "grad_norm": 315.06170654296875, + "learning_rate": 6.186347742140092e-07, + "loss": 30.0308, + "step": 427990 + }, + { + "epoch": 0.8645870788673101, + "grad_norm": 787.111328125, + "learning_rate": 6.184665997806832e-07, + "loss": 19.6346, + "step": 428000 + }, + { + "epoch": 0.8646072794999939, + "grad_norm": 374.6004638671875, + "learning_rate": 6.182984467024173e-07, + "loss": 19.146, + "step": 428010 + }, + { + "epoch": 0.8646274801326778, + "grad_norm": 391.1521911621094, + "learning_rate": 6.181303149800333e-07, + "loss": 20.1257, + "step": 428020 + }, + { + "epoch": 0.8646476807653616, + "grad_norm": 101.3083267211914, + "learning_rate": 6.179622046143513e-07, + "loss": 20.4961, + "step": 428030 + }, + { + "epoch": 0.8646678813980454, + "grad_norm": 271.12725830078125, + "learning_rate": 6.177941156061906e-07, + "loss": 9.0437, + "step": 428040 + }, + { + "epoch": 0.8646880820307292, + "grad_norm": 105.76915740966797, + "learning_rate": 6.17626047956369e-07, + "loss": 15.4698, + "step": 428050 + }, + { + "epoch": 0.864708282663413, + "grad_norm": 37.18621826171875, + "learning_rate": 6.174580016657073e-07, + "loss": 11.5056, + "step": 428060 + }, + { + "epoch": 0.8647284832960969, + "grad_norm": 721.3698120117188, + "learning_rate": 6.172899767350238e-07, + "loss": 17.9816, + "step": 428070 + }, + { + "epoch": 0.8647486839287807, + "grad_norm": 278.7106018066406, + "learning_rate": 6.171219731651362e-07, + "loss": 23.648, + "step": 428080 + }, + { + "epoch": 0.8647688845614645, + "grad_norm": 372.8246765136719, + "learning_rate": 6.169539909568656e-07, + "loss": 19.188, + "step": 428090 + }, + { + "epoch": 0.8647890851941483, + "grad_norm": 313.6341247558594, + "learning_rate": 6.167860301110284e-07, + "loss": 22.3156, + "step": 428100 + }, + { + "epoch": 0.8648092858268321, + "grad_norm": 133.35025024414062, + "learning_rate": 6.166180906284458e-07, + "loss": 23.5627, + "step": 428110 + }, + { + "epoch": 0.864829486459516, + "grad_norm": 80.14603424072266, + "learning_rate": 6.164501725099342e-07, + "loss": 20.9143, + "step": 428120 + }, + { + "epoch": 0.8648496870921998, + "grad_norm": 236.74549865722656, + "learning_rate": 6.162822757563136e-07, + "loss": 15.9507, + "step": 428130 + }, + { + "epoch": 0.8648698877248835, + "grad_norm": 366.94036865234375, + "learning_rate": 6.161144003684017e-07, + "loss": 20.8792, + "step": 428140 + }, + { + "epoch": 0.8648900883575673, + "grad_norm": 60.14889907836914, + "learning_rate": 6.159465463470149e-07, + "loss": 11.6091, + "step": 428150 + }, + { + "epoch": 0.8649102889902511, + "grad_norm": 298.1804504394531, + "learning_rate": 6.157787136929743e-07, + "loss": 13.0195, + "step": 428160 + }, + { + "epoch": 0.864930489622935, + "grad_norm": 63.309974670410156, + "learning_rate": 6.156109024070955e-07, + "loss": 19.1855, + "step": 428170 + }, + { + "epoch": 0.8649506902556188, + "grad_norm": 387.5739440917969, + "learning_rate": 6.154431124901983e-07, + "loss": 9.8993, + "step": 428180 + }, + { + "epoch": 0.8649708908883026, + "grad_norm": 644.6115112304688, + "learning_rate": 6.152753439430997e-07, + "loss": 15.9906, + "step": 428190 + }, + { + "epoch": 0.8649910915209864, + "grad_norm": 351.52520751953125, + "learning_rate": 6.151075967666165e-07, + "loss": 20.633, + "step": 428200 + }, + { + "epoch": 0.8650112921536702, + "grad_norm": 261.1536865234375, + "learning_rate": 6.149398709615678e-07, + "loss": 15.0145, + "step": 428210 + }, + { + "epoch": 0.865031492786354, + "grad_norm": 256.3823547363281, + "learning_rate": 6.147721665287703e-07, + "loss": 8.0933, + "step": 428220 + }, + { + "epoch": 0.8650516934190379, + "grad_norm": 15.405685424804688, + "learning_rate": 6.146044834690401e-07, + "loss": 8.8332, + "step": 428230 + }, + { + "epoch": 0.8650718940517217, + "grad_norm": 199.33229064941406, + "learning_rate": 6.144368217831965e-07, + "loss": 7.77, + "step": 428240 + }, + { + "epoch": 0.8650920946844055, + "grad_norm": 215.8980712890625, + "learning_rate": 6.142691814720575e-07, + "loss": 20.3456, + "step": 428250 + }, + { + "epoch": 0.8651122953170893, + "grad_norm": 255.4296417236328, + "learning_rate": 6.141015625364366e-07, + "loss": 8.5226, + "step": 428260 + }, + { + "epoch": 0.8651324959497732, + "grad_norm": 225.79835510253906, + "learning_rate": 6.139339649771525e-07, + "loss": 16.9742, + "step": 428270 + }, + { + "epoch": 0.865152696582457, + "grad_norm": 139.10226440429688, + "learning_rate": 6.137663887950235e-07, + "loss": 13.3792, + "step": 428280 + }, + { + "epoch": 0.8651728972151408, + "grad_norm": 816.4263305664062, + "learning_rate": 6.135988339908655e-07, + "loss": 21.1025, + "step": 428290 + }, + { + "epoch": 0.8651930978478246, + "grad_norm": 396.3490905761719, + "learning_rate": 6.134313005654929e-07, + "loss": 17.1094, + "step": 428300 + }, + { + "epoch": 0.8652132984805084, + "grad_norm": 302.6025390625, + "learning_rate": 6.132637885197251e-07, + "loss": 20.2784, + "step": 428310 + }, + { + "epoch": 0.8652334991131923, + "grad_norm": 144.55792236328125, + "learning_rate": 6.130962978543792e-07, + "loss": 11.4848, + "step": 428320 + }, + { + "epoch": 0.8652536997458761, + "grad_norm": 254.57180786132812, + "learning_rate": 6.129288285702672e-07, + "loss": 13.2996, + "step": 428330 + }, + { + "epoch": 0.8652739003785599, + "grad_norm": 144.44224548339844, + "learning_rate": 6.127613806682087e-07, + "loss": 21.9786, + "step": 428340 + }, + { + "epoch": 0.8652941010112437, + "grad_norm": 210.07656860351562, + "learning_rate": 6.1259395414902e-07, + "loss": 11.6278, + "step": 428350 + }, + { + "epoch": 0.8653143016439275, + "grad_norm": 202.88331604003906, + "learning_rate": 6.124265490135161e-07, + "loss": 19.1447, + "step": 428360 + }, + { + "epoch": 0.8653345022766114, + "grad_norm": 374.83978271484375, + "learning_rate": 6.122591652625126e-07, + "loss": 13.9947, + "step": 428370 + }, + { + "epoch": 0.8653547029092952, + "grad_norm": 1035.8675537109375, + "learning_rate": 6.120918028968265e-07, + "loss": 15.8294, + "step": 428380 + }, + { + "epoch": 0.8653749035419789, + "grad_norm": 681.4384155273438, + "learning_rate": 6.119244619172727e-07, + "loss": 20.7848, + "step": 428390 + }, + { + "epoch": 0.8653951041746627, + "grad_norm": 382.2265930175781, + "learning_rate": 6.117571423246655e-07, + "loss": 16.582, + "step": 428400 + }, + { + "epoch": 0.8654153048073465, + "grad_norm": 252.7736358642578, + "learning_rate": 6.11589844119822e-07, + "loss": 8.6321, + "step": 428410 + }, + { + "epoch": 0.8654355054400303, + "grad_norm": 1083.947998046875, + "learning_rate": 6.114225673035584e-07, + "loss": 21.8293, + "step": 428420 + }, + { + "epoch": 0.8654557060727142, + "grad_norm": 152.75503540039062, + "learning_rate": 6.112553118766889e-07, + "loss": 22.3239, + "step": 428430 + }, + { + "epoch": 0.865475906705398, + "grad_norm": 91.88584899902344, + "learning_rate": 6.110880778400275e-07, + "loss": 17.9463, + "step": 428440 + }, + { + "epoch": 0.8654961073380818, + "grad_norm": 205.8636932373047, + "learning_rate": 6.109208651943921e-07, + "loss": 19.4268, + "step": 428450 + }, + { + "epoch": 0.8655163079707656, + "grad_norm": 12.514071464538574, + "learning_rate": 6.107536739405956e-07, + "loss": 18.0592, + "step": 428460 + }, + { + "epoch": 0.8655365086034494, + "grad_norm": 41.81550598144531, + "learning_rate": 6.105865040794523e-07, + "loss": 20.8292, + "step": 428470 + }, + { + "epoch": 0.8655567092361333, + "grad_norm": 390.52593994140625, + "learning_rate": 6.104193556117793e-07, + "loss": 24.3912, + "step": 428480 + }, + { + "epoch": 0.8655769098688171, + "grad_norm": 324.08642578125, + "learning_rate": 6.102522285383888e-07, + "loss": 11.2023, + "step": 428490 + }, + { + "epoch": 0.8655971105015009, + "grad_norm": 257.45123291015625, + "learning_rate": 6.100851228600974e-07, + "loss": 30.6622, + "step": 428500 + }, + { + "epoch": 0.8656173111341847, + "grad_norm": 381.84051513671875, + "learning_rate": 6.099180385777192e-07, + "loss": 18.7013, + "step": 428510 + }, + { + "epoch": 0.8656375117668685, + "grad_norm": 375.1788635253906, + "learning_rate": 6.097509756920667e-07, + "loss": 25.043, + "step": 428520 + }, + { + "epoch": 0.8656577123995524, + "grad_norm": 195.8188934326172, + "learning_rate": 6.095839342039561e-07, + "loss": 11.9097, + "step": 428530 + }, + { + "epoch": 0.8656779130322362, + "grad_norm": 551.5490112304688, + "learning_rate": 6.094169141142014e-07, + "loss": 19.8383, + "step": 428540 + }, + { + "epoch": 0.86569811366492, + "grad_norm": 33.70741653442383, + "learning_rate": 6.092499154236148e-07, + "loss": 15.3881, + "step": 428550 + }, + { + "epoch": 0.8657183142976038, + "grad_norm": 307.7119140625, + "learning_rate": 6.090829381330116e-07, + "loss": 21.007, + "step": 428560 + }, + { + "epoch": 0.8657385149302876, + "grad_norm": 235.68484497070312, + "learning_rate": 6.089159822432073e-07, + "loss": 15.3744, + "step": 428570 + }, + { + "epoch": 0.8657587155629715, + "grad_norm": 263.1143493652344, + "learning_rate": 6.087490477550129e-07, + "loss": 20.2029, + "step": 428580 + }, + { + "epoch": 0.8657789161956553, + "grad_norm": 235.42340087890625, + "learning_rate": 6.085821346692427e-07, + "loss": 14.6232, + "step": 428590 + }, + { + "epoch": 0.8657991168283391, + "grad_norm": 162.44549560546875, + "learning_rate": 6.084152429867113e-07, + "loss": 13.1799, + "step": 428600 + }, + { + "epoch": 0.8658193174610229, + "grad_norm": 219.5083770751953, + "learning_rate": 6.082483727082317e-07, + "loss": 19.9044, + "step": 428610 + }, + { + "epoch": 0.8658395180937067, + "grad_norm": 18.062034606933594, + "learning_rate": 6.080815238346155e-07, + "loss": 11.8987, + "step": 428620 + }, + { + "epoch": 0.8658597187263906, + "grad_norm": 176.3072052001953, + "learning_rate": 6.079146963666777e-07, + "loss": 20.2592, + "step": 428630 + }, + { + "epoch": 0.8658799193590744, + "grad_norm": 196.42071533203125, + "learning_rate": 6.077478903052314e-07, + "loss": 9.7356, + "step": 428640 + }, + { + "epoch": 0.8659001199917581, + "grad_norm": 156.17787170410156, + "learning_rate": 6.075811056510894e-07, + "loss": 19.1281, + "step": 428650 + }, + { + "epoch": 0.8659203206244419, + "grad_norm": 282.8064880371094, + "learning_rate": 6.074143424050638e-07, + "loss": 11.9913, + "step": 428660 + }, + { + "epoch": 0.8659405212571257, + "grad_norm": 197.72618103027344, + "learning_rate": 6.072476005679684e-07, + "loss": 17.2073, + "step": 428670 + }, + { + "epoch": 0.8659607218898095, + "grad_norm": 118.91537475585938, + "learning_rate": 6.070808801406158e-07, + "loss": 14.8192, + "step": 428680 + }, + { + "epoch": 0.8659809225224934, + "grad_norm": 325.2646789550781, + "learning_rate": 6.069141811238166e-07, + "loss": 24.0653, + "step": 428690 + }, + { + "epoch": 0.8660011231551772, + "grad_norm": 277.614501953125, + "learning_rate": 6.067475035183862e-07, + "loss": 18.0918, + "step": 428700 + }, + { + "epoch": 0.866021323787861, + "grad_norm": 157.31773376464844, + "learning_rate": 6.06580847325135e-07, + "loss": 24.915, + "step": 428710 + }, + { + "epoch": 0.8660415244205448, + "grad_norm": 269.2095642089844, + "learning_rate": 6.064142125448763e-07, + "loss": 13.1471, + "step": 428720 + }, + { + "epoch": 0.8660617250532286, + "grad_norm": 101.45182800292969, + "learning_rate": 6.062475991784211e-07, + "loss": 7.803, + "step": 428730 + }, + { + "epoch": 0.8660819256859125, + "grad_norm": 337.7289123535156, + "learning_rate": 6.060810072265833e-07, + "loss": 16.5348, + "step": 428740 + }, + { + "epoch": 0.8661021263185963, + "grad_norm": 143.8042755126953, + "learning_rate": 6.059144366901737e-07, + "loss": 25.8053, + "step": 428750 + }, + { + "epoch": 0.8661223269512801, + "grad_norm": 247.76853942871094, + "learning_rate": 6.057478875700035e-07, + "loss": 18.8556, + "step": 428760 + }, + { + "epoch": 0.8661425275839639, + "grad_norm": 244.16229248046875, + "learning_rate": 6.055813598668853e-07, + "loss": 27.3512, + "step": 428770 + }, + { + "epoch": 0.8661627282166477, + "grad_norm": 577.4889526367188, + "learning_rate": 6.054148535816301e-07, + "loss": 14.9934, + "step": 428780 + }, + { + "epoch": 0.8661829288493316, + "grad_norm": 150.03895568847656, + "learning_rate": 6.052483687150512e-07, + "loss": 27.7736, + "step": 428790 + }, + { + "epoch": 0.8662031294820154, + "grad_norm": 507.7596435546875, + "learning_rate": 6.050819052679585e-07, + "loss": 18.6382, + "step": 428800 + }, + { + "epoch": 0.8662233301146992, + "grad_norm": 388.2171936035156, + "learning_rate": 6.049154632411625e-07, + "loss": 17.4887, + "step": 428810 + }, + { + "epoch": 0.866243530747383, + "grad_norm": 451.86663818359375, + "learning_rate": 6.047490426354763e-07, + "loss": 36.7194, + "step": 428820 + }, + { + "epoch": 0.8662637313800668, + "grad_norm": 526.371826171875, + "learning_rate": 6.045826434517104e-07, + "loss": 15.8983, + "step": 428830 + }, + { + "epoch": 0.8662839320127507, + "grad_norm": 326.0290832519531, + "learning_rate": 6.044162656906744e-07, + "loss": 21.5066, + "step": 428840 + }, + { + "epoch": 0.8663041326454345, + "grad_norm": 162.91583251953125, + "learning_rate": 6.042499093531806e-07, + "loss": 12.7883, + "step": 428850 + }, + { + "epoch": 0.8663243332781183, + "grad_norm": 134.56124877929688, + "learning_rate": 6.040835744400403e-07, + "loss": 13.7821, + "step": 428860 + }, + { + "epoch": 0.8663445339108021, + "grad_norm": 303.36114501953125, + "learning_rate": 6.039172609520639e-07, + "loss": 19.4464, + "step": 428870 + }, + { + "epoch": 0.866364734543486, + "grad_norm": 183.8982391357422, + "learning_rate": 6.037509688900606e-07, + "loss": 16.5134, + "step": 428880 + }, + { + "epoch": 0.8663849351761698, + "grad_norm": 228.36627197265625, + "learning_rate": 6.035846982548427e-07, + "loss": 12.6935, + "step": 428890 + }, + { + "epoch": 0.8664051358088536, + "grad_norm": 462.7586669921875, + "learning_rate": 6.034184490472195e-07, + "loss": 21.9789, + "step": 428900 + }, + { + "epoch": 0.8664253364415373, + "grad_norm": 352.4219665527344, + "learning_rate": 6.032522212680009e-07, + "loss": 22.4572, + "step": 428910 + }, + { + "epoch": 0.8664455370742211, + "grad_norm": 310.3164978027344, + "learning_rate": 6.030860149179973e-07, + "loss": 29.9658, + "step": 428920 + }, + { + "epoch": 0.8664657377069049, + "grad_norm": 452.3450622558594, + "learning_rate": 6.029198299980216e-07, + "loss": 18.4972, + "step": 428930 + }, + { + "epoch": 0.8664859383395888, + "grad_norm": 215.80081176757812, + "learning_rate": 6.027536665088795e-07, + "loss": 15.1169, + "step": 428940 + }, + { + "epoch": 0.8665061389722726, + "grad_norm": 553.4271850585938, + "learning_rate": 6.025875244513824e-07, + "loss": 22.0291, + "step": 428950 + }, + { + "epoch": 0.8665263396049564, + "grad_norm": 201.82196044921875, + "learning_rate": 6.024214038263415e-07, + "loss": 16.4352, + "step": 428960 + }, + { + "epoch": 0.8665465402376402, + "grad_norm": 415.3941650390625, + "learning_rate": 6.022553046345647e-07, + "loss": 14.2913, + "step": 428970 + }, + { + "epoch": 0.866566740870324, + "grad_norm": 242.05711364746094, + "learning_rate": 6.020892268768619e-07, + "loss": 15.1337, + "step": 428980 + }, + { + "epoch": 0.8665869415030079, + "grad_norm": 328.94195556640625, + "learning_rate": 6.019231705540435e-07, + "loss": 28.022, + "step": 428990 + }, + { + "epoch": 0.8666071421356917, + "grad_norm": 280.80255126953125, + "learning_rate": 6.017571356669183e-07, + "loss": 17.7552, + "step": 429000 + }, + { + "epoch": 0.8666273427683755, + "grad_norm": 910.9259643554688, + "learning_rate": 6.015911222162946e-07, + "loss": 32.5377, + "step": 429010 + }, + { + "epoch": 0.8666475434010593, + "grad_norm": 199.94970703125, + "learning_rate": 6.014251302029817e-07, + "loss": 13.5446, + "step": 429020 + }, + { + "epoch": 0.8666677440337431, + "grad_norm": 558.7119750976562, + "learning_rate": 6.012591596277906e-07, + "loss": 23.016, + "step": 429030 + }, + { + "epoch": 0.866687944666427, + "grad_norm": 146.7935028076172, + "learning_rate": 6.01093210491529e-07, + "loss": 11.2062, + "step": 429040 + }, + { + "epoch": 0.8667081452991108, + "grad_norm": 381.5577697753906, + "learning_rate": 6.009272827950042e-07, + "loss": 24.3796, + "step": 429050 + }, + { + "epoch": 0.8667283459317946, + "grad_norm": 342.403076171875, + "learning_rate": 6.007613765390274e-07, + "loss": 20.4655, + "step": 429060 + }, + { + "epoch": 0.8667485465644784, + "grad_norm": 295.2065734863281, + "learning_rate": 6.005954917244062e-07, + "loss": 19.4918, + "step": 429070 + }, + { + "epoch": 0.8667687471971622, + "grad_norm": 463.19683837890625, + "learning_rate": 6.004296283519478e-07, + "loss": 11.7546, + "step": 429080 + }, + { + "epoch": 0.8667889478298461, + "grad_norm": 61.19007873535156, + "learning_rate": 6.002637864224631e-07, + "loss": 23.4715, + "step": 429090 + }, + { + "epoch": 0.8668091484625299, + "grad_norm": 859.3980102539062, + "learning_rate": 6.000979659367579e-07, + "loss": 28.7117, + "step": 429100 + }, + { + "epoch": 0.8668293490952137, + "grad_norm": 394.579833984375, + "learning_rate": 5.999321668956425e-07, + "loss": 34.4005, + "step": 429110 + }, + { + "epoch": 0.8668495497278975, + "grad_norm": 18.669734954833984, + "learning_rate": 5.997663892999239e-07, + "loss": 15.9816, + "step": 429120 + }, + { + "epoch": 0.8668697503605813, + "grad_norm": 24.459758758544922, + "learning_rate": 5.996006331504095e-07, + "loss": 9.6649, + "step": 429130 + }, + { + "epoch": 0.8668899509932652, + "grad_norm": 282.9619140625, + "learning_rate": 5.994348984479092e-07, + "loss": 9.8981, + "step": 429140 + }, + { + "epoch": 0.866910151625949, + "grad_norm": 252.2718505859375, + "learning_rate": 5.992691851932292e-07, + "loss": 15.6859, + "step": 429150 + }, + { + "epoch": 0.8669303522586327, + "grad_norm": 630.28125, + "learning_rate": 5.991034933871764e-07, + "loss": 27.2728, + "step": 429160 + }, + { + "epoch": 0.8669505528913165, + "grad_norm": 322.893310546875, + "learning_rate": 5.989378230305592e-07, + "loss": 16.9584, + "step": 429170 + }, + { + "epoch": 0.8669707535240003, + "grad_norm": 592.335693359375, + "learning_rate": 5.987721741241864e-07, + "loss": 16.7208, + "step": 429180 + }, + { + "epoch": 0.8669909541566841, + "grad_norm": 223.1149444580078, + "learning_rate": 5.986065466688645e-07, + "loss": 8.3642, + "step": 429190 + }, + { + "epoch": 0.867011154789368, + "grad_norm": 315.1728820800781, + "learning_rate": 5.98440940665399e-07, + "loss": 7.3146, + "step": 429200 + }, + { + "epoch": 0.8670313554220518, + "grad_norm": 263.8191833496094, + "learning_rate": 5.982753561145999e-07, + "loss": 12.7024, + "step": 429210 + }, + { + "epoch": 0.8670515560547356, + "grad_norm": 391.21453857421875, + "learning_rate": 5.981097930172725e-07, + "loss": 12.3601, + "step": 429220 + }, + { + "epoch": 0.8670717566874194, + "grad_norm": 81.99147033691406, + "learning_rate": 5.979442513742234e-07, + "loss": 16.3302, + "step": 429230 + }, + { + "epoch": 0.8670919573201032, + "grad_norm": 91.91902923583984, + "learning_rate": 5.977787311862598e-07, + "loss": 29.2279, + "step": 429240 + }, + { + "epoch": 0.8671121579527871, + "grad_norm": 290.50225830078125, + "learning_rate": 5.9761323245419e-07, + "loss": 12.1331, + "step": 429250 + }, + { + "epoch": 0.8671323585854709, + "grad_norm": 386.3379821777344, + "learning_rate": 5.974477551788194e-07, + "loss": 11.5684, + "step": 429260 + }, + { + "epoch": 0.8671525592181547, + "grad_norm": 8.265846252441406, + "learning_rate": 5.972822993609534e-07, + "loss": 19.5983, + "step": 429270 + }, + { + "epoch": 0.8671727598508385, + "grad_norm": 323.8424072265625, + "learning_rate": 5.971168650014008e-07, + "loss": 17.9333, + "step": 429280 + }, + { + "epoch": 0.8671929604835223, + "grad_norm": 573.0355834960938, + "learning_rate": 5.969514521009662e-07, + "loss": 18.3358, + "step": 429290 + }, + { + "epoch": 0.8672131611162062, + "grad_norm": 368.9503173828125, + "learning_rate": 5.967860606604553e-07, + "loss": 9.7872, + "step": 429300 + }, + { + "epoch": 0.86723336174889, + "grad_norm": 634.7498168945312, + "learning_rate": 5.966206906806748e-07, + "loss": 22.7565, + "step": 429310 + }, + { + "epoch": 0.8672535623815738, + "grad_norm": 189.865478515625, + "learning_rate": 5.964553421624325e-07, + "loss": 11.0884, + "step": 429320 + }, + { + "epoch": 0.8672737630142576, + "grad_norm": 229.6313934326172, + "learning_rate": 5.962900151065326e-07, + "loss": 13.2101, + "step": 429330 + }, + { + "epoch": 0.8672939636469414, + "grad_norm": 147.2005615234375, + "learning_rate": 5.961247095137795e-07, + "loss": 11.2871, + "step": 429340 + }, + { + "epoch": 0.8673141642796253, + "grad_norm": 219.44606018066406, + "learning_rate": 5.959594253849821e-07, + "loss": 21.7122, + "step": 429350 + }, + { + "epoch": 0.8673343649123091, + "grad_norm": 391.3631591796875, + "learning_rate": 5.95794162720944e-07, + "loss": 22.3558, + "step": 429360 + }, + { + "epoch": 0.8673545655449929, + "grad_norm": 332.0225830078125, + "learning_rate": 5.956289215224703e-07, + "loss": 5.7098, + "step": 429370 + }, + { + "epoch": 0.8673747661776767, + "grad_norm": 146.01632690429688, + "learning_rate": 5.95463701790368e-07, + "loss": 10.8803, + "step": 429380 + }, + { + "epoch": 0.8673949668103605, + "grad_norm": 195.25027465820312, + "learning_rate": 5.9529850352544e-07, + "loss": 5.1518, + "step": 429390 + }, + { + "epoch": 0.8674151674430444, + "grad_norm": 222.6243133544922, + "learning_rate": 5.951333267284942e-07, + "loss": 17.026, + "step": 429400 + }, + { + "epoch": 0.8674353680757282, + "grad_norm": 353.83624267578125, + "learning_rate": 5.949681714003347e-07, + "loss": 16.2362, + "step": 429410 + }, + { + "epoch": 0.8674555687084119, + "grad_norm": 417.9424133300781, + "learning_rate": 5.948030375417646e-07, + "loss": 18.7633, + "step": 429420 + }, + { + "epoch": 0.8674757693410957, + "grad_norm": 157.91519165039062, + "learning_rate": 5.946379251535911e-07, + "loss": 16.1146, + "step": 429430 + }, + { + "epoch": 0.8674959699737795, + "grad_norm": 1.0067952871322632, + "learning_rate": 5.944728342366179e-07, + "loss": 16.6756, + "step": 429440 + }, + { + "epoch": 0.8675161706064634, + "grad_norm": 355.3564758300781, + "learning_rate": 5.943077647916496e-07, + "loss": 14.706, + "step": 429450 + }, + { + "epoch": 0.8675363712391472, + "grad_norm": 320.314453125, + "learning_rate": 5.941427168194902e-07, + "loss": 20.1117, + "step": 429460 + }, + { + "epoch": 0.867556571871831, + "grad_norm": 350.0729064941406, + "learning_rate": 5.93977690320946e-07, + "loss": 22.0971, + "step": 429470 + }, + { + "epoch": 0.8675767725045148, + "grad_norm": 410.6178894042969, + "learning_rate": 5.938126852968201e-07, + "loss": 11.3771, + "step": 429480 + }, + { + "epoch": 0.8675969731371986, + "grad_norm": 353.26922607421875, + "learning_rate": 5.936477017479158e-07, + "loss": 12.4809, + "step": 429490 + }, + { + "epoch": 0.8676171737698825, + "grad_norm": 100.64823150634766, + "learning_rate": 5.934827396750392e-07, + "loss": 26.9218, + "step": 429500 + }, + { + "epoch": 0.8676373744025663, + "grad_norm": 194.20530700683594, + "learning_rate": 5.933177990789934e-07, + "loss": 11.9476, + "step": 429510 + }, + { + "epoch": 0.8676575750352501, + "grad_norm": 345.63226318359375, + "learning_rate": 5.931528799605813e-07, + "loss": 20.6152, + "step": 429520 + }, + { + "epoch": 0.8676777756679339, + "grad_norm": 494.8049621582031, + "learning_rate": 5.92987982320607e-07, + "loss": 27.0089, + "step": 429530 + }, + { + "epoch": 0.8676979763006177, + "grad_norm": 0.0, + "learning_rate": 5.928231061598772e-07, + "loss": 14.0555, + "step": 429540 + }, + { + "epoch": 0.8677181769333016, + "grad_norm": 249.08834838867188, + "learning_rate": 5.926582514791912e-07, + "loss": 15.6392, + "step": 429550 + }, + { + "epoch": 0.8677383775659854, + "grad_norm": 352.59246826171875, + "learning_rate": 5.92493418279354e-07, + "loss": 17.8044, + "step": 429560 + }, + { + "epoch": 0.8677585781986692, + "grad_norm": 3.3819949626922607, + "learning_rate": 5.923286065611705e-07, + "loss": 10.717, + "step": 429570 + }, + { + "epoch": 0.867778778831353, + "grad_norm": 401.1866455078125, + "learning_rate": 5.921638163254423e-07, + "loss": 26.1984, + "step": 429580 + }, + { + "epoch": 0.8677989794640368, + "grad_norm": 331.5385437011719, + "learning_rate": 5.919990475729725e-07, + "loss": 20.3919, + "step": 429590 + }, + { + "epoch": 0.8678191800967207, + "grad_norm": 181.6807403564453, + "learning_rate": 5.918343003045656e-07, + "loss": 6.8367, + "step": 429600 + }, + { + "epoch": 0.8678393807294045, + "grad_norm": 0.0, + "learning_rate": 5.916695745210238e-07, + "loss": 11.5288, + "step": 429610 + }, + { + "epoch": 0.8678595813620883, + "grad_norm": 455.24578857421875, + "learning_rate": 5.915048702231491e-07, + "loss": 15.6658, + "step": 429620 + }, + { + "epoch": 0.8678797819947721, + "grad_norm": 199.43023681640625, + "learning_rate": 5.913401874117447e-07, + "loss": 13.4704, + "step": 429630 + }, + { + "epoch": 0.8678999826274559, + "grad_norm": 383.42352294921875, + "learning_rate": 5.911755260876145e-07, + "loss": 10.191, + "step": 429640 + }, + { + "epoch": 0.8679201832601398, + "grad_norm": 3.932265281677246, + "learning_rate": 5.910108862515596e-07, + "loss": 18.7308, + "step": 429650 + }, + { + "epoch": 0.8679403838928236, + "grad_norm": 317.3648376464844, + "learning_rate": 5.908462679043825e-07, + "loss": 17.859, + "step": 429660 + }, + { + "epoch": 0.8679605845255073, + "grad_norm": 756.0564575195312, + "learning_rate": 5.906816710468866e-07, + "loss": 18.1549, + "step": 429670 + }, + { + "epoch": 0.8679807851581911, + "grad_norm": 171.04759216308594, + "learning_rate": 5.905170956798739e-07, + "loss": 18.7377, + "step": 429680 + }, + { + "epoch": 0.8680009857908749, + "grad_norm": 316.7099304199219, + "learning_rate": 5.903525418041445e-07, + "loss": 19.8947, + "step": 429690 + }, + { + "epoch": 0.8680211864235587, + "grad_norm": 454.18304443359375, + "learning_rate": 5.901880094205037e-07, + "loss": 25.8028, + "step": 429700 + }, + { + "epoch": 0.8680413870562426, + "grad_norm": 155.0983428955078, + "learning_rate": 5.900234985297498e-07, + "loss": 18.6755, + "step": 429710 + }, + { + "epoch": 0.8680615876889264, + "grad_norm": 251.7493438720703, + "learning_rate": 5.898590091326884e-07, + "loss": 11.8688, + "step": 429720 + }, + { + "epoch": 0.8680817883216102, + "grad_norm": 270.7951965332031, + "learning_rate": 5.896945412301186e-07, + "loss": 19.235, + "step": 429730 + }, + { + "epoch": 0.868101988954294, + "grad_norm": 352.2210388183594, + "learning_rate": 5.895300948228421e-07, + "loss": 19.2813, + "step": 429740 + }, + { + "epoch": 0.8681221895869778, + "grad_norm": 80.35448455810547, + "learning_rate": 5.893656699116618e-07, + "loss": 11.6999, + "step": 429750 + }, + { + "epoch": 0.8681423902196617, + "grad_norm": 90.20613098144531, + "learning_rate": 5.892012664973784e-07, + "loss": 17.4248, + "step": 429760 + }, + { + "epoch": 0.8681625908523455, + "grad_norm": 21.49197769165039, + "learning_rate": 5.89036884580792e-07, + "loss": 11.329, + "step": 429770 + }, + { + "epoch": 0.8681827914850293, + "grad_norm": 106.82868957519531, + "learning_rate": 5.888725241627047e-07, + "loss": 26.8885, + "step": 429780 + }, + { + "epoch": 0.8682029921177131, + "grad_norm": 514.7811279296875, + "learning_rate": 5.887081852439186e-07, + "loss": 16.6248, + "step": 429790 + }, + { + "epoch": 0.8682231927503969, + "grad_norm": 229.88723754882812, + "learning_rate": 5.885438678252342e-07, + "loss": 7.963, + "step": 429800 + }, + { + "epoch": 0.8682433933830808, + "grad_norm": 109.64239501953125, + "learning_rate": 5.883795719074509e-07, + "loss": 12.7741, + "step": 429810 + }, + { + "epoch": 0.8682635940157646, + "grad_norm": 144.66868591308594, + "learning_rate": 5.882152974913713e-07, + "loss": 13.0036, + "step": 429820 + }, + { + "epoch": 0.8682837946484484, + "grad_norm": 333.4307556152344, + "learning_rate": 5.880510445777954e-07, + "loss": 25.0166, + "step": 429830 + }, + { + "epoch": 0.8683039952811322, + "grad_norm": 199.61651611328125, + "learning_rate": 5.878868131675225e-07, + "loss": 11.6992, + "step": 429840 + }, + { + "epoch": 0.868324195913816, + "grad_norm": 50.70903015136719, + "learning_rate": 5.877226032613542e-07, + "loss": 15.0655, + "step": 429850 + }, + { + "epoch": 0.8683443965464999, + "grad_norm": 103.51262664794922, + "learning_rate": 5.875584148600916e-07, + "loss": 11.3752, + "step": 429860 + }, + { + "epoch": 0.8683645971791837, + "grad_norm": 44.83159255981445, + "learning_rate": 5.873942479645345e-07, + "loss": 18.3231, + "step": 429870 + }, + { + "epoch": 0.8683847978118675, + "grad_norm": 338.2083435058594, + "learning_rate": 5.872301025754812e-07, + "loss": 12.4635, + "step": 429880 + }, + { + "epoch": 0.8684049984445513, + "grad_norm": 374.15869140625, + "learning_rate": 5.870659786937344e-07, + "loss": 13.0173, + "step": 429890 + }, + { + "epoch": 0.8684251990772351, + "grad_norm": 57.11210632324219, + "learning_rate": 5.869018763200929e-07, + "loss": 24.6361, + "step": 429900 + }, + { + "epoch": 0.868445399709919, + "grad_norm": 58.70858383178711, + "learning_rate": 5.867377954553555e-07, + "loss": 29.2412, + "step": 429910 + }, + { + "epoch": 0.8684656003426028, + "grad_norm": 292.7398681640625, + "learning_rate": 5.865737361003226e-07, + "loss": 16.5664, + "step": 429920 + }, + { + "epoch": 0.8684858009752865, + "grad_norm": 217.97293090820312, + "learning_rate": 5.864096982557949e-07, + "loss": 21.1409, + "step": 429930 + }, + { + "epoch": 0.8685060016079703, + "grad_norm": 613.9413452148438, + "learning_rate": 5.862456819225715e-07, + "loss": 19.6642, + "step": 429940 + }, + { + "epoch": 0.8685262022406541, + "grad_norm": 627.24072265625, + "learning_rate": 5.860816871014496e-07, + "loss": 18.9572, + "step": 429950 + }, + { + "epoch": 0.868546402873338, + "grad_norm": 354.0593566894531, + "learning_rate": 5.859177137932315e-07, + "loss": 20.9379, + "step": 429960 + }, + { + "epoch": 0.8685666035060218, + "grad_norm": 430.450927734375, + "learning_rate": 5.857537619987152e-07, + "loss": 14.9089, + "step": 429970 + }, + { + "epoch": 0.8685868041387056, + "grad_norm": 211.65225219726562, + "learning_rate": 5.855898317186992e-07, + "loss": 13.5329, + "step": 429980 + }, + { + "epoch": 0.8686070047713894, + "grad_norm": 142.22438049316406, + "learning_rate": 5.854259229539833e-07, + "loss": 20.4537, + "step": 429990 + }, + { + "epoch": 0.8686272054040732, + "grad_norm": 397.9911804199219, + "learning_rate": 5.852620357053651e-07, + "loss": 20.0013, + "step": 430000 + }, + { + "epoch": 0.868647406036757, + "grad_norm": 211.5362091064453, + "learning_rate": 5.850981699736453e-07, + "loss": 30.0219, + "step": 430010 + }, + { + "epoch": 0.8686676066694409, + "grad_norm": 334.8950500488281, + "learning_rate": 5.849343257596218e-07, + "loss": 15.0287, + "step": 430020 + }, + { + "epoch": 0.8686878073021247, + "grad_norm": 406.8493347167969, + "learning_rate": 5.847705030640915e-07, + "loss": 22.8116, + "step": 430030 + }, + { + "epoch": 0.8687080079348085, + "grad_norm": 266.61248779296875, + "learning_rate": 5.84606701887856e-07, + "loss": 14.4064, + "step": 430040 + }, + { + "epoch": 0.8687282085674923, + "grad_norm": 3.96455717086792, + "learning_rate": 5.844429222317111e-07, + "loss": 5.1193, + "step": 430050 + }, + { + "epoch": 0.8687484092001762, + "grad_norm": 405.39666748046875, + "learning_rate": 5.842791640964551e-07, + "loss": 22.1602, + "step": 430060 + }, + { + "epoch": 0.86876860983286, + "grad_norm": 109.13443756103516, + "learning_rate": 5.841154274828869e-07, + "loss": 14.5743, + "step": 430070 + }, + { + "epoch": 0.8687888104655438, + "grad_norm": 357.2668762207031, + "learning_rate": 5.839517123918059e-07, + "loss": 16.6738, + "step": 430080 + }, + { + "epoch": 0.8688090110982276, + "grad_norm": 161.05580139160156, + "learning_rate": 5.83788018824008e-07, + "loss": 11.4569, + "step": 430090 + }, + { + "epoch": 0.8688292117309114, + "grad_norm": 260.228271484375, + "learning_rate": 5.836243467802915e-07, + "loss": 16.5401, + "step": 430100 + }, + { + "epoch": 0.8688494123635953, + "grad_norm": 113.09424591064453, + "learning_rate": 5.834606962614548e-07, + "loss": 7.3593, + "step": 430110 + }, + { + "epoch": 0.8688696129962791, + "grad_norm": 264.3537902832031, + "learning_rate": 5.832970672682948e-07, + "loss": 30.4637, + "step": 430120 + }, + { + "epoch": 0.8688898136289629, + "grad_norm": 748.5376586914062, + "learning_rate": 5.831334598016086e-07, + "loss": 21.3425, + "step": 430130 + }, + { + "epoch": 0.8689100142616467, + "grad_norm": 207.56895446777344, + "learning_rate": 5.829698738621941e-07, + "loss": 21.533, + "step": 430140 + }, + { + "epoch": 0.8689302148943305, + "grad_norm": 250.3556671142578, + "learning_rate": 5.828063094508507e-07, + "loss": 22.9276, + "step": 430150 + }, + { + "epoch": 0.8689504155270144, + "grad_norm": 236.70950317382812, + "learning_rate": 5.826427665683715e-07, + "loss": 9.2027, + "step": 430160 + }, + { + "epoch": 0.8689706161596982, + "grad_norm": 214.5115203857422, + "learning_rate": 5.824792452155558e-07, + "loss": 15.084, + "step": 430170 + }, + { + "epoch": 0.868990816792382, + "grad_norm": 366.3526916503906, + "learning_rate": 5.823157453932015e-07, + "loss": 26.1997, + "step": 430180 + }, + { + "epoch": 0.8690110174250657, + "grad_norm": 9.377196311950684, + "learning_rate": 5.821522671021041e-07, + "loss": 18.4233, + "step": 430190 + }, + { + "epoch": 0.8690312180577495, + "grad_norm": 493.40399169921875, + "learning_rate": 5.819888103430598e-07, + "loss": 14.3293, + "step": 430200 + }, + { + "epoch": 0.8690514186904333, + "grad_norm": 587.1106567382812, + "learning_rate": 5.818253751168679e-07, + "loss": 16.7316, + "step": 430210 + }, + { + "epoch": 0.8690716193231172, + "grad_norm": 181.7979278564453, + "learning_rate": 5.816619614243224e-07, + "loss": 16.6902, + "step": 430220 + }, + { + "epoch": 0.869091819955801, + "grad_norm": 221.2801055908203, + "learning_rate": 5.814985692662201e-07, + "loss": 29.7676, + "step": 430230 + }, + { + "epoch": 0.8691120205884848, + "grad_norm": 205.5895538330078, + "learning_rate": 5.81335198643358e-07, + "loss": 10.7289, + "step": 430240 + }, + { + "epoch": 0.8691322212211686, + "grad_norm": 170.81640625, + "learning_rate": 5.811718495565327e-07, + "loss": 13.0024, + "step": 430250 + }, + { + "epoch": 0.8691524218538524, + "grad_norm": 643.1643676757812, + "learning_rate": 5.810085220065404e-07, + "loss": 43.2111, + "step": 430260 + }, + { + "epoch": 0.8691726224865363, + "grad_norm": 365.8726806640625, + "learning_rate": 5.808452159941752e-07, + "loss": 14.3514, + "step": 430270 + }, + { + "epoch": 0.8691928231192201, + "grad_norm": 131.76368713378906, + "learning_rate": 5.806819315202361e-07, + "loss": 15.9404, + "step": 430280 + }, + { + "epoch": 0.8692130237519039, + "grad_norm": 213.5436248779297, + "learning_rate": 5.805186685855163e-07, + "loss": 9.1838, + "step": 430290 + }, + { + "epoch": 0.8692332243845877, + "grad_norm": 116.876220703125, + "learning_rate": 5.803554271908124e-07, + "loss": 15.4149, + "step": 430300 + }, + { + "epoch": 0.8692534250172715, + "grad_norm": 422.8739318847656, + "learning_rate": 5.801922073369203e-07, + "loss": 14.9275, + "step": 430310 + }, + { + "epoch": 0.8692736256499554, + "grad_norm": 323.6262512207031, + "learning_rate": 5.800290090246346e-07, + "loss": 11.0438, + "step": 430320 + }, + { + "epoch": 0.8692938262826392, + "grad_norm": 235.24993896484375, + "learning_rate": 5.798658322547529e-07, + "loss": 16.0167, + "step": 430330 + }, + { + "epoch": 0.869314026915323, + "grad_norm": 190.68240356445312, + "learning_rate": 5.797026770280683e-07, + "loss": 6.4879, + "step": 430340 + }, + { + "epoch": 0.8693342275480068, + "grad_norm": 4.9392924308776855, + "learning_rate": 5.795395433453765e-07, + "loss": 13.5663, + "step": 430350 + }, + { + "epoch": 0.8693544281806906, + "grad_norm": 124.97154998779297, + "learning_rate": 5.793764312074735e-07, + "loss": 16.773, + "step": 430360 + }, + { + "epoch": 0.8693746288133745, + "grad_norm": 196.126220703125, + "learning_rate": 5.792133406151523e-07, + "loss": 13.8697, + "step": 430370 + }, + { + "epoch": 0.8693948294460583, + "grad_norm": 293.3162536621094, + "learning_rate": 5.790502715692104e-07, + "loss": 23.1517, + "step": 430380 + }, + { + "epoch": 0.8694150300787421, + "grad_norm": 188.63087463378906, + "learning_rate": 5.788872240704402e-07, + "loss": 9.6314, + "step": 430390 + }, + { + "epoch": 0.8694352307114259, + "grad_norm": 467.492431640625, + "learning_rate": 5.787241981196384e-07, + "loss": 24.1223, + "step": 430400 + }, + { + "epoch": 0.8694554313441097, + "grad_norm": 379.414794921875, + "learning_rate": 5.785611937175989e-07, + "loss": 12.4083, + "step": 430410 + }, + { + "epoch": 0.8694756319767936, + "grad_norm": 36.301517486572266, + "learning_rate": 5.783982108651148e-07, + "loss": 14.7152, + "step": 430420 + }, + { + "epoch": 0.8694958326094774, + "grad_norm": 358.90313720703125, + "learning_rate": 5.782352495629822e-07, + "loss": 24.9715, + "step": 430430 + }, + { + "epoch": 0.8695160332421611, + "grad_norm": 23.102815628051758, + "learning_rate": 5.780723098119951e-07, + "loss": 23.9682, + "step": 430440 + }, + { + "epoch": 0.8695362338748449, + "grad_norm": 267.063720703125, + "learning_rate": 5.779093916129464e-07, + "loss": 18.9859, + "step": 430450 + }, + { + "epoch": 0.8695564345075287, + "grad_norm": 198.9434051513672, + "learning_rate": 5.777464949666306e-07, + "loss": 33.027, + "step": 430460 + }, + { + "epoch": 0.8695766351402126, + "grad_norm": 112.8031234741211, + "learning_rate": 5.775836198738427e-07, + "loss": 49.3809, + "step": 430470 + }, + { + "epoch": 0.8695968357728964, + "grad_norm": 86.52124786376953, + "learning_rate": 5.774207663353765e-07, + "loss": 17.5758, + "step": 430480 + }, + { + "epoch": 0.8696170364055802, + "grad_norm": 138.0382843017578, + "learning_rate": 5.772579343520241e-07, + "loss": 27.4977, + "step": 430490 + }, + { + "epoch": 0.869637237038264, + "grad_norm": 160.53768920898438, + "learning_rate": 5.770951239245803e-07, + "loss": 20.9604, + "step": 430500 + }, + { + "epoch": 0.8696574376709478, + "grad_norm": 573.6380615234375, + "learning_rate": 5.769323350538391e-07, + "loss": 26.4926, + "step": 430510 + }, + { + "epoch": 0.8696776383036317, + "grad_norm": 221.80967712402344, + "learning_rate": 5.767695677405921e-07, + "loss": 23.4499, + "step": 430520 + }, + { + "epoch": 0.8696978389363155, + "grad_norm": 75.57762908935547, + "learning_rate": 5.766068219856341e-07, + "loss": 25.7678, + "step": 430530 + }, + { + "epoch": 0.8697180395689993, + "grad_norm": 231.1885223388672, + "learning_rate": 5.764440977897584e-07, + "loss": 21.1663, + "step": 430540 + }, + { + "epoch": 0.8697382402016831, + "grad_norm": 212.92323303222656, + "learning_rate": 5.762813951537582e-07, + "loss": 10.6941, + "step": 430550 + }, + { + "epoch": 0.8697584408343669, + "grad_norm": 170.4713592529297, + "learning_rate": 5.761187140784247e-07, + "loss": 13.3565, + "step": 430560 + }, + { + "epoch": 0.8697786414670508, + "grad_norm": 189.28836059570312, + "learning_rate": 5.759560545645527e-07, + "loss": 12.5459, + "step": 430570 + }, + { + "epoch": 0.8697988420997346, + "grad_norm": 190.5909881591797, + "learning_rate": 5.757934166129347e-07, + "loss": 11.0884, + "step": 430580 + }, + { + "epoch": 0.8698190427324184, + "grad_norm": 355.5611877441406, + "learning_rate": 5.756308002243622e-07, + "loss": 22.4758, + "step": 430590 + }, + { + "epoch": 0.8698392433651022, + "grad_norm": 286.4625549316406, + "learning_rate": 5.754682053996291e-07, + "loss": 13.3158, + "step": 430600 + }, + { + "epoch": 0.869859443997786, + "grad_norm": 263.8946533203125, + "learning_rate": 5.753056321395267e-07, + "loss": 13.0268, + "step": 430610 + }, + { + "epoch": 0.8698796446304699, + "grad_norm": 394.4566955566406, + "learning_rate": 5.751430804448488e-07, + "loss": 16.362, + "step": 430620 + }, + { + "epoch": 0.8698998452631537, + "grad_norm": 232.8829803466797, + "learning_rate": 5.749805503163869e-07, + "loss": 12.2157, + "step": 430630 + }, + { + "epoch": 0.8699200458958375, + "grad_norm": 28.242095947265625, + "learning_rate": 5.748180417549321e-07, + "loss": 9.1866, + "step": 430640 + }, + { + "epoch": 0.8699402465285213, + "grad_norm": 146.7534942626953, + "learning_rate": 5.746555547612781e-07, + "loss": 22.0129, + "step": 430650 + }, + { + "epoch": 0.8699604471612051, + "grad_norm": 537.522705078125, + "learning_rate": 5.744930893362166e-07, + "loss": 23.2022, + "step": 430660 + }, + { + "epoch": 0.869980647793889, + "grad_norm": 299.1604309082031, + "learning_rate": 5.74330645480538e-07, + "loss": 16.9488, + "step": 430670 + }, + { + "epoch": 0.8700008484265728, + "grad_norm": 251.43258666992188, + "learning_rate": 5.741682231950351e-07, + "loss": 19.7603, + "step": 430680 + }, + { + "epoch": 0.8700210490592566, + "grad_norm": 169.90646362304688, + "learning_rate": 5.740058224805001e-07, + "loss": 30.0848, + "step": 430690 + }, + { + "epoch": 0.8700412496919403, + "grad_norm": 8.344276428222656, + "learning_rate": 5.738434433377244e-07, + "loss": 17.9386, + "step": 430700 + }, + { + "epoch": 0.8700614503246241, + "grad_norm": 240.35447692871094, + "learning_rate": 5.736810857674979e-07, + "loss": 30.5829, + "step": 430710 + }, + { + "epoch": 0.8700816509573079, + "grad_norm": 625.955078125, + "learning_rate": 5.735187497706135e-07, + "loss": 25.5631, + "step": 430720 + }, + { + "epoch": 0.8701018515899918, + "grad_norm": 303.2730407714844, + "learning_rate": 5.733564353478622e-07, + "loss": 13.2096, + "step": 430730 + }, + { + "epoch": 0.8701220522226756, + "grad_norm": 0.0, + "learning_rate": 5.731941425000337e-07, + "loss": 26.3781, + "step": 430740 + }, + { + "epoch": 0.8701422528553594, + "grad_norm": 170.39614868164062, + "learning_rate": 5.730318712279203e-07, + "loss": 8.2164, + "step": 430750 + }, + { + "epoch": 0.8701624534880432, + "grad_norm": 369.62908935546875, + "learning_rate": 5.728696215323143e-07, + "loss": 13.3138, + "step": 430760 + }, + { + "epoch": 0.870182654120727, + "grad_norm": 4.718437194824219, + "learning_rate": 5.727073934140026e-07, + "loss": 17.5682, + "step": 430770 + }, + { + "epoch": 0.8702028547534109, + "grad_norm": 15.770259857177734, + "learning_rate": 5.725451868737786e-07, + "loss": 10.4337, + "step": 430780 + }, + { + "epoch": 0.8702230553860947, + "grad_norm": 57.28717041015625, + "learning_rate": 5.723830019124332e-07, + "loss": 10.0944, + "step": 430790 + }, + { + "epoch": 0.8702432560187785, + "grad_norm": 337.8901672363281, + "learning_rate": 5.722208385307559e-07, + "loss": 10.8083, + "step": 430800 + }, + { + "epoch": 0.8702634566514623, + "grad_norm": 271.6981506347656, + "learning_rate": 5.720586967295366e-07, + "loss": 10.8632, + "step": 430810 + }, + { + "epoch": 0.8702836572841461, + "grad_norm": 930.3674926757812, + "learning_rate": 5.718965765095669e-07, + "loss": 17.5699, + "step": 430820 + }, + { + "epoch": 0.87030385791683, + "grad_norm": 76.50482177734375, + "learning_rate": 5.717344778716361e-07, + "loss": 11.3183, + "step": 430830 + }, + { + "epoch": 0.8703240585495138, + "grad_norm": 262.0967712402344, + "learning_rate": 5.715724008165335e-07, + "loss": 18.3484, + "step": 430840 + }, + { + "epoch": 0.8703442591821976, + "grad_norm": 253.44366455078125, + "learning_rate": 5.714103453450498e-07, + "loss": 15.9579, + "step": 430850 + }, + { + "epoch": 0.8703644598148814, + "grad_norm": 283.9560241699219, + "learning_rate": 5.712483114579758e-07, + "loss": 20.7914, + "step": 430860 + }, + { + "epoch": 0.8703846604475652, + "grad_norm": 158.31321716308594, + "learning_rate": 5.710862991561006e-07, + "loss": 13.6228, + "step": 430870 + }, + { + "epoch": 0.8704048610802491, + "grad_norm": 201.527587890625, + "learning_rate": 5.709243084402128e-07, + "loss": 12.1568, + "step": 430880 + }, + { + "epoch": 0.8704250617129329, + "grad_norm": 3.624937057495117, + "learning_rate": 5.70762339311104e-07, + "loss": 15.7504, + "step": 430890 + }, + { + "epoch": 0.8704452623456167, + "grad_norm": 351.1962890625, + "learning_rate": 5.706003917695619e-07, + "loss": 25.6406, + "step": 430900 + }, + { + "epoch": 0.8704654629783005, + "grad_norm": 728.9517211914062, + "learning_rate": 5.704384658163748e-07, + "loss": 20.4086, + "step": 430910 + }, + { + "epoch": 0.8704856636109843, + "grad_norm": 509.3713073730469, + "learning_rate": 5.702765614523354e-07, + "loss": 28.421, + "step": 430920 + }, + { + "epoch": 0.8705058642436682, + "grad_norm": 411.2549743652344, + "learning_rate": 5.701146786782291e-07, + "loss": 22.6689, + "step": 430930 + }, + { + "epoch": 0.870526064876352, + "grad_norm": 541.7310791015625, + "learning_rate": 5.699528174948477e-07, + "loss": 19.4127, + "step": 430940 + }, + { + "epoch": 0.8705462655090357, + "grad_norm": 407.40045166015625, + "learning_rate": 5.697909779029786e-07, + "loss": 16.2929, + "step": 430950 + }, + { + "epoch": 0.8705664661417195, + "grad_norm": 226.20111083984375, + "learning_rate": 5.696291599034104e-07, + "loss": 33.7135, + "step": 430960 + }, + { + "epoch": 0.8705866667744033, + "grad_norm": 212.94078063964844, + "learning_rate": 5.694673634969334e-07, + "loss": 8.0405, + "step": 430970 + }, + { + "epoch": 0.8706068674070871, + "grad_norm": 305.7719421386719, + "learning_rate": 5.693055886843341e-07, + "loss": 22.9908, + "step": 430980 + }, + { + "epoch": 0.870627068039771, + "grad_norm": 152.94149780273438, + "learning_rate": 5.691438354664031e-07, + "loss": 21.9984, + "step": 430990 + }, + { + "epoch": 0.8706472686724548, + "grad_norm": 34.74179458618164, + "learning_rate": 5.689821038439264e-07, + "loss": 20.4876, + "step": 431000 + }, + { + "epoch": 0.8706674693051386, + "grad_norm": 41.64984893798828, + "learning_rate": 5.688203938176945e-07, + "loss": 16.412, + "step": 431010 + }, + { + "epoch": 0.8706876699378224, + "grad_norm": 222.1383056640625, + "learning_rate": 5.686587053884946e-07, + "loss": 9.7577, + "step": 431020 + }, + { + "epoch": 0.8707078705705062, + "grad_norm": 222.21275329589844, + "learning_rate": 5.684970385571137e-07, + "loss": 14.1611, + "step": 431030 + }, + { + "epoch": 0.8707280712031901, + "grad_norm": 361.74908447265625, + "learning_rate": 5.683353933243418e-07, + "loss": 28.9463, + "step": 431040 + }, + { + "epoch": 0.8707482718358739, + "grad_norm": 118.4416275024414, + "learning_rate": 5.681737696909656e-07, + "loss": 16.3694, + "step": 431050 + }, + { + "epoch": 0.8707684724685577, + "grad_norm": 393.716796875, + "learning_rate": 5.680121676577721e-07, + "loss": 12.5934, + "step": 431060 + }, + { + "epoch": 0.8707886731012415, + "grad_norm": 510.8108215332031, + "learning_rate": 5.678505872255496e-07, + "loss": 22.5018, + "step": 431070 + }, + { + "epoch": 0.8708088737339253, + "grad_norm": 211.59291076660156, + "learning_rate": 5.676890283950881e-07, + "loss": 12.1117, + "step": 431080 + }, + { + "epoch": 0.8708290743666092, + "grad_norm": 241.6061553955078, + "learning_rate": 5.675274911671702e-07, + "loss": 13.4908, + "step": 431090 + }, + { + "epoch": 0.870849274999293, + "grad_norm": 20.293502807617188, + "learning_rate": 5.673659755425859e-07, + "loss": 30.7483, + "step": 431100 + }, + { + "epoch": 0.8708694756319768, + "grad_norm": 216.94862365722656, + "learning_rate": 5.672044815221234e-07, + "loss": 17.7979, + "step": 431110 + }, + { + "epoch": 0.8708896762646606, + "grad_norm": 189.7763671875, + "learning_rate": 5.670430091065682e-07, + "loss": 11.0608, + "step": 431120 + }, + { + "epoch": 0.8709098768973444, + "grad_norm": 445.8848571777344, + "learning_rate": 5.668815582967074e-07, + "loss": 17.2823, + "step": 431130 + }, + { + "epoch": 0.8709300775300283, + "grad_norm": 261.6067810058594, + "learning_rate": 5.667201290933278e-07, + "loss": 11.3204, + "step": 431140 + }, + { + "epoch": 0.8709502781627121, + "grad_norm": 100.90369415283203, + "learning_rate": 5.665587214972173e-07, + "loss": 17.1483, + "step": 431150 + }, + { + "epoch": 0.8709704787953959, + "grad_norm": 190.43043518066406, + "learning_rate": 5.663973355091624e-07, + "loss": 22.1384, + "step": 431160 + }, + { + "epoch": 0.8709906794280797, + "grad_norm": 22.338645935058594, + "learning_rate": 5.662359711299481e-07, + "loss": 20.7694, + "step": 431170 + }, + { + "epoch": 0.8710108800607635, + "grad_norm": 785.1405029296875, + "learning_rate": 5.66074628360363e-07, + "loss": 13.5858, + "step": 431180 + }, + { + "epoch": 0.8710310806934474, + "grad_norm": 298.8449401855469, + "learning_rate": 5.659133072011919e-07, + "loss": 9.4031, + "step": 431190 + }, + { + "epoch": 0.8710512813261312, + "grad_norm": 337.2384338378906, + "learning_rate": 5.657520076532208e-07, + "loss": 22.3716, + "step": 431200 + }, + { + "epoch": 0.8710714819588149, + "grad_norm": 405.7594299316406, + "learning_rate": 5.655907297172375e-07, + "loss": 18.8737, + "step": 431210 + }, + { + "epoch": 0.8710916825914987, + "grad_norm": 286.75213623046875, + "learning_rate": 5.654294733940263e-07, + "loss": 18.9708, + "step": 431220 + }, + { + "epoch": 0.8711118832241825, + "grad_norm": 127.1952133178711, + "learning_rate": 5.65268238684375e-07, + "loss": 13.1412, + "step": 431230 + }, + { + "epoch": 0.8711320838568664, + "grad_norm": 256.9698181152344, + "learning_rate": 5.651070255890689e-07, + "loss": 14.3125, + "step": 431240 + }, + { + "epoch": 0.8711522844895502, + "grad_norm": 62.01204299926758, + "learning_rate": 5.649458341088915e-07, + "loss": 13.6529, + "step": 431250 + }, + { + "epoch": 0.871172485122234, + "grad_norm": 275.6658020019531, + "learning_rate": 5.647846642446314e-07, + "loss": 18.8037, + "step": 431260 + }, + { + "epoch": 0.8711926857549178, + "grad_norm": 8.213630676269531, + "learning_rate": 5.646235159970731e-07, + "loss": 16.7657, + "step": 431270 + }, + { + "epoch": 0.8712128863876016, + "grad_norm": 339.7079162597656, + "learning_rate": 5.64462389367001e-07, + "loss": 10.0411, + "step": 431280 + }, + { + "epoch": 0.8712330870202855, + "grad_norm": 299.1888122558594, + "learning_rate": 5.64301284355201e-07, + "loss": 28.3854, + "step": 431290 + }, + { + "epoch": 0.8712532876529693, + "grad_norm": 697.38671875, + "learning_rate": 5.641402009624591e-07, + "loss": 14.7147, + "step": 431300 + }, + { + "epoch": 0.8712734882856531, + "grad_norm": 106.74177551269531, + "learning_rate": 5.639791391895605e-07, + "loss": 30.6131, + "step": 431310 + }, + { + "epoch": 0.8712936889183369, + "grad_norm": 30.877931594848633, + "learning_rate": 5.638180990372882e-07, + "loss": 15.0457, + "step": 431320 + }, + { + "epoch": 0.8713138895510207, + "grad_norm": 304.257568359375, + "learning_rate": 5.6365708050643e-07, + "loss": 15.3636, + "step": 431330 + }, + { + "epoch": 0.8713340901837046, + "grad_norm": 267.9161682128906, + "learning_rate": 5.634960835977688e-07, + "loss": 11.9889, + "step": 431340 + }, + { + "epoch": 0.8713542908163884, + "grad_norm": 11.02646541595459, + "learning_rate": 5.633351083120886e-07, + "loss": 14.1122, + "step": 431350 + }, + { + "epoch": 0.8713744914490722, + "grad_norm": 25.108898162841797, + "learning_rate": 5.631741546501746e-07, + "loss": 13.3595, + "step": 431360 + }, + { + "epoch": 0.871394692081756, + "grad_norm": 526.7843627929688, + "learning_rate": 5.630132226128143e-07, + "loss": 13.9046, + "step": 431370 + }, + { + "epoch": 0.8714148927144398, + "grad_norm": 264.2872009277344, + "learning_rate": 5.628523122007867e-07, + "loss": 17.9338, + "step": 431380 + }, + { + "epoch": 0.8714350933471237, + "grad_norm": 719.3394165039062, + "learning_rate": 5.626914234148794e-07, + "loss": 15.2507, + "step": 431390 + }, + { + "epoch": 0.8714552939798075, + "grad_norm": 296.2950134277344, + "learning_rate": 5.625305562558764e-07, + "loss": 18.7133, + "step": 431400 + }, + { + "epoch": 0.8714754946124913, + "grad_norm": 280.3843688964844, + "learning_rate": 5.623697107245619e-07, + "loss": 19.4132, + "step": 431410 + }, + { + "epoch": 0.8714956952451751, + "grad_norm": 176.71728515625, + "learning_rate": 5.622088868217179e-07, + "loss": 9.178, + "step": 431420 + }, + { + "epoch": 0.8715158958778589, + "grad_norm": 202.95556640625, + "learning_rate": 5.620480845481291e-07, + "loss": 14.7262, + "step": 431430 + }, + { + "epoch": 0.8715360965105428, + "grad_norm": 244.365478515625, + "learning_rate": 5.618873039045825e-07, + "loss": 12.8064, + "step": 431440 + }, + { + "epoch": 0.8715562971432266, + "grad_norm": 137.4836883544922, + "learning_rate": 5.617265448918563e-07, + "loss": 17.5753, + "step": 431450 + }, + { + "epoch": 0.8715764977759103, + "grad_norm": 169.54965209960938, + "learning_rate": 5.615658075107366e-07, + "loss": 18.1799, + "step": 431460 + }, + { + "epoch": 0.8715966984085941, + "grad_norm": 285.4529113769531, + "learning_rate": 5.614050917620084e-07, + "loss": 13.4641, + "step": 431470 + }, + { + "epoch": 0.8716168990412779, + "grad_norm": 91.17354583740234, + "learning_rate": 5.612443976464527e-07, + "loss": 16.9061, + "step": 431480 + }, + { + "epoch": 0.8716370996739617, + "grad_norm": 187.19305419921875, + "learning_rate": 5.610837251648532e-07, + "loss": 18.3684, + "step": 431490 + }, + { + "epoch": 0.8716573003066456, + "grad_norm": 221.01214599609375, + "learning_rate": 5.609230743179939e-07, + "loss": 13.5701, + "step": 431500 + }, + { + "epoch": 0.8716775009393294, + "grad_norm": 332.5506591796875, + "learning_rate": 5.607624451066568e-07, + "loss": 14.1987, + "step": 431510 + }, + { + "epoch": 0.8716977015720132, + "grad_norm": 206.41293334960938, + "learning_rate": 5.606018375316246e-07, + "loss": 13.4488, + "step": 431520 + }, + { + "epoch": 0.871717902204697, + "grad_norm": 276.80419921875, + "learning_rate": 5.604412515936814e-07, + "loss": 15.5291, + "step": 431530 + }, + { + "epoch": 0.8717381028373808, + "grad_norm": 231.42120361328125, + "learning_rate": 5.602806872936079e-07, + "loss": 19.2064, + "step": 431540 + }, + { + "epoch": 0.8717583034700647, + "grad_norm": 480.11328125, + "learning_rate": 5.601201446321891e-07, + "loss": 11.7327, + "step": 431550 + }, + { + "epoch": 0.8717785041027485, + "grad_norm": 294.0952453613281, + "learning_rate": 5.599596236102068e-07, + "loss": 21.1699, + "step": 431560 + }, + { + "epoch": 0.8717987047354323, + "grad_norm": 163.67523193359375, + "learning_rate": 5.597991242284407e-07, + "loss": 18.4039, + "step": 431570 + }, + { + "epoch": 0.8718189053681161, + "grad_norm": 306.99981689453125, + "learning_rate": 5.596386464876769e-07, + "loss": 23.6068, + "step": 431580 + }, + { + "epoch": 0.8718391060008, + "grad_norm": 324.1244812011719, + "learning_rate": 5.594781903886942e-07, + "loss": 15.7907, + "step": 431590 + }, + { + "epoch": 0.8718593066334838, + "grad_norm": 341.48944091796875, + "learning_rate": 5.593177559322776e-07, + "loss": 41.7083, + "step": 431600 + }, + { + "epoch": 0.8718795072661676, + "grad_norm": 171.02854919433594, + "learning_rate": 5.591573431192066e-07, + "loss": 15.9038, + "step": 431610 + }, + { + "epoch": 0.8718997078988514, + "grad_norm": 260.77545166015625, + "learning_rate": 5.589969519502652e-07, + "loss": 18.2895, + "step": 431620 + }, + { + "epoch": 0.8719199085315352, + "grad_norm": 316.0719909667969, + "learning_rate": 5.588365824262343e-07, + "loss": 14.1478, + "step": 431630 + }, + { + "epoch": 0.871940109164219, + "grad_norm": 307.8163146972656, + "learning_rate": 5.586762345478935e-07, + "loss": 11.6938, + "step": 431640 + }, + { + "epoch": 0.8719603097969029, + "grad_norm": 262.48516845703125, + "learning_rate": 5.585159083160274e-07, + "loss": 18.0119, + "step": 431650 + }, + { + "epoch": 0.8719805104295867, + "grad_norm": 337.4559020996094, + "learning_rate": 5.583556037314164e-07, + "loss": 11.2033, + "step": 431660 + }, + { + "epoch": 0.8720007110622705, + "grad_norm": 370.2848815917969, + "learning_rate": 5.581953207948404e-07, + "loss": 12.8963, + "step": 431670 + }, + { + "epoch": 0.8720209116949543, + "grad_norm": 218.72433471679688, + "learning_rate": 5.58035059507081e-07, + "loss": 12.2936, + "step": 431680 + }, + { + "epoch": 0.8720411123276381, + "grad_norm": 207.97972106933594, + "learning_rate": 5.578748198689226e-07, + "loss": 16.1314, + "step": 431690 + }, + { + "epoch": 0.872061312960322, + "grad_norm": 369.6402893066406, + "learning_rate": 5.577146018811419e-07, + "loss": 23.1407, + "step": 431700 + }, + { + "epoch": 0.8720815135930058, + "grad_norm": 316.8023986816406, + "learning_rate": 5.575544055445209e-07, + "loss": 18.9181, + "step": 431710 + }, + { + "epoch": 0.8721017142256895, + "grad_norm": 333.3664245605469, + "learning_rate": 5.573942308598418e-07, + "loss": 14.8765, + "step": 431720 + }, + { + "epoch": 0.8721219148583733, + "grad_norm": 0.0, + "learning_rate": 5.572340778278845e-07, + "loss": 22.3963, + "step": 431730 + }, + { + "epoch": 0.8721421154910571, + "grad_norm": 142.32965087890625, + "learning_rate": 5.570739464494284e-07, + "loss": 24.0365, + "step": 431740 + }, + { + "epoch": 0.872162316123741, + "grad_norm": 211.55694580078125, + "learning_rate": 5.569138367252553e-07, + "loss": 7.9193, + "step": 431750 + }, + { + "epoch": 0.8721825167564248, + "grad_norm": 211.8068389892578, + "learning_rate": 5.567537486561476e-07, + "loss": 10.1956, + "step": 431760 + }, + { + "epoch": 0.8722027173891086, + "grad_norm": 172.6950225830078, + "learning_rate": 5.565936822428808e-07, + "loss": 12.9672, + "step": 431770 + }, + { + "epoch": 0.8722229180217924, + "grad_norm": 128.5299530029297, + "learning_rate": 5.564336374862373e-07, + "loss": 13.7229, + "step": 431780 + }, + { + "epoch": 0.8722431186544762, + "grad_norm": 308.6335754394531, + "learning_rate": 5.562736143869984e-07, + "loss": 14.2043, + "step": 431790 + }, + { + "epoch": 0.87226331928716, + "grad_norm": 286.74591064453125, + "learning_rate": 5.561136129459432e-07, + "loss": 11.8774, + "step": 431800 + }, + { + "epoch": 0.8722835199198439, + "grad_norm": 107.65582275390625, + "learning_rate": 5.559536331638498e-07, + "loss": 15.646, + "step": 431810 + }, + { + "epoch": 0.8723037205525277, + "grad_norm": 259.1940002441406, + "learning_rate": 5.557936750415011e-07, + "loss": 10.7716, + "step": 431820 + }, + { + "epoch": 0.8723239211852115, + "grad_norm": 307.3171691894531, + "learning_rate": 5.556337385796734e-07, + "loss": 10.6385, + "step": 431830 + }, + { + "epoch": 0.8723441218178953, + "grad_norm": 158.9854278564453, + "learning_rate": 5.55473823779149e-07, + "loss": 17.9587, + "step": 431840 + }, + { + "epoch": 0.8723643224505792, + "grad_norm": 44.122806549072266, + "learning_rate": 5.553139306407062e-07, + "loss": 21.5023, + "step": 431850 + }, + { + "epoch": 0.872384523083263, + "grad_norm": 276.1822814941406, + "learning_rate": 5.551540591651234e-07, + "loss": 7.8274, + "step": 431860 + }, + { + "epoch": 0.8724047237159468, + "grad_norm": 404.6112060546875, + "learning_rate": 5.549942093531812e-07, + "loss": 24.195, + "step": 431870 + }, + { + "epoch": 0.8724249243486306, + "grad_norm": 843.1182250976562, + "learning_rate": 5.548343812056584e-07, + "loss": 20.9582, + "step": 431880 + }, + { + "epoch": 0.8724451249813144, + "grad_norm": 184.09068298339844, + "learning_rate": 5.546745747233323e-07, + "loss": 9.6226, + "step": 431890 + }, + { + "epoch": 0.8724653256139983, + "grad_norm": 117.47186279296875, + "learning_rate": 5.545147899069836e-07, + "loss": 11.6845, + "step": 431900 + }, + { + "epoch": 0.8724855262466821, + "grad_norm": 707.5753784179688, + "learning_rate": 5.543550267573916e-07, + "loss": 18.8461, + "step": 431910 + }, + { + "epoch": 0.8725057268793659, + "grad_norm": 172.70492553710938, + "learning_rate": 5.541952852753341e-07, + "loss": 11.6597, + "step": 431920 + }, + { + "epoch": 0.8725259275120497, + "grad_norm": 126.88516998291016, + "learning_rate": 5.540355654615881e-07, + "loss": 14.0196, + "step": 431930 + }, + { + "epoch": 0.8725461281447335, + "grad_norm": 138.84596252441406, + "learning_rate": 5.538758673169348e-07, + "loss": 12.0072, + "step": 431940 + }, + { + "epoch": 0.8725663287774174, + "grad_norm": 415.06658935546875, + "learning_rate": 5.537161908421512e-07, + "loss": 19.3967, + "step": 431950 + }, + { + "epoch": 0.8725865294101012, + "grad_norm": 128.38372802734375, + "learning_rate": 5.535565360380146e-07, + "loss": 36.6789, + "step": 431960 + }, + { + "epoch": 0.872606730042785, + "grad_norm": 229.2674102783203, + "learning_rate": 5.533969029053043e-07, + "loss": 8.6572, + "step": 431970 + }, + { + "epoch": 0.8726269306754687, + "grad_norm": 375.0791320800781, + "learning_rate": 5.532372914448003e-07, + "loss": 16.2197, + "step": 431980 + }, + { + "epoch": 0.8726471313081525, + "grad_norm": 191.91627502441406, + "learning_rate": 5.530777016572763e-07, + "loss": 20.0092, + "step": 431990 + }, + { + "epoch": 0.8726673319408363, + "grad_norm": 126.85926055908203, + "learning_rate": 5.529181335435124e-07, + "loss": 15.4848, + "step": 432000 } ], "logging_steps": 10,