diff --git "a/modpo/lm/(0.1)helpful+(1-0.1)harmless/checkpoint-6000/trainer_state.json" "b/modpo/lm/(0.1)helpful+(1-0.1)harmless/checkpoint-6000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/modpo/lm/(0.1)helpful+(1-0.1)harmless/checkpoint-6000/trainer_state.json" @@ -0,0 +1,7849 @@ +{ + "best_metric": 9.37901496887207, + "best_model_checkpoint": "./output/modpo/lm/(0.1)helpful+(1-0.1)harmless/checkpoint-6000", + "epoch": 1.5, + "eval_steps": 3000, + "global_step": 6000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "accuracy": 0.48750001192092896, + "epoch": 0.0, + "learning_rate": 9.999997393764378e-06, + "logps/chosen": -93.3796157836914, + "logps/margins": 11.91098403930664, + "logps/rejected": -105.29060363769531, + "loss": 133.7584, + "rewards/chosen": 413.84527587890625, + "rewards/margins": 8.948919296264648, + "rewards/rejected": 404.8963317871094, + "step": 10 + }, + { + "accuracy": 0.5, + "epoch": 0.01, + "learning_rate": 9.999966893473195e-06, + "logps/chosen": -135.76376342773438, + "logps/margins": -6.7586541175842285, + "logps/rejected": -129.00511169433594, + "loss": 138.4836, + "rewards/chosen": 530.2479858398438, + "rewards/margins": -5.5106964111328125, + "rewards/rejected": 535.7586059570312, + "step": 20 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.01, + "learning_rate": 9.99991014253465e-06, + "logps/chosen": -114.88323974609375, + "logps/margins": -1.0550857782363892, + "logps/rejected": -113.8281478881836, + "loss": 138.3358, + "rewards/chosen": 476.93487548828125, + "rewards/margins": 12.715731620788574, + "rewards/rejected": 464.2191467285156, + "step": 30 + }, + { + "accuracy": 0.5, + "epoch": 0.01, + "learning_rate": 9.999814530025966e-06, + "logps/chosen": -120.3341293334961, + "logps/margins": 2.649027109146118, + "logps/rejected": -122.98313903808594, + "loss": 135.182, + "rewards/chosen": 510.8414001464844, + "rewards/margins": 21.62598419189453, + "rewards/rejected": 489.21539306640625, + "step": 40 + }, + { + "accuracy": 0.5, + "epoch": 0.01, + "learning_rate": 9.999684648777178e-06, + "logps/chosen": -111.89051818847656, + "logps/margins": 2.990260362625122, + "logps/rejected": -114.88077545166016, + "loss": 109.2412, + "rewards/chosen": 486.3773498535156, + "rewards/margins": 3.3043746948242188, + "rewards/rejected": 483.07305908203125, + "step": 50 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.01, + "learning_rate": 9.99952049967849e-06, + "logps/chosen": -102.34828186035156, + "logps/margins": -4.252016067504883, + "logps/rejected": -98.09626770019531, + "loss": 116.8267, + "rewards/chosen": 432.75390625, + "rewards/margins": 42.292823791503906, + "rewards/rejected": 390.4610595703125, + "step": 60 + }, + { + "accuracy": 0.4375, + "epoch": 0.02, + "learning_rate": 9.99932208385498e-06, + "logps/chosen": -94.0199966430664, + "logps/margins": 10.372164726257324, + "logps/rejected": -104.39215087890625, + "loss": 120.5842, + "rewards/chosen": 375.4162902832031, + "rewards/margins": 1.442042589187622, + "rewards/rejected": 373.9742126464844, + "step": 70 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.02, + "learning_rate": 9.999089402666595e-06, + "logps/chosen": -106.22200012207031, + "logps/margins": -1.2092262506484985, + "logps/rejected": -105.01277160644531, + "loss": 112.4909, + "rewards/chosen": 468.303466796875, + "rewards/margins": 21.131122589111328, + "rewards/rejected": 447.17236328125, + "step": 80 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.02, + "learning_rate": 9.998822457708128e-06, + "logps/chosen": -122.74198913574219, + "logps/margins": -12.238363265991211, + "logps/rejected": -110.50362396240234, + "loss": 135.0719, + "rewards/chosen": 504.971435546875, + "rewards/margins": 34.12373352050781, + "rewards/rejected": 470.8477478027344, + "step": 90 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.03, + "learning_rate": 9.99852125080922e-06, + "logps/chosen": -114.631591796875, + "logps/margins": 10.317441940307617, + "logps/rejected": -124.94903564453125, + "loss": 145.7158, + "rewards/chosen": 453.3251953125, + "rewards/margins": -50.17401885986328, + "rewards/rejected": 503.499267578125, + "step": 100 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.03, + "learning_rate": 9.998185784034346e-06, + "logps/chosen": -118.44602966308594, + "logps/margins": -1.2335379123687744, + "logps/rejected": -117.2125015258789, + "loss": 138.0428, + "rewards/chosen": 484.1726989746094, + "rewards/margins": 37.568016052246094, + "rewards/rejected": 446.60467529296875, + "step": 110 + }, + { + "accuracy": 0.5, + "epoch": 0.03, + "learning_rate": 9.997816059682798e-06, + "logps/chosen": -123.51143646240234, + "logps/margins": -12.290125846862793, + "logps/rejected": -111.2213134765625, + "loss": 135.3236, + "rewards/chosen": 530.8043823242188, + "rewards/margins": 44.61811065673828, + "rewards/rejected": 486.186279296875, + "step": 120 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.03, + "learning_rate": 9.997412080288662e-06, + "logps/chosen": -120.5673828125, + "logps/margins": 4.912835597991943, + "logps/rejected": -125.480224609375, + "loss": 114.3894, + "rewards/chosen": 513.3451538085938, + "rewards/margins": 17.779699325561523, + "rewards/rejected": 495.56549072265625, + "step": 130 + }, + { + "accuracy": 0.4375, + "epoch": 0.04, + "learning_rate": 9.997019213056157e-06, + "logps/chosen": -130.656494140625, + "logps/margins": -0.3943015933036804, + "logps/rejected": -130.26220703125, + "loss": 115.2596, + "rewards/chosen": 547.4656982421875, + "rewards/margins": 32.74653244018555, + "rewards/rejected": 514.7191162109375, + "step": 140 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.04, + "learning_rate": 9.996550156903396e-06, + "logps/chosen": -126.56694030761719, + "logps/margins": -22.663860321044922, + "logps/rejected": -103.903076171875, + "loss": 146.3508, + "rewards/chosen": 524.1796875, + "rewards/margins": 129.40298461914062, + "rewards/rejected": 394.7767333984375, + "step": 150 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.04, + "learning_rate": 9.996046854384551e-06, + "logps/chosen": -116.5171127319336, + "logps/margins": -6.2448344230651855, + "logps/rejected": -110.27227783203125, + "loss": 148.769, + "rewards/chosen": 492.25518798828125, + "rewards/margins": 57.134300231933594, + "rewards/rejected": 435.12091064453125, + "step": 160 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.04, + "learning_rate": 9.995509308949258e-06, + "logps/chosen": -107.74334716796875, + "logps/margins": 2.2624595165252686, + "logps/rejected": -110.00579833984375, + "loss": 146.3888, + "rewards/chosen": 447.30828857421875, + "rewards/margins": 9.063776969909668, + "rewards/rejected": 438.2445373535156, + "step": 170 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.04, + "learning_rate": 9.994937524281855e-06, + "logps/chosen": -108.6446533203125, + "logps/margins": 8.883810043334961, + "logps/rejected": -117.52845764160156, + "loss": 141.5786, + "rewards/chosen": 442.46417236328125, + "rewards/margins": -29.99068832397461, + "rewards/rejected": 472.454833984375, + "step": 180 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.05, + "learning_rate": 9.994331504301357e-06, + "logps/chosen": -127.9815444946289, + "logps/margins": -1.8211044073104858, + "logps/rejected": -126.16044616699219, + "loss": 157.7037, + "rewards/chosen": 519.9771728515625, + "rewards/margins": 23.830402374267578, + "rewards/rejected": 496.1468200683594, + "step": 190 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.05, + "learning_rate": 9.993691253161423e-06, + "logps/chosen": -103.15389251708984, + "logps/margins": 0.33909836411476135, + "logps/rejected": -103.49298095703125, + "loss": 137.1795, + "rewards/chosen": 443.7645568847656, + "rewards/margins": 26.6912841796875, + "rewards/rejected": 417.0732421875, + "step": 200 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.05, + "learning_rate": 9.993016775250337e-06, + "logps/chosen": -133.4051055908203, + "logps/margins": -10.04373836517334, + "logps/rejected": -123.36137390136719, + "loss": 142.2551, + "rewards/chosen": 588.5593872070312, + "rewards/margins": 82.30054473876953, + "rewards/rejected": 506.2587890625, + "step": 210 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.06, + "learning_rate": 9.992308075190974e-06, + "logps/chosen": -118.646240234375, + "logps/margins": -12.178031921386719, + "logps/rejected": -106.46821594238281, + "loss": 116.1819, + "rewards/chosen": 474.79888916015625, + "rewards/margins": 76.67096710205078, + "rewards/rejected": 398.1279296875, + "step": 220 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.06, + "learning_rate": 9.991565157840761e-06, + "logps/chosen": -117.40019226074219, + "logps/margins": 8.61751937866211, + "logps/rejected": -126.0177230834961, + "loss": 119.7188, + "rewards/chosen": 491.43743896484375, + "rewards/margins": 3.2632508277893066, + "rewards/rejected": 488.1742248535156, + "step": 230 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.06, + "learning_rate": 9.990788028291661e-06, + "logps/chosen": -139.20558166503906, + "logps/margins": 1.004795789718628, + "logps/rejected": -140.2103729248047, + "loss": 141.3583, + "rewards/chosen": 596.8570556640625, + "rewards/margins": 32.54899215698242, + "rewards/rejected": 564.30810546875, + "step": 240 + }, + { + "accuracy": 0.4375, + "epoch": 0.06, + "learning_rate": 9.989976691870117e-06, + "logps/chosen": -120.36201477050781, + "logps/margins": 6.580714225769043, + "logps/rejected": -126.94273376464844, + "loss": 153.5183, + "rewards/chosen": 475.3868103027344, + "rewards/margins": -29.04974937438965, + "rewards/rejected": 504.4365234375, + "step": 250 + }, + { + "accuracy": 0.375, + "epoch": 0.07, + "learning_rate": 9.989131154137032e-06, + "logps/chosen": -107.35289001464844, + "logps/margins": 21.417951583862305, + "logps/rejected": -128.77084350585938, + "loss": 144.7915, + "rewards/chosen": 442.931396484375, + "rewards/margins": -71.75289916992188, + "rewards/rejected": 514.6842651367188, + "step": 260 + }, + { + "accuracy": 0.5625, + "epoch": 0.07, + "learning_rate": 9.988251420887727e-06, + "logps/chosen": -123.69624328613281, + "logps/margins": 3.2014732360839844, + "logps/rejected": -126.897705078125, + "loss": 141.9748, + "rewards/chosen": 511.1015625, + "rewards/margins": 23.33009910583496, + "rewards/rejected": 487.77142333984375, + "step": 270 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.07, + "learning_rate": 9.987337498151892e-06, + "logps/chosen": -85.78999328613281, + "logps/margins": 25.029130935668945, + "logps/rejected": -110.8191146850586, + "loss": 155.9791, + "rewards/chosen": 364.8916320800781, + "rewards/margins": -77.27149963378906, + "rewards/rejected": 442.16314697265625, + "step": 280 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.07, + "learning_rate": 9.986389392193556e-06, + "logps/chosen": -97.4422378540039, + "logps/margins": -6.79815673828125, + "logps/rejected": -90.64408111572266, + "loss": 148.4807, + "rewards/chosen": 394.6479187011719, + "rewards/margins": 35.904396057128906, + "rewards/rejected": 358.7435302734375, + "step": 290 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.07, + "learning_rate": 9.98540710951104e-06, + "logps/chosen": -166.30758666992188, + "logps/margins": -12.83739948272705, + "logps/rejected": -153.47018432617188, + "loss": 144.854, + "rewards/chosen": 647.3489990234375, + "rewards/margins": 42.80381393432617, + "rewards/rejected": 604.5452270507812, + "step": 300 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.08, + "learning_rate": 9.98449383955723e-06, + "logps/chosen": -99.15072631835938, + "logps/margins": -8.964288711547852, + "logps/rejected": -90.18643951416016, + "loss": 135.6467, + "rewards/chosen": 431.578857421875, + "rewards/margins": 65.79521179199219, + "rewards/rejected": 365.78363037109375, + "step": 310 + }, + { + "accuracy": 0.5625, + "epoch": 0.08, + "learning_rate": 9.983446639840564e-06, + "logps/chosen": -120.82869720458984, + "logps/margins": -6.914085388183594, + "logps/rejected": -113.91461181640625, + "loss": 118.017, + "rewards/chosen": 500.6940002441406, + "rewards/margins": 63.750816345214844, + "rewards/rejected": 436.94317626953125, + "step": 320 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.08, + "learning_rate": 9.98236528356935e-06, + "logps/chosen": -141.27989196777344, + "logps/margins": -14.53515911102295, + "logps/rejected": -126.74473571777344, + "loss": 121.3466, + "rewards/chosen": 596.2039184570312, + "rewards/margins": 105.9224853515625, + "rewards/rejected": 490.2814025878906, + "step": 330 + }, + { + "accuracy": 0.4124999940395355, + "epoch": 0.09, + "learning_rate": 9.981249778155204e-06, + "logps/chosen": -112.67228698730469, + "logps/margins": 12.975064277648926, + "logps/rejected": -125.6473617553711, + "loss": 159.1415, + "rewards/chosen": 516.9957275390625, + "rewards/margins": 0.5607239007949829, + "rewards/rejected": 516.4349975585938, + "step": 340 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.09, + "learning_rate": 9.980100131243806e-06, + "logps/chosen": -122.88648986816406, + "logps/margins": -6.679039001464844, + "logps/rejected": -116.20744323730469, + "loss": 124.9974, + "rewards/chosen": 515.6216430664062, + "rewards/margins": 31.545093536376953, + "rewards/rejected": 484.0765686035156, + "step": 350 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.09, + "learning_rate": 9.978916350714834e-06, + "logps/chosen": -107.5947494506836, + "logps/margins": -3.0263094902038574, + "logps/rejected": -104.56844329833984, + "loss": 117.159, + "rewards/chosen": 452.861083984375, + "rewards/margins": 24.705232620239258, + "rewards/rejected": 428.15582275390625, + "step": 360 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.09, + "learning_rate": 9.977698444681927e-06, + "logps/chosen": -119.1046142578125, + "logps/margins": -12.394800186157227, + "logps/rejected": -106.7098159790039, + "loss": 146.0885, + "rewards/chosen": 484.4979553222656, + "rewards/margins": 63.81281661987305, + "rewards/rejected": 420.68505859375, + "step": 370 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.1, + "learning_rate": 9.976446421492614e-06, + "logps/chosen": -132.10508728027344, + "logps/margins": -29.49811363220215, + "logps/rejected": -102.60699462890625, + "loss": 152.4038, + "rewards/chosen": 543.501220703125, + "rewards/margins": 132.93089294433594, + "rewards/rejected": 410.5703125, + "step": 380 + }, + { + "accuracy": 0.5, + "epoch": 0.1, + "learning_rate": 9.975160289728263e-06, + "logps/chosen": -126.52391052246094, + "logps/margins": 6.81997537612915, + "logps/rejected": -133.34390258789062, + "loss": 166.8653, + "rewards/chosen": 545.818603515625, + "rewards/margins": 9.508524894714355, + "rewards/rejected": 536.3101196289062, + "step": 390 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.1, + "learning_rate": 9.973840058204025e-06, + "logps/chosen": -112.14449310302734, + "logps/margins": 4.000561237335205, + "logps/rejected": -116.1450424194336, + "loss": 122.4277, + "rewards/chosen": 473.66680908203125, + "rewards/margins": -10.55711841583252, + "rewards/rejected": 484.22393798828125, + "step": 400 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.1, + "learning_rate": 9.972485735968773e-06, + "logps/chosen": -117.541015625, + "logps/margins": 2.5908284187316895, + "logps/rejected": -120.13185119628906, + "loss": 135.7887, + "rewards/chosen": 474.3944396972656, + "rewards/margins": 9.5200777053833, + "rewards/rejected": 464.87432861328125, + "step": 410 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.1, + "learning_rate": 9.97109733230503e-06, + "logps/chosen": -110.06425476074219, + "logps/margins": 17.008153915405273, + "logps/rejected": -127.07242584228516, + "loss": 128.6588, + "rewards/chosen": 474.10406494140625, + "rewards/margins": -34.1292839050293, + "rewards/rejected": 508.23333740234375, + "step": 420 + }, + { + "accuracy": 0.5625, + "epoch": 0.11, + "learning_rate": 9.969674856728921e-06, + "logps/chosen": -134.01382446289062, + "logps/margins": -21.643993377685547, + "logps/rejected": -112.36983489990234, + "loss": 101.4936, + "rewards/chosen": 556.7682495117188, + "rewards/margins": 114.7724609375, + "rewards/rejected": 441.99578857421875, + "step": 430 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.11, + "learning_rate": 9.968218318990095e-06, + "logps/chosen": -122.1700439453125, + "logps/margins": -15.732464790344238, + "logps/rejected": -106.43757629394531, + "loss": 128.4603, + "rewards/chosen": 471.40130615234375, + "rewards/margins": 71.82810974121094, + "rewards/rejected": 399.5732116699219, + "step": 440 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.11, + "learning_rate": 9.966727729071666e-06, + "logps/chosen": -130.5093994140625, + "logps/margins": 1.7436010837554932, + "logps/rejected": -132.25299072265625, + "loss": 130.0241, + "rewards/chosen": 539.8803100585938, + "rewards/margins": -0.21205750107765198, + "rewards/rejected": 540.0924072265625, + "step": 450 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.12, + "learning_rate": 9.96520309719014e-06, + "logps/chosen": -116.49913024902344, + "logps/margins": -11.854392051696777, + "logps/rejected": -104.64473724365234, + "loss": 134.6152, + "rewards/chosen": 494.7870178222656, + "rewards/margins": 89.06049346923828, + "rewards/rejected": 405.7265625, + "step": 460 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.12, + "learning_rate": 9.963644433795348e-06, + "logps/chosen": -138.28787231445312, + "logps/margins": 3.3353500366210938, + "logps/rejected": -141.62322998046875, + "loss": 148.5505, + "rewards/chosen": 526.849365234375, + "rewards/margins": -24.756345748901367, + "rewards/rejected": 551.605712890625, + "step": 470 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.12, + "learning_rate": 9.962051749570372e-06, + "logps/chosen": -120.8651351928711, + "logps/margins": -11.14014720916748, + "logps/rejected": -109.72499084472656, + "loss": 148.0331, + "rewards/chosen": 495.11737060546875, + "rewards/margins": 53.423126220703125, + "rewards/rejected": 441.6942443847656, + "step": 480 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.12, + "learning_rate": 9.960425055431473e-06, + "logps/chosen": -113.05416107177734, + "logps/margins": 4.87796688079834, + "logps/rejected": -117.93212890625, + "loss": 131.5285, + "rewards/chosen": 456.3290100097656, + "rewards/margins": 1.6874191761016846, + "rewards/rejected": 454.6416015625, + "step": 490 + }, + { + "accuracy": 0.4375, + "epoch": 0.12, + "learning_rate": 9.958764362528018e-06, + "logps/chosen": -113.52494049072266, + "logps/margins": 13.0650053024292, + "logps/rejected": -126.5899429321289, + "loss": 148.3237, + "rewards/chosen": 471.31103515625, + "rewards/margins": -28.4617919921875, + "rewards/rejected": 499.7727966308594, + "step": 500 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.13, + "learning_rate": 9.957069682242398e-06, + "logps/chosen": -118.0287094116211, + "logps/margins": 8.119003295898438, + "logps/rejected": -126.14772033691406, + "loss": 152.7346, + "rewards/chosen": 485.8190002441406, + "rewards/margins": -3.1366684436798096, + "rewards/rejected": 488.9556579589844, + "step": 510 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.13, + "learning_rate": 9.955341026189957e-06, + "logps/chosen": -126.87492370605469, + "logps/margins": -9.084429740905762, + "logps/rejected": -117.7905044555664, + "loss": 127.188, + "rewards/chosen": 541.4051513671875, + "rewards/margins": 78.24760437011719, + "rewards/rejected": 463.15753173828125, + "step": 520 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.13, + "learning_rate": 9.953578406218904e-06, + "logps/chosen": -107.30830383300781, + "logps/margins": -2.4622342586517334, + "logps/rejected": -104.84605407714844, + "loss": 106.8134, + "rewards/chosen": 432.5232849121094, + "rewards/margins": 31.565603256225586, + "rewards/rejected": 400.9576721191406, + "step": 530 + }, + { + "accuracy": 0.5625, + "epoch": 0.14, + "learning_rate": 9.951781834410245e-06, + "logps/chosen": -131.9517822265625, + "logps/margins": -20.995197296142578, + "logps/rejected": -110.95658111572266, + "loss": 108.0191, + "rewards/chosen": 521.8550415039062, + "rewards/margins": 81.27650451660156, + "rewards/rejected": 440.57861328125, + "step": 540 + }, + { + "accuracy": 0.5, + "epoch": 0.14, + "learning_rate": 9.949951323077687e-06, + "logps/chosen": -121.85911560058594, + "logps/margins": -7.005537986755371, + "logps/rejected": -114.85357666015625, + "loss": 125.2937, + "rewards/chosen": 468.5213928222656, + "rewards/margins": 46.27653121948242, + "rewards/rejected": 422.244873046875, + "step": 550 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.14, + "learning_rate": 9.948086884767554e-06, + "logps/chosen": -114.4699478149414, + "logps/margins": 5.539546012878418, + "logps/rejected": -120.00948333740234, + "loss": 131.9843, + "rewards/chosen": 434.02178955078125, + "rewards/margins": -3.2230522632598877, + "rewards/rejected": 437.24481201171875, + "step": 560 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.14, + "learning_rate": 9.946188532258714e-06, + "logps/chosen": -118.58795166015625, + "logps/margins": -15.705572128295898, + "logps/rejected": -102.88238525390625, + "loss": 108.7325, + "rewards/chosen": 474.025390625, + "rewards/margins": 69.33538818359375, + "rewards/rejected": 404.69000244140625, + "step": 570 + }, + { + "accuracy": 0.5, + "epoch": 0.14, + "learning_rate": 9.94425627856248e-06, + "logps/chosen": -126.64949798583984, + "logps/margins": -15.877286911010742, + "logps/rejected": -110.772216796875, + "loss": 129.0806, + "rewards/chosen": 463.27703857421875, + "rewards/margins": 51.13494110107422, + "rewards/rejected": 412.14202880859375, + "step": 580 + }, + { + "accuracy": 0.5625, + "epoch": 0.15, + "learning_rate": 9.94229013692252e-06, + "logps/chosen": -119.6028823852539, + "logps/margins": 4.306146144866943, + "logps/rejected": -123.90901947021484, + "loss": 145.1564, + "rewards/chosen": 479.9534606933594, + "rewards/margins": 27.486495971679688, + "rewards/rejected": 452.46697998046875, + "step": 590 + }, + { + "accuracy": 0.5, + "epoch": 0.15, + "learning_rate": 9.940290120814777e-06, + "logps/chosen": -121.96507263183594, + "logps/margins": -1.4980885982513428, + "logps/rejected": -120.46699523925781, + "loss": 122.7967, + "rewards/chosen": 442.9217834472656, + "rewards/margins": -10.231027603149414, + "rewards/rejected": 453.15283203125, + "step": 600 + }, + { + "accuracy": 0.4375, + "epoch": 0.15, + "learning_rate": 9.938256243947365e-06, + "logps/chosen": -122.54063415527344, + "logps/margins": 5.938465595245361, + "logps/rejected": -128.47909545898438, + "loss": 125.4419, + "rewards/chosen": 463.1902770996094, + "rewards/margins": -8.743739128112793, + "rewards/rejected": 471.93402099609375, + "step": 610 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.15, + "learning_rate": 9.936188520260478e-06, + "logps/chosen": -127.6741943359375, + "logps/margins": -20.188371658325195, + "logps/rejected": -107.4858169555664, + "loss": 135.4379, + "rewards/chosen": 473.99755859375, + "rewards/margins": 93.54142761230469, + "rewards/rejected": 380.4561462402344, + "step": 620 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.16, + "learning_rate": 9.934086963926301e-06, + "logps/chosen": -132.5676727294922, + "logps/margins": 19.84319496154785, + "logps/rejected": -152.41085815429688, + "loss": 126.5401, + "rewards/chosen": 506.6675720214844, + "rewards/margins": -60.05859375, + "rewards/rejected": 566.7261962890625, + "step": 630 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.16, + "learning_rate": 9.931951589348906e-06, + "logps/chosen": -155.48123168945312, + "logps/margins": -3.0628743171691895, + "logps/rejected": -152.41835021972656, + "loss": 130.4227, + "rewards/chosen": 583.5057983398438, + "rewards/margins": 43.2299919128418, + "rewards/rejected": 540.2757568359375, + "step": 640 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.16, + "learning_rate": 9.929782411164155e-06, + "logps/chosen": -136.21575927734375, + "logps/margins": -33.564117431640625, + "logps/rejected": -102.65164947509766, + "loss": 132.5515, + "rewards/chosen": 489.7196350097656, + "rewards/margins": 123.5712890625, + "rewards/rejected": 366.1483154296875, + "step": 650 + }, + { + "accuracy": 0.5, + "epoch": 0.17, + "learning_rate": 9.9275794442396e-06, + "logps/chosen": -122.29302978515625, + "logps/margins": 6.48095178604126, + "logps/rejected": -128.77395629882812, + "loss": 111.2998, + "rewards/chosen": 431.2083435058594, + "rewards/margins": 7.137983798980713, + "rewards/rejected": 424.07037353515625, + "step": 660 + }, + { + "accuracy": 0.5, + "epoch": 0.17, + "learning_rate": 9.925567897109623e-06, + "logps/chosen": -122.2877197265625, + "logps/margins": -3.691814422607422, + "logps/rejected": -118.5959243774414, + "loss": 133.0961, + "rewards/chosen": 421.86761474609375, + "rewards/margins": 8.400960922241211, + "rewards/rejected": 413.4666442871094, + "step": 670 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.17, + "learning_rate": 9.923300773368894e-06, + "logps/chosen": -141.562255859375, + "logps/margins": -31.060766220092773, + "logps/rejected": -110.50150299072266, + "loss": 109.1437, + "rewards/chosen": 494.37713623046875, + "rewards/margins": 127.0127182006836, + "rewards/rejected": 367.36444091796875, + "step": 680 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.17, + "learning_rate": 9.920999905313523e-06, + "logps/chosen": -141.4644012451172, + "logps/margins": -9.384162902832031, + "logps/rejected": -132.0802459716797, + "loss": 112.9863, + "rewards/chosen": 504.72314453125, + "rewards/margins": 68.13895416259766, + "rewards/rejected": 436.58416748046875, + "step": 690 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.17, + "learning_rate": 9.918665308713661e-06, + "logps/chosen": -153.2477569580078, + "logps/margins": -22.811376571655273, + "logps/rejected": -130.43637084960938, + "loss": 142.2903, + "rewards/chosen": 514.7084350585938, + "rewards/margins": 77.32051086425781, + "rewards/rejected": 437.38787841796875, + "step": 700 + }, + { + "accuracy": 0.5, + "epoch": 0.18, + "learning_rate": 9.916296999570643e-06, + "logps/chosen": -134.18746948242188, + "logps/margins": 1.5920852422714233, + "logps/rejected": -135.779541015625, + "loss": 131.1511, + "rewards/chosen": 420.29541015625, + "rewards/margins": 21.671588897705078, + "rewards/rejected": 398.62384033203125, + "step": 710 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.18, + "learning_rate": 9.913894994116861e-06, + "logps/chosen": -137.5404052734375, + "logps/margins": -9.69748592376709, + "logps/rejected": -127.84293365478516, + "loss": 101.6574, + "rewards/chosen": 415.5857849121094, + "rewards/margins": 43.645694732666016, + "rewards/rejected": 371.94012451171875, + "step": 720 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.18, + "learning_rate": 9.911459308815667e-06, + "logps/chosen": -149.5596466064453, + "logps/margins": 3.927189350128174, + "logps/rejected": -153.48684692382812, + "loss": 123.5127, + "rewards/chosen": 428.8338928222656, + "rewards/margins": 5.8749799728393555, + "rewards/rejected": 422.9588928222656, + "step": 730 + }, + { + "accuracy": 0.5, + "epoch": 0.18, + "learning_rate": 9.908989960361256e-06, + "logps/chosen": -150.97915649414062, + "logps/margins": 3.387871503829956, + "logps/rejected": -154.3670196533203, + "loss": 131.188, + "rewards/chosen": 410.087890625, + "rewards/margins": -16.447601318359375, + "rewards/rejected": 426.5354919433594, + "step": 740 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.19, + "learning_rate": 9.906486965678542e-06, + "logps/chosen": -176.5621337890625, + "logps/margins": -2.479750156402588, + "logps/rejected": -174.08238220214844, + "loss": 140.6822, + "rewards/chosen": 430.4986267089844, + "rewards/margins": 26.621023178100586, + "rewards/rejected": 403.87762451171875, + "step": 750 + }, + { + "accuracy": 0.4000000059604645, + "epoch": 0.19, + "learning_rate": 9.903950341923062e-06, + "logps/chosen": -166.80667114257812, + "logps/margins": 11.614864349365234, + "logps/rejected": -178.4215545654297, + "loss": 109.3536, + "rewards/chosen": 407.3410949707031, + "rewards/margins": 2.5636985301971436, + "rewards/rejected": 404.77740478515625, + "step": 760 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.19, + "learning_rate": 9.90138010648084e-06, + "logps/chosen": -215.47427368164062, + "logps/margins": -19.613048553466797, + "logps/rejected": -195.86119079589844, + "loss": 119.3495, + "rewards/chosen": 479.86328125, + "rewards/margins": 65.37360382080078, + "rewards/rejected": 414.48968505859375, + "step": 770 + }, + { + "accuracy": 0.5625, + "epoch": 0.2, + "learning_rate": 9.898776276968273e-06, + "logps/chosen": -223.4987335205078, + "logps/margins": 15.595968246459961, + "logps/rejected": -239.09469604492188, + "loss": 134.1903, + "rewards/chosen": 455.37860107421875, + "rewards/margins": -22.406774520874023, + "rewards/rejected": 477.785400390625, + "step": 780 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.2, + "learning_rate": 9.896138871232017e-06, + "logps/chosen": -241.5811767578125, + "logps/margins": -30.245168685913086, + "logps/rejected": -211.3360137939453, + "loss": 101.2319, + "rewards/chosen": 445.71923828125, + "rewards/margins": 55.09416961669922, + "rewards/rejected": 390.6251220703125, + "step": 790 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.2, + "learning_rate": 9.893467907348855e-06, + "logps/chosen": -263.97613525390625, + "logps/margins": -18.73114013671875, + "logps/rejected": -245.24502563476562, + "loss": 92.6476, + "rewards/chosen": 434.5860900878906, + "rewards/margins": 44.99291229248047, + "rewards/rejected": 389.59320068359375, + "step": 800 + }, + { + "accuracy": 0.5625, + "epoch": 0.2, + "learning_rate": 9.890763403625581e-06, + "logps/chosen": -289.3885803222656, + "logps/margins": -26.712799072265625, + "logps/rejected": -262.6757507324219, + "loss": 94.6458, + "rewards/chosen": 415.63446044921875, + "rewards/margins": 54.22618865966797, + "rewards/rejected": 361.4082336425781, + "step": 810 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.2, + "learning_rate": 9.888025378598866e-06, + "logps/chosen": -308.1341857910156, + "logps/margins": -10.70995044708252, + "logps/rejected": -297.4242248535156, + "loss": 90.866, + "rewards/chosen": 328.09674072265625, + "rewards/margins": 17.335880279541016, + "rewards/rejected": 310.7608642578125, + "step": 820 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.21, + "learning_rate": 9.885253851035146e-06, + "logps/chosen": -327.7950744628906, + "logps/margins": -7.016164302825928, + "logps/rejected": -320.7789001464844, + "loss": 76.8957, + "rewards/chosen": 236.4355010986328, + "rewards/margins": -13.382349014282227, + "rewards/rejected": 249.81784057617188, + "step": 830 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.21, + "learning_rate": 9.882448839930473e-06, + "logps/chosen": -377.1629638671875, + "logps/margins": -23.309783935546875, + "logps/rejected": -353.8531494140625, + "loss": 63.9763, + "rewards/chosen": 152.348388671875, + "rewards/margins": -7.720555782318115, + "rewards/rejected": 160.06893920898438, + "step": 840 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.21, + "learning_rate": 9.879610364510398e-06, + "logps/chosen": -553.8534545898438, + "logps/margins": -46.613929748535156, + "logps/rejected": -507.23944091796875, + "loss": 61.1502, + "rewards/chosen": 134.6775360107422, + "rewards/margins": -14.201289176940918, + "rewards/rejected": 148.87881469726562, + "step": 850 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.21, + "learning_rate": 9.876738444229845e-06, + "logps/chosen": -517.9915771484375, + "logps/margins": -17.600528717041016, + "logps/rejected": -500.3910217285156, + "loss": 38.0526, + "rewards/chosen": 39.79627990722656, + "rewards/margins": -0.1203649491071701, + "rewards/rejected": 39.91664505004883, + "step": 860 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.22, + "learning_rate": 9.873833098772953e-06, + "logps/chosen": -522.0090942382812, + "logps/margins": -18.093042373657227, + "logps/rejected": -503.916015625, + "loss": 58.6969, + "rewards/chosen": 49.566627502441406, + "rewards/margins": -4.3236985206604, + "rewards/rejected": 53.89031982421875, + "step": 870 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.22, + "learning_rate": 9.870894348052973e-06, + "logps/chosen": -615.4816284179688, + "logps/margins": -81.37036895751953, + "logps/rejected": -534.1112670898438, + "loss": 40.4342, + "rewards/chosen": 77.39622497558594, + "rewards/margins": 13.369977951049805, + "rewards/rejected": 64.0262451171875, + "step": 880 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.22, + "learning_rate": 9.868519309215434e-06, + "logps/chosen": -508.20281982421875, + "logps/margins": -48.37471008300781, + "logps/rejected": -459.82806396484375, + "loss": 42.0667, + "rewards/chosen": 112.64872741699219, + "rewards/margins": -9.064292907714844, + "rewards/rejected": 121.71302795410156, + "step": 890 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.23, + "learning_rate": 9.865520479934018e-06, + "logps/chosen": -442.31378173828125, + "logps/margins": 13.257832527160645, + "logps/rejected": -455.57159423828125, + "loss": 41.185, + "rewards/chosen": 53.9986572265625, + "rewards/margins": -16.399885177612305, + "rewards/rejected": 70.39854431152344, + "step": 900 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.23, + "learning_rate": 9.862488302364221e-06, + "logps/chosen": -504.52069091796875, + "logps/margins": -35.36737823486328, + "logps/rejected": -469.15325927734375, + "loss": 36.7501, + "rewards/chosen": 99.33773040771484, + "rewards/margins": 24.268329620361328, + "rewards/rejected": 75.06940460205078, + "step": 910 + }, + { + "accuracy": 0.5, + "epoch": 0.23, + "learning_rate": 9.859422797288592e-06, + "logps/chosen": -464.4171447753906, + "logps/margins": -42.11968231201172, + "logps/rejected": -422.2974548339844, + "loss": 37.0613, + "rewards/chosen": 103.9990463256836, + "rewards/margins": 8.000553131103516, + "rewards/rejected": 95.99848937988281, + "step": 920 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.23, + "learning_rate": 9.856323985718113e-06, + "logps/chosen": -658.6473999023438, + "logps/margins": -101.72129821777344, + "logps/rejected": -556.9261474609375, + "loss": 30.5355, + "rewards/chosen": 133.86251831054688, + "rewards/margins": 34.744510650634766, + "rewards/rejected": 99.1180191040039, + "step": 930 + }, + { + "accuracy": 0.625, + "epoch": 0.23, + "learning_rate": 9.853191888892044e-06, + "logps/chosen": -538.7434692382812, + "logps/margins": -132.65087890625, + "logps/rejected": -406.09259033203125, + "loss": 30.3593, + "rewards/chosen": 99.0390396118164, + "rewards/margins": 28.37063980102539, + "rewards/rejected": 70.66839599609375, + "step": 940 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.24, + "learning_rate": 9.850026528277783e-06, + "logps/chosen": -538.6829223632812, + "logps/margins": -33.99311828613281, + "logps/rejected": -504.6897888183594, + "loss": 33.6202, + "rewards/chosen": 69.96847534179688, + "rewards/margins": 13.770176887512207, + "rewards/rejected": 56.19830322265625, + "step": 950 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.24, + "learning_rate": 9.84682792557072e-06, + "logps/chosen": -555.5313110351562, + "logps/margins": -6.0437469482421875, + "logps/rejected": -549.4874877929688, + "loss": 39.7646, + "rewards/chosen": 91.08760070800781, + "rewards/margins": 10.83223819732666, + "rewards/rejected": 80.25535583496094, + "step": 960 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.24, + "learning_rate": 9.843596102694086e-06, + "logps/chosen": -564.9844970703125, + "logps/margins": 9.147089004516602, + "logps/rejected": -574.1315307617188, + "loss": 27.5936, + "rewards/chosen": 62.1285285949707, + "rewards/margins": 7.250605583190918, + "rewards/rejected": 54.87792205810547, + "step": 970 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.24, + "learning_rate": 9.8403310817988e-06, + "logps/chosen": -599.7926025390625, + "logps/margins": -108.795166015625, + "logps/rejected": -490.99737548828125, + "loss": 27.6697, + "rewards/chosen": 93.82215881347656, + "rewards/margins": 28.41013526916504, + "rewards/rejected": 65.41202545166016, + "step": 980 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.25, + "learning_rate": 9.837032885263325e-06, + "logps/chosen": -575.2697143554688, + "logps/margins": -32.50746536254883, + "logps/rejected": -542.7622680664062, + "loss": 33.0137, + "rewards/chosen": 73.60626220703125, + "rewards/margins": 6.782553195953369, + "rewards/rejected": 66.8237075805664, + "step": 990 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.25, + "learning_rate": 9.833701535693505e-06, + "logps/chosen": -543.0936889648438, + "logps/margins": -82.6046371459961, + "logps/rejected": -460.48907470703125, + "loss": 22.6168, + "rewards/chosen": 76.14232635498047, + "rewards/margins": 19.556093215942383, + "rewards/rejected": 56.58623504638672, + "step": 1000 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.25, + "learning_rate": 9.83033705592242e-06, + "logps/chosen": -517.6455078125, + "logps/margins": -49.99604415893555, + "logps/rejected": -467.6495056152344, + "loss": 30.1354, + "rewards/chosen": 86.97395324707031, + "rewards/margins": 14.630824089050293, + "rewards/rejected": 72.34312438964844, + "step": 1010 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.26, + "learning_rate": 9.826939469010221e-06, + "logps/chosen": -601.3553466796875, + "logps/margins": -86.20882415771484, + "logps/rejected": -515.1465454101562, + "loss": 27.2986, + "rewards/chosen": 64.21337127685547, + "rewards/margins": 12.685049057006836, + "rewards/rejected": 51.5283317565918, + "step": 1020 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.26, + "learning_rate": 9.823508798243975e-06, + "logps/chosen": -435.2098693847656, + "logps/margins": 16.58740997314453, + "logps/rejected": -451.7972717285156, + "loss": 21.9361, + "rewards/chosen": 99.85035705566406, + "rewards/margins": 40.218257904052734, + "rewards/rejected": 59.63209915161133, + "step": 1030 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.26, + "learning_rate": 9.820045067137509e-06, + "logps/chosen": -571.32763671875, + "logps/margins": 42.47711944580078, + "logps/rejected": -613.8048095703125, + "loss": 21.4531, + "rewards/chosen": 83.22547149658203, + "rewards/margins": 32.778831481933594, + "rewards/rejected": 50.44664764404297, + "step": 1040 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.26, + "learning_rate": 9.816548299431244e-06, + "logps/chosen": -501.0069885253906, + "logps/margins": -53.48107147216797, + "logps/rejected": -447.52593994140625, + "loss": 25.1803, + "rewards/chosen": 53.4207649230957, + "rewards/margins": 10.510202407836914, + "rewards/rejected": 42.91056442260742, + "step": 1050 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.27, + "learning_rate": 9.813018519092033e-06, + "logps/chosen": -482.2474670410156, + "logps/margins": -19.208236694335938, + "logps/rejected": -463.0392150878906, + "loss": 29.1467, + "rewards/chosen": 64.35306549072266, + "rewards/margins": -3.096162796020508, + "rewards/rejected": 67.44923400878906, + "step": 1060 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.27, + "learning_rate": 9.809455750312996e-06, + "logps/chosen": -483.1666564941406, + "logps/margins": -41.64806365966797, + "logps/rejected": -441.51861572265625, + "loss": 26.2993, + "rewards/chosen": 60.20487594604492, + "rewards/margins": 14.459933280944824, + "rewards/rejected": 45.74494171142578, + "step": 1070 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.27, + "learning_rate": 9.805860017513363e-06, + "logps/chosen": -559.7510986328125, + "logps/margins": -26.3972225189209, + "logps/rejected": -533.3538818359375, + "loss": 22.7783, + "rewards/chosen": 58.03987503051758, + "rewards/margins": 12.36357307434082, + "rewards/rejected": 45.676300048828125, + "step": 1080 + }, + { + "accuracy": 0.5, + "epoch": 0.27, + "learning_rate": 9.802231345338291e-06, + "logps/chosen": -630.19384765625, + "logps/margins": -112.15211486816406, + "logps/rejected": -518.0416870117188, + "loss": 25.8057, + "rewards/chosen": 64.20438385009766, + "rewards/margins": 5.96327018737793, + "rewards/rejected": 58.241119384765625, + "step": 1090 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.28, + "learning_rate": 9.79856975865871e-06, + "logps/chosen": -490.21392822265625, + "logps/margins": 76.55106353759766, + "logps/rejected": -566.7649536132812, + "loss": 27.4694, + "rewards/chosen": 56.94901657104492, + "rewards/margins": 6.143309116363525, + "rewards/rejected": 50.80570983886719, + "step": 1100 + }, + { + "accuracy": 0.5625, + "epoch": 0.28, + "learning_rate": 9.794875282571143e-06, + "logps/chosen": -581.5607299804688, + "logps/margins": -37.635284423828125, + "logps/rejected": -543.9254150390625, + "loss": 26.1387, + "rewards/chosen": 54.39244842529297, + "rewards/margins": 4.597799301147461, + "rewards/rejected": 49.794647216796875, + "step": 1110 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.28, + "learning_rate": 9.791147942397536e-06, + "logps/chosen": -540.0931396484375, + "logps/margins": 53.6206169128418, + "logps/rejected": -593.7137451171875, + "loss": 28.4539, + "rewards/chosen": 66.68229675292969, + "rewards/margins": -7.71429443359375, + "rewards/rejected": 74.39659118652344, + "step": 1120 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.28, + "learning_rate": 9.787387763685085e-06, + "logps/chosen": -481.46514892578125, + "logps/margins": -80.8868179321289, + "logps/rejected": -400.57830810546875, + "loss": 28.6371, + "rewards/chosen": 57.667137145996094, + "rewards/margins": 3.522404909133911, + "rewards/rejected": 54.144737243652344, + "step": 1130 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.28, + "learning_rate": 9.78359477220607e-06, + "logps/chosen": -490.8694763183594, + "logps/margins": 6.0255126953125, + "logps/rejected": -496.89495849609375, + "loss": 26.7117, + "rewards/chosen": 41.92536544799805, + "rewards/margins": -2.540074586868286, + "rewards/rejected": 44.46543502807617, + "step": 1140 + }, + { + "accuracy": 0.5, + "epoch": 0.29, + "learning_rate": 9.779768993957658e-06, + "logps/chosen": -578.5317993164062, + "logps/margins": -22.679393768310547, + "logps/rejected": -555.8523559570312, + "loss": 25.4516, + "rewards/chosen": 33.736385345458984, + "rewards/margins": -4.243011951446533, + "rewards/rejected": 37.97939682006836, + "step": 1150 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.29, + "learning_rate": 9.775910455161741e-06, + "logps/chosen": -646.1868286132812, + "logps/margins": -106.49690246582031, + "logps/rejected": -539.68994140625, + "loss": 25.428, + "rewards/chosen": 66.29207611083984, + "rewards/margins": -2.694920063018799, + "rewards/rejected": 68.98699188232422, + "step": 1160 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.29, + "learning_rate": 9.772019182264756e-06, + "logps/chosen": -520.0985107421875, + "logps/margins": -34.040977478027344, + "logps/rejected": -486.0575256347656, + "loss": 23.6695, + "rewards/chosen": 69.80307006835938, + "rewards/margins": 0.22590890526771545, + "rewards/rejected": 69.57716369628906, + "step": 1170 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.29, + "learning_rate": 9.768095201937495e-06, + "logps/chosen": -591.0509643554688, + "logps/margins": 17.074010848999023, + "logps/rejected": -608.1249389648438, + "loss": 21.9663, + "rewards/chosen": 41.83124542236328, + "rewards/margins": 7.521315097808838, + "rewards/rejected": 34.30992889404297, + "step": 1180 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.3, + "learning_rate": 9.764138541074927e-06, + "logps/chosen": -502.95697021484375, + "logps/margins": -54.69042205810547, + "logps/rejected": -448.26654052734375, + "loss": 22.7701, + "rewards/chosen": 49.032745361328125, + "rewards/margins": 7.824349403381348, + "rewards/rejected": 41.20839309692383, + "step": 1190 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.3, + "learning_rate": 9.760149226796017e-06, + "logps/chosen": -642.0862426757812, + "logps/margins": -28.133686065673828, + "logps/rejected": -613.9525756835938, + "loss": 19.8647, + "rewards/chosen": 77.7999496459961, + "rewards/margins": -2.5873701572418213, + "rewards/rejected": 80.38732147216797, + "step": 1200 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.3, + "learning_rate": 9.756127286443532e-06, + "logps/chosen": -513.748779296875, + "logps/margins": -36.45684814453125, + "logps/rejected": -477.2919921875, + "loss": 21.8551, + "rewards/chosen": 50.418861389160156, + "rewards/margins": -4.949378967285156, + "rewards/rejected": 55.36824417114258, + "step": 1210 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.3, + "learning_rate": 9.75207274758386e-06, + "logps/chosen": -522.87158203125, + "logps/margins": -10.421526908874512, + "logps/rejected": -512.4500122070312, + "loss": 22.6088, + "rewards/chosen": 39.2586555480957, + "rewards/margins": -15.369707107543945, + "rewards/rejected": 54.62836837768555, + "step": 1220 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.31, + "learning_rate": 9.747985638006821e-06, + "logps/chosen": -542.8447875976562, + "logps/margins": -48.224971771240234, + "logps/rejected": -494.6197204589844, + "loss": 22.454, + "rewards/chosen": 30.779373168945312, + "rewards/margins": 2.1494483947753906, + "rewards/rejected": 28.62992286682129, + "step": 1230 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.31, + "learning_rate": 9.743865985725474e-06, + "logps/chosen": -549.7471923828125, + "logps/margins": -46.43690872192383, + "logps/rejected": -503.31024169921875, + "loss": 21.5575, + "rewards/chosen": 59.54804611206055, + "rewards/margins": -0.4462594985961914, + "rewards/rejected": 59.99430465698242, + "step": 1240 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.31, + "learning_rate": 9.739713818975921e-06, + "logps/chosen": -669.3917236328125, + "logps/margins": 13.779818534851074, + "logps/rejected": -683.1715087890625, + "loss": 22.0046, + "rewards/chosen": 50.08589553833008, + "rewards/margins": -3.734248399734497, + "rewards/rejected": 53.82014083862305, + "step": 1250 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.32, + "learning_rate": 9.735529166217125e-06, + "logps/chosen": -597.8067626953125, + "logps/margins": -119.0348129272461, + "logps/rejected": -478.77203369140625, + "loss": 22.4209, + "rewards/chosen": 47.96198654174805, + "rewards/margins": 5.33328914642334, + "rewards/rejected": 42.628700256347656, + "step": 1260 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.32, + "learning_rate": 9.731312056130709e-06, + "logps/chosen": -636.9605712890625, + "logps/margins": -27.541976928710938, + "logps/rejected": -609.4185791015625, + "loss": 23.1305, + "rewards/chosen": 46.564170837402344, + "rewards/margins": -0.4471861720085144, + "rewards/rejected": 47.011356353759766, + "step": 1270 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.32, + "learning_rate": 9.72706251762075e-06, + "logps/chosen": -551.7139892578125, + "logps/margins": -88.34957122802734, + "logps/rejected": -463.36444091796875, + "loss": 21.2655, + "rewards/chosen": 41.20339584350586, + "rewards/margins": 6.312152862548828, + "rewards/rejected": 34.8912467956543, + "step": 1280 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.32, + "learning_rate": 9.722780579813593e-06, + "logps/chosen": -533.3919677734375, + "logps/margins": -67.81855773925781, + "logps/rejected": -465.5733337402344, + "loss": 34.4737, + "rewards/chosen": 68.53815460205078, + "rewards/margins": 5.378384590148926, + "rewards/rejected": 63.15977096557617, + "step": 1290 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.33, + "learning_rate": 9.718466272057656e-06, + "logps/chosen": -596.3392333984375, + "logps/margins": -70.9132080078125, + "logps/rejected": -525.4259643554688, + "loss": 18.2426, + "rewards/chosen": 60.979576110839844, + "rewards/margins": 1.9702014923095703, + "rewards/rejected": 59.009376525878906, + "step": 1300 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.33, + "learning_rate": 9.714119623923208e-06, + "logps/chosen": -432.83502197265625, + "logps/margins": -35.29749298095703, + "logps/rejected": -397.5375061035156, + "loss": 19.7256, + "rewards/chosen": 32.66984558105469, + "rewards/margins": -6.598059177398682, + "rewards/rejected": 39.267906188964844, + "step": 1310 + }, + { + "accuracy": 0.375, + "epoch": 0.33, + "learning_rate": 9.70974066520219e-06, + "logps/chosen": -517.6624145507812, + "logps/margins": 116.9413833618164, + "logps/rejected": -634.6038208007812, + "loss": 20.6162, + "rewards/chosen": 34.88326644897461, + "rewards/margins": -5.43569803237915, + "rewards/rejected": 40.318965911865234, + "step": 1320 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.33, + "learning_rate": 9.705329425907993e-06, + "logps/chosen": -462.745361328125, + "logps/margins": 6.182191371917725, + "logps/rejected": -468.92755126953125, + "loss": 22.5168, + "rewards/chosen": 44.15049362182617, + "rewards/margins": 2.039771556854248, + "rewards/rejected": 42.1107292175293, + "step": 1330 + }, + { + "accuracy": 0.4375, + "epoch": 0.34, + "learning_rate": 9.700885936275259e-06, + "logps/chosen": -569.2468872070312, + "logps/margins": 3.999840497970581, + "logps/rejected": -573.2467651367188, + "loss": 19.2061, + "rewards/chosen": 58.11320877075195, + "rewards/margins": -6.799729347229004, + "rewards/rejected": 64.9129409790039, + "step": 1340 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.34, + "learning_rate": 9.696410226759678e-06, + "logps/chosen": -589.9055786132812, + "logps/margins": -2.0938849449157715, + "logps/rejected": -587.8116455078125, + "loss": 19.794, + "rewards/chosen": 49.223289489746094, + "rewards/margins": 3.8248531818389893, + "rewards/rejected": 45.398433685302734, + "step": 1350 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.34, + "learning_rate": 9.691902328037775e-06, + "logps/chosen": -543.6055297851562, + "logps/margins": 73.20223236083984, + "logps/rejected": -616.8077392578125, + "loss": 21.2294, + "rewards/chosen": 39.65159225463867, + "rewards/margins": 4.3111958503723145, + "rewards/rejected": 35.34039306640625, + "step": 1360 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.34, + "learning_rate": 9.687362271006693e-06, + "logps/chosen": -568.1315307617188, + "logps/margins": -51.146949768066406, + "logps/rejected": -516.9845581054688, + "loss": 17.0099, + "rewards/chosen": 40.02988052368164, + "rewards/margins": 7.241976737976074, + "rewards/rejected": 32.78790283203125, + "step": 1370 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.34, + "learning_rate": 9.682790086784e-06, + "logps/chosen": -649.5811767578125, + "logps/margins": -125.8785629272461, + "logps/rejected": -523.7025756835938, + "loss": 16.4055, + "rewards/chosen": 56.72700881958008, + "rewards/margins": 6.38532018661499, + "rewards/rejected": 50.34169387817383, + "step": 1380 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.35, + "learning_rate": 9.678185806707449e-06, + "logps/chosen": -485.8077697753906, + "logps/margins": 40.63877487182617, + "logps/rejected": -526.4464721679688, + "loss": 15.9332, + "rewards/chosen": 58.778663635253906, + "rewards/margins": 7.008481502532959, + "rewards/rejected": 51.770179748535156, + "step": 1390 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.35, + "learning_rate": 9.673549462334795e-06, + "logps/chosen": -558.7322387695312, + "logps/margins": -21.732154846191406, + "logps/rejected": -537.0000610351562, + "loss": 15.0014, + "rewards/chosen": 34.78699493408203, + "rewards/margins": 8.713813781738281, + "rewards/rejected": 26.07318115234375, + "step": 1400 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.35, + "learning_rate": 9.668881085443545e-06, + "logps/chosen": -705.8190307617188, + "logps/margins": -244.4308624267578, + "logps/rejected": -461.38824462890625, + "loss": 13.2182, + "rewards/chosen": 39.64697265625, + "rewards/margins": 14.922632217407227, + "rewards/rejected": 24.72433853149414, + "step": 1410 + }, + { + "accuracy": 0.5625, + "epoch": 0.35, + "learning_rate": 9.664180708030768e-06, + "logps/chosen": -549.8201293945312, + "logps/margins": -114.0276870727539, + "logps/rejected": -435.79248046875, + "loss": 18.8816, + "rewards/chosen": 54.656654357910156, + "rewards/margins": 7.197030067443848, + "rewards/rejected": 47.459625244140625, + "step": 1420 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.36, + "learning_rate": 9.659448362312862e-06, + "logps/chosen": -574.7101440429688, + "logps/margins": -73.7129898071289, + "logps/rejected": -500.9971618652344, + "loss": 18.4872, + "rewards/chosen": 36.25593566894531, + "rewards/margins": -6.002646446228027, + "rewards/rejected": 42.25858688354492, + "step": 1430 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.36, + "learning_rate": 9.654684080725335e-06, + "logps/chosen": -594.8233642578125, + "logps/margins": -57.06914520263672, + "logps/rejected": -537.7542724609375, + "loss": 18.2855, + "rewards/chosen": 27.77854347229004, + "rewards/margins": 2.6284825801849365, + "rewards/rejected": 25.150060653686523, + "step": 1440 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.36, + "learning_rate": 9.649887895922583e-06, + "logps/chosen": -625.2347412109375, + "logps/margins": -70.69374084472656, + "logps/rejected": -554.5409545898438, + "loss": 22.2981, + "rewards/chosen": 19.941816329956055, + "rewards/margins": -17.024343490600586, + "rewards/rejected": 36.96615982055664, + "step": 1450 + }, + { + "accuracy": 0.5, + "epoch": 0.36, + "learning_rate": 9.645059840777668e-06, + "logps/chosen": -623.9085083007812, + "logps/margins": -108.2332534790039, + "logps/rejected": -515.6751708984375, + "loss": 15.3711, + "rewards/chosen": 41.62677764892578, + "rewards/margins": 8.575868606567383, + "rewards/rejected": 33.0509147644043, + "step": 1460 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.37, + "learning_rate": 9.640199948382091e-06, + "logps/chosen": -626.6790771484375, + "logps/margins": -79.18148040771484, + "logps/rejected": -547.4976806640625, + "loss": 19.9316, + "rewards/chosen": 28.907846450805664, + "rewards/margins": -5.655673503875732, + "rewards/rejected": 34.56351852416992, + "step": 1470 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.37, + "learning_rate": 9.635308252045566e-06, + "logps/chosen": -547.428466796875, + "logps/margins": -92.78623962402344, + "logps/rejected": -454.6422424316406, + "loss": 15.3081, + "rewards/chosen": 34.333648681640625, + "rewards/margins": 15.059286117553711, + "rewards/rejected": 19.27436065673828, + "step": 1480 + }, + { + "accuracy": 0.5625, + "epoch": 0.37, + "learning_rate": 9.630384785295788e-06, + "logps/chosen": -534.7236328125, + "logps/margins": -114.59806060791016, + "logps/rejected": -420.12554931640625, + "loss": 15.2977, + "rewards/chosen": 57.59136962890625, + "rewards/margins": 25.403095245361328, + "rewards/rejected": 32.188270568847656, + "step": 1490 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.38, + "learning_rate": 9.625429581878211e-06, + "logps/chosen": -475.57550048828125, + "logps/margins": 18.78350257873535, + "logps/rejected": -494.3590393066406, + "loss": 15.8209, + "rewards/chosen": 42.34707260131836, + "rewards/margins": 6.815553188323975, + "rewards/rejected": 35.53152084350586, + "step": 1500 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.38, + "learning_rate": 9.620442675755813e-06, + "logps/chosen": -517.6681518554688, + "logps/margins": -21.16585922241211, + "logps/rejected": -496.50225830078125, + "loss": 13.8172, + "rewards/chosen": 32.977882385253906, + "rewards/margins": 1.7547203302383423, + "rewards/rejected": 31.223155975341797, + "step": 1510 + }, + { + "accuracy": 0.5, + "epoch": 0.38, + "learning_rate": 9.615424101108856e-06, + "logps/chosen": -553.3767700195312, + "logps/margins": -89.54978942871094, + "logps/rejected": -463.82696533203125, + "loss": 14.6861, + "rewards/chosen": 25.396930694580078, + "rewards/margins": -1.1003880500793457, + "rewards/rejected": 26.4973201751709, + "step": 1520 + }, + { + "accuracy": 0.4375, + "epoch": 0.38, + "learning_rate": 9.610373892334666e-06, + "logps/chosen": -545.4134521484375, + "logps/margins": 49.03186798095703, + "logps/rejected": -594.4452514648438, + "loss": 15.8, + "rewards/chosen": 21.479780197143555, + "rewards/margins": 3.409785509109497, + "rewards/rejected": 18.069995880126953, + "step": 1530 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.39, + "learning_rate": 9.60529208404738e-06, + "logps/chosen": -537.2205810546875, + "logps/margins": 18.74542236328125, + "logps/rejected": -555.9659423828125, + "loss": 18.157, + "rewards/chosen": 36.889930725097656, + "rewards/margins": 0.789467990398407, + "rewards/rejected": 36.10045623779297, + "step": 1540 + }, + { + "accuracy": 0.5625, + "epoch": 0.39, + "learning_rate": 9.600178711077728e-06, + "logps/chosen": -682.4976806640625, + "logps/margins": -84.18452453613281, + "logps/rejected": -598.3131713867188, + "loss": 18.4686, + "rewards/chosen": 25.744800567626953, + "rewards/margins": 1.4007503986358643, + "rewards/rejected": 24.344045639038086, + "step": 1550 + }, + { + "accuracy": 0.5625, + "epoch": 0.39, + "learning_rate": 9.595033808472778e-06, + "logps/chosen": -606.9437255859375, + "logps/margins": -49.973854064941406, + "logps/rejected": -556.9698486328125, + "loss": 13.907, + "rewards/chosen": 34.49666976928711, + "rewards/margins": 7.674096584320068, + "rewards/rejected": 26.82257652282715, + "step": 1560 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.39, + "learning_rate": 9.589857411495704e-06, + "logps/chosen": -541.6856689453125, + "logps/margins": 46.20757293701172, + "logps/rejected": -587.8931884765625, + "loss": 20.8195, + "rewards/chosen": 26.664840698242188, + "rewards/margins": -12.008538246154785, + "rewards/rejected": 38.673377990722656, + "step": 1570 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.4, + "learning_rate": 9.584649555625543e-06, + "logps/chosen": -616.451416015625, + "logps/margins": -35.85036087036133, + "logps/rejected": -580.6010131835938, + "loss": 12.5403, + "rewards/chosen": 23.773826599121094, + "rewards/margins": 7.5850043296813965, + "rewards/rejected": 16.188823699951172, + "step": 1580 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.4, + "learning_rate": 9.579410276556951e-06, + "logps/chosen": -681.342041015625, + "logps/margins": -139.56375122070312, + "logps/rejected": -541.7783203125, + "loss": 13.6939, + "rewards/chosen": 36.60581588745117, + "rewards/margins": 6.9373884201049805, + "rewards/rejected": 29.66843032836914, + "step": 1590 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.4, + "learning_rate": 9.574139610199959e-06, + "logps/chosen": -540.5260009765625, + "logps/margins": -54.50944900512695, + "logps/rejected": -486.0165100097656, + "loss": 13.8911, + "rewards/chosen": 46.18475341796875, + "rewards/margins": 4.394420146942139, + "rewards/rejected": 41.79033660888672, + "step": 1600 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.4, + "learning_rate": 9.568837592679724e-06, + "logps/chosen": -623.354736328125, + "logps/margins": -23.03877067565918, + "logps/rejected": -600.31591796875, + "loss": 17.8563, + "rewards/chosen": 56.618743896484375, + "rewards/margins": 3.0979230403900146, + "rewards/rejected": 53.52082061767578, + "step": 1610 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.41, + "learning_rate": 9.56350426033629e-06, + "logps/chosen": -573.454833984375, + "logps/margins": 2.629852294921875, + "logps/rejected": -576.084716796875, + "loss": 17.8639, + "rewards/chosen": 33.775978088378906, + "rewards/margins": -6.805272102355957, + "rewards/rejected": 40.58124923706055, + "step": 1620 + }, + { + "accuracy": 0.5625, + "epoch": 0.41, + "learning_rate": 9.558139649724324e-06, + "logps/chosen": -528.5062255859375, + "logps/margins": -65.63377380371094, + "logps/rejected": -462.87237548828125, + "loss": 17.1732, + "rewards/chosen": 28.436267852783203, + "rewards/margins": 3.480639696121216, + "rewards/rejected": 24.955629348754883, + "step": 1630 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.41, + "learning_rate": 9.552743797612886e-06, + "logps/chosen": -664.2904052734375, + "logps/margins": -143.49313354492188, + "logps/rejected": -520.7973022460938, + "loss": 14.1961, + "rewards/chosen": 36.872276306152344, + "rewards/margins": 16.778888702392578, + "rewards/rejected": 20.093393325805664, + "step": 1640 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.41, + "learning_rate": 9.547316740985152e-06, + "logps/chosen": -646.8281860351562, + "logps/margins": -10.786784172058105, + "logps/rejected": -636.0413818359375, + "loss": 13.3396, + "rewards/chosen": 30.01534652709961, + "rewards/margins": 3.537604808807373, + "rewards/rejected": 26.477741241455078, + "step": 1650 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.41, + "learning_rate": 9.541858517038182e-06, + "logps/chosen": -558.0841064453125, + "logps/margins": -60.212005615234375, + "logps/rejected": -497.87213134765625, + "loss": 15.2455, + "rewards/chosen": 25.614269256591797, + "rewards/margins": 12.414861679077148, + "rewards/rejected": 13.199411392211914, + "step": 1660 + }, + { + "accuracy": 0.4375, + "epoch": 0.42, + "learning_rate": 9.53636916318266e-06, + "logps/chosen": -509.52191162109375, + "logps/margins": 112.27693176269531, + "logps/rejected": -621.7988891601562, + "loss": 17.0814, + "rewards/chosen": 24.030040740966797, + "rewards/margins": -10.050365447998047, + "rewards/rejected": 34.080406188964844, + "step": 1670 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.42, + "learning_rate": 9.530848717042623e-06, + "logps/chosen": -496.45751953125, + "logps/margins": -2.630995512008667, + "logps/rejected": -493.8265075683594, + "loss": 14.5797, + "rewards/chosen": 28.074481964111328, + "rewards/margins": 4.477761268615723, + "rewards/rejected": 23.596717834472656, + "step": 1680 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.42, + "learning_rate": 9.525297216455224e-06, + "logps/chosen": -610.0650634765625, + "logps/margins": -71.30603790283203, + "logps/rejected": -538.7589721679688, + "loss": 14.861, + "rewards/chosen": 22.093124389648438, + "rewards/margins": -5.563172340393066, + "rewards/rejected": 27.656295776367188, + "step": 1690 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.42, + "learning_rate": 9.519714699470463e-06, + "logps/chosen": -562.2117309570312, + "logps/margins": 39.16172790527344, + "logps/rejected": -601.37353515625, + "loss": 12.6701, + "rewards/chosen": 35.709896087646484, + "rewards/margins": 3.117079496383667, + "rewards/rejected": 32.59281921386719, + "step": 1700 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.43, + "learning_rate": 9.51410120435092e-06, + "logps/chosen": -657.4244995117188, + "logps/margins": -104.96844482421875, + "logps/rejected": -552.4560546875, + "loss": 16.4779, + "rewards/chosen": 31.632781982421875, + "rewards/margins": -5.166750907897949, + "rewards/rejected": 36.79953384399414, + "step": 1710 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.43, + "learning_rate": 9.508456769571508e-06, + "logps/chosen": -587.2263793945312, + "logps/margins": -8.174795150756836, + "logps/rejected": -579.051513671875, + "loss": 16.1158, + "rewards/chosen": 13.733772277832031, + "rewards/margins": -3.9058749675750732, + "rewards/rejected": 17.639650344848633, + "step": 1720 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.43, + "learning_rate": 9.50278143381919e-06, + "logps/chosen": -571.4589233398438, + "logps/margins": -9.603372573852539, + "logps/rejected": -561.8555297851562, + "loss": 13.9188, + "rewards/chosen": 18.605789184570312, + "rewards/margins": 0.5999595522880554, + "rewards/rejected": 18.005830764770508, + "step": 1730 + }, + { + "accuracy": 0.5625, + "epoch": 0.43, + "learning_rate": 9.497075235992735e-06, + "logps/chosen": -666.2179565429688, + "logps/margins": -95.2730484008789, + "logps/rejected": -570.9448852539062, + "loss": 15.2908, + "rewards/chosen": 23.416473388671875, + "rewards/margins": -2.188861846923828, + "rewards/rejected": 25.605335235595703, + "step": 1740 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.44, + "learning_rate": 9.491338215202434e-06, + "logps/chosen": -654.6417236328125, + "logps/margins": -124.35968017578125, + "logps/rejected": -530.2821044921875, + "loss": 14.3854, + "rewards/chosen": 30.502532958984375, + "rewards/margins": 0.3984741270542145, + "rewards/rejected": 30.104055404663086, + "step": 1750 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.44, + "learning_rate": 9.48557041076984e-06, + "logps/chosen": -629.0152587890625, + "logps/margins": -84.0464096069336, + "logps/rejected": -544.9688720703125, + "loss": 14.1748, + "rewards/chosen": 51.954925537109375, + "rewards/margins": 14.970242500305176, + "rewards/rejected": 36.98468017578125, + "step": 1760 + }, + { + "accuracy": 0.4124999940395355, + "epoch": 0.44, + "learning_rate": 9.479771862227496e-06, + "logps/chosen": -638.5265502929688, + "logps/margins": -68.16188049316406, + "logps/rejected": -570.36474609375, + "loss": 15.2731, + "rewards/chosen": 32.782188415527344, + "rewards/margins": -1.7880420684814453, + "rewards/rejected": 34.570228576660156, + "step": 1770 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.45, + "learning_rate": 9.47394260931867e-06, + "logps/chosen": -633.3779296875, + "logps/margins": -28.14316177368164, + "logps/rejected": -605.2347412109375, + "loss": 14.5766, + "rewards/chosen": 25.8236083984375, + "rewards/margins": 12.189434051513672, + "rewards/rejected": 13.634173393249512, + "step": 1780 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.45, + "learning_rate": 9.468082691997076e-06, + "logps/chosen": -607.9192504882812, + "logps/margins": -38.225242614746094, + "logps/rejected": -569.6939697265625, + "loss": 13.2147, + "rewards/chosen": 18.074871063232422, + "rewards/margins": -0.11832847446203232, + "rewards/rejected": 18.193201065063477, + "step": 1790 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.45, + "learning_rate": 9.462192150426596e-06, + "logps/chosen": -566.7503662109375, + "logps/margins": 39.4453010559082, + "logps/rejected": -606.1956787109375, + "loss": 14.6105, + "rewards/chosen": 17.88623046875, + "rewards/margins": -0.9126178622245789, + "rewards/rejected": 18.798847198486328, + "step": 1800 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.45, + "learning_rate": 9.456271024981018e-06, + "logps/chosen": -628.9312744140625, + "logps/margins": -88.76264953613281, + "logps/rejected": -540.1685791015625, + "loss": 13.6563, + "rewards/chosen": 20.591129302978516, + "rewards/margins": 9.947909355163574, + "rewards/rejected": 10.643220901489258, + "step": 1810 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.46, + "learning_rate": 9.450319356243748e-06, + "logps/chosen": -575.6384887695312, + "logps/margins": -125.8467788696289, + "logps/rejected": -449.79168701171875, + "loss": 14.021, + "rewards/chosen": 37.3083381652832, + "rewards/margins": 7.473193168640137, + "rewards/rejected": 29.83514404296875, + "step": 1820 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.46, + "learning_rate": 9.444337185007537e-06, + "logps/chosen": -591.4012451171875, + "logps/margins": 31.36812400817871, + "logps/rejected": -622.7694091796875, + "loss": 13.1627, + "rewards/chosen": 50.398414611816406, + "rewards/margins": 9.322660446166992, + "rewards/rejected": 41.07575225830078, + "step": 1830 + }, + { + "accuracy": 0.4375, + "epoch": 0.46, + "learning_rate": 9.438324552274202e-06, + "logps/chosen": -634.4253540039062, + "logps/margins": -78.15516662597656, + "logps/rejected": -556.2701416015625, + "loss": 15.0967, + "rewards/chosen": 25.66843032836914, + "rewards/margins": -5.582737922668457, + "rewards/rejected": 31.25116539001465, + "step": 1840 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.46, + "learning_rate": 9.432281499254339e-06, + "logps/chosen": -625.7207641601562, + "logps/margins": -63.74444580078125, + "logps/rejected": -561.9762573242188, + "loss": 12.3084, + "rewards/chosen": 18.403047561645508, + "rewards/margins": -3.6304008960723877, + "rewards/rejected": 22.033447265625, + "step": 1850 + }, + { + "accuracy": 0.5625, + "epoch": 0.47, + "learning_rate": 9.42620806736705e-06, + "logps/chosen": -598.7117919921875, + "logps/margins": -139.65859985351562, + "logps/rejected": -459.05328369140625, + "loss": 11.799, + "rewards/chosen": 46.8648681640625, + "rewards/margins": 12.769689559936523, + "rewards/rejected": 34.09518051147461, + "step": 1860 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.47, + "learning_rate": 9.420104298239648e-06, + "logps/chosen": -575.7594604492188, + "logps/margins": -70.94566345214844, + "logps/rejected": -504.8138732910156, + "loss": 11.2715, + "rewards/chosen": 35.17518615722656, + "rewards/margins": 5.026805877685547, + "rewards/rejected": 30.14838218688965, + "step": 1870 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.47, + "learning_rate": 9.413970233707379e-06, + "logps/chosen": -646.8281860351562, + "logps/margins": -175.18923950195312, + "logps/rejected": -471.63897705078125, + "loss": 14.4495, + "rewards/chosen": 21.566890716552734, + "rewards/margins": -1.6644004583358765, + "rewards/rejected": 23.23128890991211, + "step": 1880 + }, + { + "accuracy": 0.5625, + "epoch": 0.47, + "learning_rate": 9.407805915813141e-06, + "logps/chosen": -593.1277465820312, + "logps/margins": -36.3474235534668, + "logps/rejected": -556.7803344726562, + "loss": 12.4256, + "rewards/chosen": 32.634788513183594, + "rewards/margins": 8.0280122756958, + "rewards/rejected": 24.606775283813477, + "step": 1890 + }, + { + "accuracy": 0.5, + "epoch": 0.47, + "learning_rate": 9.401611386807179e-06, + "logps/chosen": -475.2322692871094, + "logps/margins": -20.21805191040039, + "logps/rejected": -455.0142517089844, + "loss": 13.3234, + "rewards/chosen": 33.61613845825195, + "rewards/margins": -0.3309977650642395, + "rewards/rejected": 33.94713592529297, + "step": 1900 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.48, + "learning_rate": 9.395386689146809e-06, + "logps/chosen": -503.7867126464844, + "logps/margins": 0.76055908203125, + "logps/rejected": -504.5472717285156, + "loss": 14.3642, + "rewards/chosen": 22.038436889648438, + "rewards/margins": 10.71655559539795, + "rewards/rejected": 11.321880340576172, + "step": 1910 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.48, + "learning_rate": 9.389131865496122e-06, + "logps/chosen": -588.5488891601562, + "logps/margins": -36.5074577331543, + "logps/rejected": -552.04150390625, + "loss": 14.4594, + "rewards/chosen": 27.45322608947754, + "rewards/margins": -1.971853256225586, + "rewards/rejected": 29.42508316040039, + "step": 1920 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.48, + "learning_rate": 9.382846958725695e-06, + "logps/chosen": -674.86669921875, + "logps/margins": -14.49365234375, + "logps/rejected": -660.3729248046875, + "loss": 11.9848, + "rewards/chosen": 32.30889129638672, + "rewards/margins": 0.2277446687221527, + "rewards/rejected": 32.08115005493164, + "step": 1930 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.48, + "learning_rate": 9.376532011912294e-06, + "logps/chosen": -638.4736328125, + "logps/margins": 8.738263130187988, + "logps/rejected": -647.2119140625, + "loss": 13.5921, + "rewards/chosen": 34.67123031616211, + "rewards/margins": 1.4161654710769653, + "rewards/rejected": 33.25506591796875, + "step": 1940 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.49, + "learning_rate": 9.370187068338576e-06, + "logps/chosen": -587.0968017578125, + "logps/margins": -61.557960510253906, + "logps/rejected": -525.5388793945312, + "loss": 11.902, + "rewards/chosen": 27.554393768310547, + "rewards/margins": 8.065832138061523, + "rewards/rejected": 19.488561630249023, + "step": 1950 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.49, + "learning_rate": 9.363812171492802e-06, + "logps/chosen": -585.7796630859375, + "logps/margins": -28.49386215209961, + "logps/rejected": -557.2857666015625, + "loss": 12.5238, + "rewards/chosen": 20.46634292602539, + "rewards/margins": 0.6409767866134644, + "rewards/rejected": 19.825366973876953, + "step": 1960 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.49, + "learning_rate": 9.357407365068527e-06, + "logps/chosen": -713.6062622070312, + "logps/margins": -166.30718994140625, + "logps/rejected": -547.2990112304688, + "loss": 12.8542, + "rewards/chosen": 26.4906063079834, + "rewards/margins": 2.047576427459717, + "rewards/rejected": 24.44303321838379, + "step": 1970 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.49, + "learning_rate": 9.35097269296431e-06, + "logps/chosen": -518.8494873046875, + "logps/margins": 19.5637149810791, + "logps/rejected": -538.4132080078125, + "loss": 13.6505, + "rewards/chosen": 20.73946189880371, + "rewards/margins": -6.4752984046936035, + "rewards/rejected": 27.214757919311523, + "step": 1980 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.5, + "learning_rate": 9.344508199283407e-06, + "logps/chosen": -609.3377685546875, + "logps/margins": -77.1791000366211, + "logps/rejected": -532.1587524414062, + "loss": 12.2109, + "rewards/chosen": 21.00510597229004, + "rewards/margins": -0.2330617904663086, + "rewards/rejected": 21.238168716430664, + "step": 1990 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.5, + "learning_rate": 9.338013928333472e-06, + "logps/chosen": -460.7699279785156, + "logps/margins": 17.400569915771484, + "logps/rejected": -478.17047119140625, + "loss": 15.2276, + "rewards/chosen": 21.651203155517578, + "rewards/margins": -2.3227298259735107, + "rewards/rejected": 23.97393226623535, + "step": 2000 + }, + { + "accuracy": 0.5625, + "epoch": 0.5, + "learning_rate": 9.331489924626253e-06, + "logps/chosen": -641.2543334960938, + "logps/margins": -75.30561828613281, + "logps/rejected": -565.94873046875, + "loss": 12.276, + "rewards/chosen": 23.918691635131836, + "rewards/margins": -2.791476011276245, + "rewards/rejected": 26.710168838500977, + "step": 2010 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.51, + "learning_rate": 9.324936232877289e-06, + "logps/chosen": -630.9451904296875, + "logps/margins": -72.93824005126953, + "logps/rejected": -558.0070190429688, + "loss": 14.6183, + "rewards/chosen": 19.59995460510254, + "rewards/margins": -1.3345954418182373, + "rewards/rejected": 20.934551239013672, + "step": 2020 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.51, + "learning_rate": 9.318352898005593e-06, + "logps/chosen": -549.7072143554688, + "logps/margins": 122.813720703125, + "logps/rejected": -672.52099609375, + "loss": 13.483, + "rewards/chosen": 20.307849884033203, + "rewards/margins": -2.9660115242004395, + "rewards/rejected": 23.273860931396484, + "step": 2030 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.51, + "learning_rate": 9.311739965133365e-06, + "logps/chosen": -622.2337036132812, + "logps/margins": -32.368934631347656, + "logps/rejected": -589.8648681640625, + "loss": 10.0102, + "rewards/chosen": 22.56910514831543, + "rewards/margins": 8.03900146484375, + "rewards/rejected": 14.530102729797363, + "step": 2040 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.51, + "learning_rate": 9.305097479585652e-06, + "logps/chosen": -621.7113037109375, + "logps/margins": -75.39019012451172, + "logps/rejected": -546.3211669921875, + "loss": 12.3668, + "rewards/chosen": 36.85089111328125, + "rewards/margins": 7.329715728759766, + "rewards/rejected": 29.521175384521484, + "step": 2050 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.52, + "learning_rate": 9.298425486890073e-06, + "logps/chosen": -643.7429809570312, + "logps/margins": -104.81253814697266, + "logps/rejected": -538.930419921875, + "loss": 11.1242, + "rewards/chosen": 22.33073616027832, + "rewards/margins": 2.674382209777832, + "rewards/rejected": 19.656354904174805, + "step": 2060 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.52, + "learning_rate": 9.291724032776475e-06, + "logps/chosen": -523.957763671875, + "logps/margins": -41.012672424316406, + "logps/rejected": -482.945068359375, + "loss": 11.0904, + "rewards/chosen": 26.3901424407959, + "rewards/margins": 3.9443917274475098, + "rewards/rejected": 22.44575309753418, + "step": 2070 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.52, + "learning_rate": 9.284993163176644e-06, + "logps/chosen": -647.9600830078125, + "logps/margins": -135.23568725585938, + "logps/rejected": -512.724365234375, + "loss": 10.397, + "rewards/chosen": 16.236602783203125, + "rewards/margins": -2.093750476837158, + "rewards/rejected": 18.33035659790039, + "step": 2080 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.52, + "learning_rate": 9.278232924223974e-06, + "logps/chosen": -577.3107299804688, + "logps/margins": -15.585044860839844, + "logps/rejected": -561.7257080078125, + "loss": 12.4502, + "rewards/chosen": 24.722522735595703, + "rewards/margins": -2.417186737060547, + "rewards/rejected": 27.139713287353516, + "step": 2090 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.53, + "learning_rate": 9.271443362253159e-06, + "logps/chosen": -606.1764526367188, + "logps/margins": -2.0769410133361816, + "logps/rejected": -604.0994873046875, + "loss": 11.521, + "rewards/chosen": 22.060165405273438, + "rewards/margins": 0.23056812584400177, + "rewards/rejected": 21.8295955657959, + "step": 2100 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.53, + "learning_rate": 9.26462452379987e-06, + "logps/chosen": -608.9471435546875, + "logps/margins": -31.446868896484375, + "logps/rejected": -577.5003051757812, + "loss": 13.2322, + "rewards/chosen": 22.507360458374023, + "rewards/margins": -5.47988224029541, + "rewards/rejected": 27.98724365234375, + "step": 2110 + }, + { + "accuracy": 0.625, + "epoch": 0.53, + "learning_rate": 9.257776455600443e-06, + "logps/chosen": -553.6565551757812, + "logps/margins": 10.0465669631958, + "logps/rejected": -563.7030639648438, + "loss": 12.5284, + "rewards/chosen": 18.662477493286133, + "rewards/margins": 4.719099521636963, + "rewards/rejected": 13.943377494812012, + "step": 2120 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.53, + "learning_rate": 9.250899204591552e-06, + "logps/chosen": -469.525390625, + "logps/margins": 10.8422212600708, + "logps/rejected": -480.3675842285156, + "loss": 11.2268, + "rewards/chosen": 20.613054275512695, + "rewards/margins": 9.212384223937988, + "rewards/rejected": 11.40066909790039, + "step": 2130 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.54, + "learning_rate": 9.243992817909891e-06, + "logps/chosen": -573.618408203125, + "logps/margins": -139.2906951904297, + "logps/rejected": -434.32769775390625, + "loss": 12.9604, + "rewards/chosen": 18.2756290435791, + "rewards/margins": -1.5501413345336914, + "rewards/rejected": 19.825767517089844, + "step": 2140 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.54, + "learning_rate": 9.237057342891852e-06, + "logps/chosen": -558.3365478515625, + "logps/margins": -33.4783935546875, + "logps/rejected": -524.8582153320312, + "loss": 13.4818, + "rewards/chosen": 23.163623809814453, + "rewards/margins": 2.8346927165985107, + "rewards/rejected": 20.328927993774414, + "step": 2150 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.54, + "learning_rate": 9.230092827073193e-06, + "logps/chosen": -509.3055725097656, + "logps/margins": 32.06549072265625, + "logps/rejected": -541.37109375, + "loss": 11.8625, + "rewards/chosen": 22.804096221923828, + "rewards/margins": 3.70770001411438, + "rewards/rejected": 19.096393585205078, + "step": 2160 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.54, + "learning_rate": 9.223099318188723e-06, + "logps/chosen": -647.0440673828125, + "logps/margins": -90.48458862304688, + "logps/rejected": -556.5595092773438, + "loss": 13.2488, + "rewards/chosen": 19.096633911132812, + "rewards/margins": -3.2188942432403564, + "rewards/rejected": 22.315526962280273, + "step": 2170 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.55, + "learning_rate": 9.216076864171969e-06, + "logps/chosen": -668.204833984375, + "logps/margins": -81.31346893310547, + "logps/rejected": -586.8914184570312, + "loss": 13.6828, + "rewards/chosen": 22.10203742980957, + "rewards/margins": -3.3537166118621826, + "rewards/rejected": 25.455753326416016, + "step": 2180 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.55, + "learning_rate": 9.209025513154844e-06, + "logps/chosen": -630.75439453125, + "logps/margins": -128.52731323242188, + "logps/rejected": -502.22711181640625, + "loss": 10.8342, + "rewards/chosen": 21.599353790283203, + "rewards/margins": 2.9209370613098145, + "rewards/rejected": 18.678417205810547, + "step": 2190 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.55, + "learning_rate": 9.201945313467326e-06, + "logps/chosen": -510.7772521972656, + "logps/margins": 64.22492218017578, + "logps/rejected": -575.0021362304688, + "loss": 11.4474, + "rewards/chosen": 15.622041702270508, + "rewards/margins": 0.14915180206298828, + "rewards/rejected": 15.47288990020752, + "step": 2200 + }, + { + "accuracy": 0.4375, + "epoch": 0.55, + "learning_rate": 9.194836313637119e-06, + "logps/chosen": -542.2071533203125, + "logps/margins": -34.66168212890625, + "logps/rejected": -507.54547119140625, + "loss": 14.9623, + "rewards/chosen": 21.664554595947266, + "rewards/margins": 6.036484718322754, + "rewards/rejected": 15.628069877624512, + "step": 2210 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.56, + "learning_rate": 9.187698562389324e-06, + "logps/chosen": -646.9637451171875, + "logps/margins": -57.70018768310547, + "logps/rejected": -589.2635498046875, + "loss": 11.0743, + "rewards/chosen": 33.347293853759766, + "rewards/margins": 10.410330772399902, + "rewards/rejected": 22.936960220336914, + "step": 2220 + }, + { + "accuracy": 0.4375, + "epoch": 0.56, + "learning_rate": 9.180532108646103e-06, + "logps/chosen": -596.1644897460938, + "logps/margins": -105.70103454589844, + "logps/rejected": -490.4634704589844, + "loss": 11.5116, + "rewards/chosen": 27.99969482421875, + "rewards/margins": 9.762500762939453, + "rewards/rejected": 18.237192153930664, + "step": 2230 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.56, + "learning_rate": 9.173337001526349e-06, + "logps/chosen": -535.3031005859375, + "logps/margins": 9.61864948272705, + "logps/rejected": -544.9217529296875, + "loss": 12.9939, + "rewards/chosen": 16.268144607543945, + "rewards/margins": 8.668462753295898, + "rewards/rejected": 7.599682807922363, + "step": 2240 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.56, + "learning_rate": 9.166113290345338e-06, + "logps/chosen": -517.0496826171875, + "logps/margins": -31.795446395874023, + "logps/rejected": -485.254150390625, + "loss": 12.1339, + "rewards/chosen": 14.863940238952637, + "rewards/margins": -1.1877628564834595, + "rewards/rejected": 16.05170440673828, + "step": 2250 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.56, + "learning_rate": 9.158861024614408e-06, + "logps/chosen": -554.612548828125, + "logps/margins": 77.40425872802734, + "logps/rejected": -632.0167846679688, + "loss": 13.2652, + "rewards/chosen": 42.41765213012695, + "rewards/margins": -0.09895782172679901, + "rewards/rejected": 42.51660919189453, + "step": 2260 + }, + { + "accuracy": 0.699999988079071, + "epoch": 0.57, + "learning_rate": 9.1515802540406e-06, + "logps/chosen": -519.5484008789062, + "logps/margins": -59.85576248168945, + "logps/rejected": -459.69256591796875, + "loss": 11.403, + "rewards/chosen": 26.43393325805664, + "rewards/margins": 8.895792961120605, + "rewards/rejected": 17.538137435913086, + "step": 2270 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.57, + "learning_rate": 9.144271028526335e-06, + "logps/chosen": -613.0343017578125, + "logps/margins": -82.08473205566406, + "logps/rejected": -530.9495849609375, + "loss": 12.3724, + "rewards/chosen": 25.426912307739258, + "rewards/margins": 5.2682576179504395, + "rewards/rejected": 20.15865707397461, + "step": 2280 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.57, + "learning_rate": 9.136933398169061e-06, + "logps/chosen": -597.3190307617188, + "logps/margins": -65.6583023071289, + "logps/rejected": -531.6607666015625, + "loss": 11.4649, + "rewards/chosen": 22.350276947021484, + "rewards/margins": 6.48030948638916, + "rewards/rejected": 15.869969367980957, + "step": 2290 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.57, + "learning_rate": 9.129567413260912e-06, + "logps/chosen": -587.7022094726562, + "logps/margins": 1.9218521118164062, + "logps/rejected": -589.6239624023438, + "loss": 11.1419, + "rewards/chosen": 21.70797348022461, + "rewards/margins": 1.8594261407852173, + "rewards/rejected": 19.84854507446289, + "step": 2300 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.58, + "learning_rate": 9.122173124288366e-06, + "logps/chosen": -595.6090087890625, + "logps/margins": 42.64912414550781, + "logps/rejected": -638.2581787109375, + "loss": 11.861, + "rewards/chosen": 22.650680541992188, + "rewards/margins": 2.426558256149292, + "rewards/rejected": 20.22412109375, + "step": 2310 + }, + { + "accuracy": 0.4000000059604645, + "epoch": 0.58, + "learning_rate": 9.114750581931897e-06, + "logps/chosen": -539.8800048828125, + "logps/margins": 18.802982330322266, + "logps/rejected": -558.6829833984375, + "loss": 13.1818, + "rewards/chosen": 29.583759307861328, + "rewards/margins": -4.160604000091553, + "rewards/rejected": 33.744361877441406, + "step": 2320 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.58, + "learning_rate": 9.107299837065623e-06, + "logps/chosen": -631.6174926757812, + "logps/margins": -100.43499755859375, + "logps/rejected": -531.1824951171875, + "loss": 11.648, + "rewards/chosen": 34.35652542114258, + "rewards/margins": 6.76669454574585, + "rewards/rejected": 27.589832305908203, + "step": 2330 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.58, + "learning_rate": 9.099820940756974e-06, + "logps/chosen": -640.0103759765625, + "logps/margins": -57.07097244262695, + "logps/rejected": -582.9393310546875, + "loss": 11.9688, + "rewards/chosen": 27.823028564453125, + "rewards/margins": 1.9917895793914795, + "rewards/rejected": 25.83123779296875, + "step": 2340 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.59, + "learning_rate": 9.092313944266314e-06, + "logps/chosen": -566.525146484375, + "logps/margins": -33.89141845703125, + "logps/rejected": -532.6337280273438, + "loss": 10.9515, + "rewards/chosen": 16.825084686279297, + "rewards/margins": 5.004764556884766, + "rewards/rejected": 11.820322036743164, + "step": 2350 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.59, + "learning_rate": 9.08477889904662e-06, + "logps/chosen": -533.1007080078125, + "logps/margins": 16.99818992614746, + "logps/rejected": -550.0989990234375, + "loss": 9.5014, + "rewards/chosen": 19.428865432739258, + "rewards/margins": 8.846073150634766, + "rewards/rejected": 10.582791328430176, + "step": 2360 + }, + { + "accuracy": 0.5, + "epoch": 0.59, + "learning_rate": 9.077215856743105e-06, + "logps/chosen": -521.9766235351562, + "logps/margins": -74.03758239746094, + "logps/rejected": -447.93902587890625, + "loss": 11.9667, + "rewards/chosen": 25.6577091217041, + "rewards/margins": 1.9734967947006226, + "rewards/rejected": 23.68421173095703, + "step": 2370 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.59, + "learning_rate": 9.069624869192879e-06, + "logps/chosen": -529.2022094726562, + "logps/margins": -48.153038024902344, + "logps/rejected": -481.0491638183594, + "loss": 10.3487, + "rewards/chosen": 29.79203224182129, + "rewards/margins": 3.614915370941162, + "rewards/rejected": 26.177114486694336, + "step": 2380 + }, + { + "accuracy": 0.5625, + "epoch": 0.6, + "learning_rate": 9.06200598842459e-06, + "logps/chosen": -596.5222778320312, + "logps/margins": -98.75775146484375, + "logps/rejected": -497.7645568847656, + "loss": 11.3957, + "rewards/chosen": 25.61130142211914, + "rewards/margins": 5.584354877471924, + "rewards/rejected": 20.026945114135742, + "step": 2390 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.6, + "learning_rate": 9.054359266658061e-06, + "logps/chosen": -515.2425537109375, + "logps/margins": -3.2292511463165283, + "logps/rejected": -512.0133056640625, + "loss": 12.7075, + "rewards/chosen": 17.6035213470459, + "rewards/margins": -0.9672435522079468, + "rewards/rejected": 18.570764541625977, + "step": 2400 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.6, + "learning_rate": 9.046684756303942e-06, + "logps/chosen": -652.3137817382812, + "logps/margins": -69.58396911621094, + "logps/rejected": -582.7298583984375, + "loss": 15.6994, + "rewards/chosen": 19.8869571685791, + "rewards/margins": -6.234736442565918, + "rewards/rejected": 26.1216983795166, + "step": 2410 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.6, + "learning_rate": 9.038982509963347e-06, + "logps/chosen": -542.5596923828125, + "logps/margins": -8.10017204284668, + "logps/rejected": -534.4595947265625, + "loss": 11.6496, + "rewards/chosen": 29.044620513916016, + "rewards/margins": 10.775490760803223, + "rewards/rejected": 18.269128799438477, + "step": 2420 + }, + { + "accuracy": 0.5625, + "epoch": 0.61, + "learning_rate": 9.031252580427484e-06, + "logps/chosen": -577.5907592773438, + "logps/margins": -3.6852822303771973, + "logps/rejected": -573.9054565429688, + "loss": 9.6537, + "rewards/chosen": 30.326366424560547, + "rewards/margins": 9.628961563110352, + "rewards/rejected": 20.697406768798828, + "step": 2430 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.61, + "learning_rate": 9.023495020677311e-06, + "logps/chosen": -544.9383544921875, + "logps/margins": -3.314984083175659, + "logps/rejected": -541.6233520507812, + "loss": 10.3818, + "rewards/chosen": 27.949710845947266, + "rewards/margins": 3.602196455001831, + "rewards/rejected": 24.347515106201172, + "step": 2440 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.61, + "learning_rate": 9.01570988388316e-06, + "logps/chosen": -674.2739868164062, + "logps/margins": -100.08638000488281, + "logps/rejected": -574.1876220703125, + "loss": 12.4392, + "rewards/chosen": 12.903955459594727, + "rewards/margins": 6.835461616516113, + "rewards/rejected": 6.068493366241455, + "step": 2450 + }, + { + "accuracy": 0.5625, + "epoch": 0.61, + "learning_rate": 9.007897223404376e-06, + "logps/chosen": -549.592529296875, + "logps/margins": -50.512420654296875, + "logps/rejected": -499.080078125, + "loss": 11.3925, + "rewards/chosen": 26.970630645751953, + "rewards/margins": 4.498965263366699, + "rewards/rejected": 22.471662521362305, + "step": 2460 + }, + { + "accuracy": 0.5, + "epoch": 0.62, + "learning_rate": 9.00005709278895e-06, + "logps/chosen": -536.4571533203125, + "logps/margins": -16.58860969543457, + "logps/rejected": -519.8685302734375, + "loss": 10.7356, + "rewards/chosen": 26.961563110351562, + "rewards/margins": 6.383793830871582, + "rewards/rejected": 20.577770233154297, + "step": 2470 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.62, + "learning_rate": 8.992189545773157e-06, + "logps/chosen": -687.9878540039062, + "logps/margins": -58.64374923706055, + "logps/rejected": -629.3441162109375, + "loss": 15.5556, + "rewards/chosen": 18.635011672973633, + "rewards/margins": -2.0264105796813965, + "rewards/rejected": 20.661420822143555, + "step": 2480 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.62, + "learning_rate": 8.984294636281184e-06, + "logps/chosen": -623.5209350585938, + "logps/margins": -34.64851379394531, + "logps/rejected": -588.8724365234375, + "loss": 11.7701, + "rewards/chosen": 19.841110229492188, + "rewards/margins": 2.6305441856384277, + "rewards/rejected": 17.210567474365234, + "step": 2490 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.62, + "learning_rate": 8.976372418424753e-06, + "logps/chosen": -518.74951171875, + "logps/margins": 10.712152481079102, + "logps/rejected": -529.4617919921875, + "loss": 11.029, + "rewards/chosen": 20.168075561523438, + "rewards/margins": 3.6723599433898926, + "rewards/rejected": 16.495716094970703, + "step": 2500 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.63, + "learning_rate": 8.968422946502766e-06, + "logps/chosen": -574.5670776367188, + "logps/margins": -70.0477523803711, + "logps/rejected": -504.5193786621094, + "loss": 13.0534, + "rewards/chosen": 28.198543548583984, + "rewards/margins": -3.6000137329101562, + "rewards/rejected": 31.798553466796875, + "step": 2510 + }, + { + "accuracy": 0.4000000059604645, + "epoch": 0.63, + "learning_rate": 8.960446275000922e-06, + "logps/chosen": -586.9291381835938, + "logps/margins": 51.30474853515625, + "logps/rejected": -638.2339477539062, + "loss": 15.6766, + "rewards/chosen": 26.78310203552246, + "rewards/margins": -14.443018913269043, + "rewards/rejected": 41.22612380981445, + "step": 2520 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.63, + "learning_rate": 8.952442458591346e-06, + "logps/chosen": -649.9849853515625, + "logps/margins": -83.92903137207031, + "logps/rejected": -566.0560302734375, + "loss": 12.7927, + "rewards/chosen": 12.050697326660156, + "rewards/margins": -2.3645100593566895, + "rewards/rejected": 14.415206909179688, + "step": 2530 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.64, + "learning_rate": 8.944411552132213e-06, + "logps/chosen": -593.6459350585938, + "logps/margins": -23.824214935302734, + "logps/rejected": -569.8217163085938, + "loss": 12.2908, + "rewards/chosen": 12.996249198913574, + "rewards/margins": -4.54555082321167, + "rewards/rejected": 17.541799545288086, + "step": 2540 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.64, + "learning_rate": 8.936353610667374e-06, + "logps/chosen": -670.3822631835938, + "logps/margins": -63.9212646484375, + "logps/rejected": -606.4609375, + "loss": 13.5609, + "rewards/chosen": 32.044551849365234, + "rewards/margins": 6.4017839431762695, + "rewards/rejected": 25.64276695251465, + "step": 2550 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.64, + "learning_rate": 8.928268689425977e-06, + "logps/chosen": -643.9003295898438, + "logps/margins": 4.849920749664307, + "logps/rejected": -648.750244140625, + "loss": 11.8572, + "rewards/chosen": 18.33321762084961, + "rewards/margins": 4.547055721282959, + "rewards/rejected": 13.786163330078125, + "step": 2560 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.64, + "learning_rate": 8.920156843822092e-06, + "logps/chosen": -687.8771362304688, + "logps/margins": 19.926515579223633, + "logps/rejected": -707.8037109375, + "loss": 11.4197, + "rewards/chosen": 11.455920219421387, + "rewards/margins": 11.555013656616211, + "rewards/rejected": -0.09909238666296005, + "step": 2570 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.65, + "learning_rate": 8.912018129454327e-06, + "logps/chosen": -521.3567504882812, + "logps/margins": -50.08830642700195, + "logps/rejected": -471.2684631347656, + "loss": 13.3383, + "rewards/chosen": 7.661962985992432, + "rewards/margins": -1.9665218591690063, + "rewards/rejected": 9.628484725952148, + "step": 2580 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.65, + "learning_rate": 8.903852602105449e-06, + "logps/chosen": -528.0267333984375, + "logps/margins": -54.04829025268555, + "logps/rejected": -473.978515625, + "loss": 15.1081, + "rewards/chosen": 17.646923065185547, + "rewards/margins": 0.5581150054931641, + "rewards/rejected": 17.088808059692383, + "step": 2590 + }, + { + "accuracy": 0.5, + "epoch": 0.65, + "learning_rate": 8.895660317741998e-06, + "logps/chosen": -538.2623291015625, + "logps/margins": -32.42020797729492, + "logps/rejected": -505.84210205078125, + "loss": 14.558, + "rewards/chosen": 16.14577293395996, + "rewards/margins": -10.941680908203125, + "rewards/rejected": 27.087453842163086, + "step": 2600 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.65, + "learning_rate": 8.887441332513913e-06, + "logps/chosen": -585.9078369140625, + "logps/margins": 49.35792541503906, + "logps/rejected": -635.2658081054688, + "loss": 11.698, + "rewards/chosen": 14.0872220993042, + "rewards/margins": -1.1893078088760376, + "rewards/rejected": 15.276532173156738, + "step": 2610 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.66, + "learning_rate": 8.879195702754138e-06, + "logps/chosen": -544.89892578125, + "logps/margins": 109.51057434082031, + "logps/rejected": -654.4095458984375, + "loss": 10.8882, + "rewards/chosen": 14.435445785522461, + "rewards/margins": 7.674253940582275, + "rewards/rejected": 6.761192321777344, + "step": 2620 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.66, + "learning_rate": 8.870923484978233e-06, + "logps/chosen": -601.8475341796875, + "logps/margins": -20.19939613342285, + "logps/rejected": -581.6480712890625, + "loss": 10.8704, + "rewards/chosen": 15.312139511108398, + "rewards/margins": 2.2132010459899902, + "rewards/rejected": 13.09893798828125, + "step": 2630 + }, + { + "accuracy": 0.5, + "epoch": 0.66, + "learning_rate": 8.862624735884003e-06, + "logps/chosen": -693.1573486328125, + "logps/margins": -26.312490463256836, + "logps/rejected": -666.8449096679688, + "loss": 9.4949, + "rewards/chosen": 12.901723861694336, + "rewards/margins": -2.675187110900879, + "rewards/rejected": 15.576910018920898, + "step": 2640 + }, + { + "accuracy": 0.5625, + "epoch": 0.66, + "learning_rate": 8.85429951235109e-06, + "logps/chosen": -548.8619384765625, + "logps/margins": -37.924991607666016, + "logps/rejected": -510.93695068359375, + "loss": 11.5737, + "rewards/chosen": 19.872821807861328, + "rewards/margins": 5.544877529144287, + "rewards/rejected": 14.327943801879883, + "step": 2650 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.67, + "learning_rate": 8.845947871440596e-06, + "logps/chosen": -728.1278076171875, + "logps/margins": -99.82443237304688, + "logps/rejected": -628.3033447265625, + "loss": 10.9029, + "rewards/chosen": 23.13753890991211, + "rewards/margins": -0.4299777150154114, + "rewards/rejected": 23.567514419555664, + "step": 2660 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.67, + "learning_rate": 8.837569870394685e-06, + "logps/chosen": -563.2623291015625, + "logps/margins": -12.755287170410156, + "logps/rejected": -550.5071411132812, + "loss": 10.9568, + "rewards/chosen": 35.122535705566406, + "rewards/margins": 3.902243137359619, + "rewards/rejected": 31.220294952392578, + "step": 2670 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.67, + "learning_rate": 8.829165566636198e-06, + "logps/chosen": -586.9114990234375, + "logps/margins": -41.42841720581055, + "logps/rejected": -545.4830322265625, + "loss": 13.3629, + "rewards/chosen": 16.59609603881836, + "rewards/margins": -2.70041823387146, + "rewards/rejected": 19.296512603759766, + "step": 2680 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.67, + "learning_rate": 8.82073501776825e-06, + "logps/chosen": -547.1573486328125, + "logps/margins": -3.7962493896484375, + "logps/rejected": -543.3609619140625, + "loss": 13.806, + "rewards/chosen": 11.401498794555664, + "rewards/margins": -3.5657577514648438, + "rewards/rejected": 14.967254638671875, + "step": 2690 + }, + { + "accuracy": 0.5625, + "epoch": 0.68, + "learning_rate": 8.81227828157384e-06, + "logps/chosen": -597.0332641601562, + "logps/margins": -54.0127067565918, + "logps/rejected": -543.0204467773438, + "loss": 11.6568, + "rewards/chosen": 9.768872261047363, + "rewards/margins": 3.2988505363464355, + "rewards/rejected": 6.4700212478637695, + "step": 2700 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.68, + "learning_rate": 8.803795416015463e-06, + "logps/chosen": -614.135009765625, + "logps/margins": -107.68509674072266, + "logps/rejected": -506.44989013671875, + "loss": 9.7796, + "rewards/chosen": 18.68087387084961, + "rewards/margins": 3.14738130569458, + "rewards/rejected": 15.533491134643555, + "step": 2710 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.68, + "learning_rate": 8.795286479234694e-06, + "logps/chosen": -618.6973876953125, + "logps/margins": -61.996116638183594, + "logps/rejected": -556.7012939453125, + "loss": 11.1913, + "rewards/chosen": 20.10247230529785, + "rewards/margins": 3.7067909240722656, + "rewards/rejected": 16.395679473876953, + "step": 2720 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.68, + "learning_rate": 8.786751529551807e-06, + "logps/chosen": -458.27374267578125, + "logps/margins": -39.4249267578125, + "logps/rejected": -418.84881591796875, + "loss": 9.1191, + "rewards/chosen": 34.23222732543945, + "rewards/margins": 3.1947035789489746, + "rewards/rejected": 31.037525177001953, + "step": 2730 + }, + { + "accuracy": 0.625, + "epoch": 0.69, + "learning_rate": 8.778190625465364e-06, + "logps/chosen": -715.5987548828125, + "logps/margins": -78.1912841796875, + "logps/rejected": -637.4075317382812, + "loss": 11.4775, + "rewards/chosen": 23.183700561523438, + "rewards/margins": 9.291934967041016, + "rewards/rejected": 13.891764640808105, + "step": 2740 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.69, + "learning_rate": 8.769603825651823e-06, + "logps/chosen": -681.9344482421875, + "logps/margins": 5.6123504638671875, + "logps/rejected": -687.546875, + "loss": 9.4407, + "rewards/chosen": 17.647628784179688, + "rewards/margins": -2.703650712966919, + "rewards/rejected": 20.35127830505371, + "step": 2750 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.69, + "learning_rate": 8.760991188965129e-06, + "logps/chosen": -486.98101806640625, + "logps/margins": 98.01480102539062, + "logps/rejected": -584.9957885742188, + "loss": 9.809, + "rewards/chosen": 22.11068344116211, + "rewards/margins": 4.863184928894043, + "rewards/rejected": 17.247499465942383, + "step": 2760 + }, + { + "accuracy": 0.4124999940395355, + "epoch": 0.69, + "learning_rate": 8.752352774436313e-06, + "logps/chosen": -493.25830078125, + "logps/margins": 66.71929931640625, + "logps/rejected": -559.9775390625, + "loss": 11.8655, + "rewards/chosen": 17.60287857055664, + "rewards/margins": -3.3498122692108154, + "rewards/rejected": 20.952688217163086, + "step": 2770 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.69, + "learning_rate": 8.743688641273087e-06, + "logps/chosen": -567.2623291015625, + "logps/margins": -31.907154083251953, + "logps/rejected": -535.355224609375, + "loss": 10.8393, + "rewards/chosen": 13.757858276367188, + "rewards/margins": 2.615328311920166, + "rewards/rejected": 11.14253044128418, + "step": 2780 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.7, + "learning_rate": 8.734998848859443e-06, + "logps/chosen": -563.7305908203125, + "logps/margins": -113.3327407836914, + "logps/rejected": -450.3978576660156, + "loss": 9.9628, + "rewards/chosen": 20.737144470214844, + "rewards/margins": 6.5731940269470215, + "rewards/rejected": 14.16395092010498, + "step": 2790 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.7, + "learning_rate": 8.726283456755234e-06, + "logps/chosen": -647.6878662109375, + "logps/margins": -84.67866516113281, + "logps/rejected": -563.0091552734375, + "loss": 13.3414, + "rewards/chosen": 15.253016471862793, + "rewards/margins": 0.09246359020471573, + "rewards/rejected": 15.160552978515625, + "step": 2800 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.7, + "learning_rate": 8.717542524695778e-06, + "logps/chosen": -532.8272094726562, + "logps/margins": 72.64997863769531, + "logps/rejected": -605.4771728515625, + "loss": 11.1287, + "rewards/chosen": 7.2594170570373535, + "rewards/margins": 2.4153311252593994, + "rewards/rejected": 4.844085693359375, + "step": 2810 + }, + { + "accuracy": 0.4124999940395355, + "epoch": 0.7, + "learning_rate": 8.708776112591444e-06, + "logps/chosen": -556.40625, + "logps/margins": 20.582887649536133, + "logps/rejected": -576.9891357421875, + "loss": 11.2541, + "rewards/chosen": 13.570486068725586, + "rewards/margins": -7.35394811630249, + "rewards/rejected": 20.924436569213867, + "step": 2820 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.71, + "learning_rate": 8.69998428052724e-06, + "logps/chosen": -593.0245361328125, + "logps/margins": -32.00289535522461, + "logps/rejected": -561.0216064453125, + "loss": 11.9256, + "rewards/chosen": 19.626041412353516, + "rewards/margins": 1.011570692062378, + "rewards/rejected": 18.614471435546875, + "step": 2830 + }, + { + "accuracy": 0.5625, + "epoch": 0.71, + "learning_rate": 8.691167088762402e-06, + "logps/chosen": -559.6341552734375, + "logps/margins": 16.009429931640625, + "logps/rejected": -575.6435546875, + "loss": 10.1276, + "rewards/chosen": 14.341974258422852, + "rewards/margins": 4.266587257385254, + "rewards/rejected": 10.075384140014648, + "step": 2840 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.71, + "learning_rate": 8.682324597729982e-06, + "logps/chosen": -626.4402465820312, + "logps/margins": -34.38554763793945, + "logps/rejected": -592.0546875, + "loss": 11.2081, + "rewards/chosen": 22.619762420654297, + "rewards/margins": 6.417317867279053, + "rewards/rejected": 16.202444076538086, + "step": 2850 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.71, + "learning_rate": 8.67345686803644e-06, + "logps/chosen": -654.6260986328125, + "logps/margins": 26.408056259155273, + "logps/rejected": -681.0341186523438, + "loss": 11.8856, + "rewards/chosen": 18.485950469970703, + "rewards/margins": 2.944094657897949, + "rewards/rejected": 15.541854858398438, + "step": 2860 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.72, + "learning_rate": 8.664563960461205e-06, + "logps/chosen": -678.5271606445312, + "logps/margins": -69.243408203125, + "logps/rejected": -609.2837524414062, + "loss": 13.5847, + "rewards/chosen": 18.90741539001465, + "rewards/margins": 1.0067245960235596, + "rewards/rejected": 17.900691986083984, + "step": 2870 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.72, + "learning_rate": 8.655645935956291e-06, + "logps/chosen": -521.7791748046875, + "logps/margins": -54.80826950073242, + "logps/rejected": -466.97088623046875, + "loss": 8.8431, + "rewards/chosen": 23.11695671081543, + "rewards/margins": 4.101020812988281, + "rewards/rejected": 19.01593589782715, + "step": 2880 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.72, + "learning_rate": 8.646702855645858e-06, + "logps/chosen": -585.0939331054688, + "logps/margins": -36.30695724487305, + "logps/rejected": -548.7869873046875, + "loss": 11.8319, + "rewards/chosen": 11.752206802368164, + "rewards/margins": -3.994729995727539, + "rewards/rejected": 15.74693775177002, + "step": 2890 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.72, + "learning_rate": 8.637734780825801e-06, + "logps/chosen": -549.0783081054688, + "logps/margins": 15.802528381347656, + "logps/rejected": -564.8807983398438, + "loss": 12.978, + "rewards/chosen": 23.013439178466797, + "rewards/margins": 3.579294204711914, + "rewards/rejected": 19.434146881103516, + "step": 2900 + }, + { + "accuracy": 0.5625, + "epoch": 0.73, + "learning_rate": 8.62874177296332e-06, + "logps/chosen": -605.3807983398438, + "logps/margins": -21.380813598632812, + "logps/rejected": -583.9999389648438, + "loss": 10.3861, + "rewards/chosen": 15.841707229614258, + "rewards/margins": -0.6660584211349487, + "rewards/rejected": 16.507762908935547, + "step": 2910 + }, + { + "accuracy": 0.5625, + "epoch": 0.73, + "learning_rate": 8.619723893696517e-06, + "logps/chosen": -630.3705444335938, + "logps/margins": -20.128646850585938, + "logps/rejected": -610.2418823242188, + "loss": 9.6511, + "rewards/chosen": 21.782791137695312, + "rewards/margins": 6.0860915184021, + "rewards/rejected": 15.696698188781738, + "step": 2920 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.73, + "learning_rate": 8.610681204833951e-06, + "logps/chosen": -551.95703125, + "logps/margins": -78.12256622314453, + "logps/rejected": -473.83447265625, + "loss": 10.8118, + "rewards/chosen": 22.444982528686523, + "rewards/margins": 5.114820957183838, + "rewards/rejected": 17.33016014099121, + "step": 2930 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.73, + "learning_rate": 8.601613768354235e-06, + "logps/chosen": -598.6685791015625, + "logps/margins": -42.750648498535156, + "logps/rejected": -555.91796875, + "loss": 11.2769, + "rewards/chosen": 29.512035369873047, + "rewards/margins": 6.566622257232666, + "rewards/rejected": 22.945415496826172, + "step": 2940 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.74, + "learning_rate": 8.592521646405601e-06, + "logps/chosen": -655.9473876953125, + "logps/margins": -78.31257629394531, + "logps/rejected": -577.634765625, + "loss": 12.3091, + "rewards/chosen": 20.974647521972656, + "rewards/margins": -1.750798225402832, + "rewards/rejected": 22.725448608398438, + "step": 2950 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.74, + "learning_rate": 8.583404901305468e-06, + "logps/chosen": -578.7459106445312, + "logps/margins": -43.37543869018555, + "logps/rejected": -535.3704833984375, + "loss": 9.5829, + "rewards/chosen": 11.995574951171875, + "rewards/margins": 1.5827876329421997, + "rewards/rejected": 10.412786483764648, + "step": 2960 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.74, + "learning_rate": 8.574263595540033e-06, + "logps/chosen": -511.0155334472656, + "logps/margins": -25.8415584564209, + "logps/rejected": -485.17401123046875, + "loss": 10.6524, + "rewards/chosen": 19.8052978515625, + "rewards/margins": -0.1506361961364746, + "rewards/rejected": 19.955934524536133, + "step": 2970 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.74, + "learning_rate": 8.565097791763823e-06, + "logps/chosen": -527.5399169921875, + "logps/margins": 35.12897872924805, + "logps/rejected": -562.6688842773438, + "loss": 9.9852, + "rewards/chosen": 23.186450958251953, + "rewards/margins": 3.152350902557373, + "rewards/rejected": 20.034099578857422, + "step": 2980 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.75, + "learning_rate": 8.555907552799281e-06, + "logps/chosen": -580.979736328125, + "logps/margins": -75.36361694335938, + "logps/rejected": -505.6161193847656, + "loss": 10.6207, + "rewards/chosen": 20.42185401916504, + "rewards/margins": 4.668212890625, + "rewards/rejected": 15.753641128540039, + "step": 2990 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.75, + "learning_rate": 8.546692941636323e-06, + "logps/chosen": -576.7174072265625, + "logps/margins": 11.404217720031738, + "logps/rejected": -588.1216430664062, + "loss": 9.7078, + "rewards/chosen": 20.363977432250977, + "rewards/margins": 15.003039360046387, + "rewards/rejected": 5.3609395027160645, + "step": 3000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.528893587033122, + "eval_logps/chosen": -587.0786743164062, + "eval_logps/margins": -38.78566360473633, + "eval_logps/rejected": -548.29296875, + "eval_loss": 11.00704288482666, + "eval_rewards/chosen": 23.971202850341797, + "eval_rewards/margins": 2.896495819091797, + "eval_rewards/rejected": 21.07470703125, + "eval_runtime": 1178.4945, + "eval_samples_per_second": 12.041, + "eval_steps_per_second": 1.505, + "step": 3000 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.75, + "learning_rate": 8.537454021431914e-06, + "logps/chosen": -592.0455322265625, + "logps/margins": 5.142987251281738, + "logps/rejected": -597.1884765625, + "loss": 12.1415, + "rewards/chosen": 20.409128189086914, + "rewards/margins": 6.214966773986816, + "rewards/rejected": 14.194162368774414, + "step": 3010 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.76, + "learning_rate": 8.528190855509636e-06, + "logps/chosen": -713.1627197265625, + "logps/margins": -34.188262939453125, + "logps/rejected": -678.9744873046875, + "loss": 12.5608, + "rewards/chosen": 11.235442161560059, + "rewards/margins": 6.261881351470947, + "rewards/rejected": 4.973560810089111, + "step": 3020 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.76, + "learning_rate": 8.518903507359251e-06, + "logps/chosen": -571.0693359375, + "logps/margins": 23.126665115356445, + "logps/rejected": -594.1959228515625, + "loss": 11.1628, + "rewards/chosen": 13.8278169631958, + "rewards/margins": 5.018679618835449, + "rewards/rejected": 8.809138298034668, + "step": 3030 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.76, + "learning_rate": 8.50959204063626e-06, + "logps/chosen": -531.1340942382812, + "logps/margins": -71.95362854003906, + "logps/rejected": -459.180419921875, + "loss": 11.422, + "rewards/chosen": 23.312522888183594, + "rewards/margins": -0.60028076171875, + "rewards/rejected": 23.912803649902344, + "step": 3040 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.76, + "learning_rate": 8.500256519161478e-06, + "logps/chosen": -607.8170166015625, + "logps/margins": -42.828834533691406, + "logps/rejected": -564.9882202148438, + "loss": 10.5429, + "rewards/chosen": 16.863420486450195, + "rewards/margins": 2.0597140789031982, + "rewards/rejected": 14.803706169128418, + "step": 3050 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.77, + "learning_rate": 8.490897006920593e-06, + "logps/chosen": -650.1954956054688, + "logps/margins": -95.52268981933594, + "logps/rejected": -554.6727294921875, + "loss": 10.3339, + "rewards/chosen": 69.7179946899414, + "rewards/margins": 1.8333747386932373, + "rewards/rejected": 67.8846206665039, + "step": 3060 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.77, + "learning_rate": 8.481513568063722e-06, + "logps/chosen": -644.6566162109375, + "logps/margins": -61.34050369262695, + "logps/rejected": -583.316162109375, + "loss": 9.9108, + "rewards/chosen": 12.812225341796875, + "rewards/margins": 0.30187854170799255, + "rewards/rejected": 12.510346412658691, + "step": 3070 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.77, + "learning_rate": 8.472106266904974e-06, + "logps/chosen": -723.0354614257812, + "logps/margins": -175.1554718017578, + "logps/rejected": -547.8800048828125, + "loss": 9.8777, + "rewards/chosen": 17.0506534576416, + "rewards/margins": 7.33676290512085, + "rewards/rejected": 9.713891983032227, + "step": 3080 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.77, + "learning_rate": 8.462675167922015e-06, + "logps/chosen": -705.3953857421875, + "logps/margins": -186.85194396972656, + "logps/rejected": -518.5433959960938, + "loss": 10.0868, + "rewards/chosen": 52.273033142089844, + "rewards/margins": 6.303023338317871, + "rewards/rejected": 45.970008850097656, + "step": 3090 + }, + { + "accuracy": 0.5, + "epoch": 0.78, + "learning_rate": 8.453220335755616e-06, + "logps/chosen": -574.177978515625, + "logps/margins": 12.037064552307129, + "logps/rejected": -586.2150268554688, + "loss": 10.4521, + "rewards/chosen": 30.27509117126465, + "rewards/margins": 4.26205587387085, + "rewards/rejected": 26.013036727905273, + "step": 3100 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.78, + "learning_rate": 8.443741835209222e-06, + "logps/chosen": -696.2009887695312, + "logps/margins": -183.9231414794922, + "logps/rejected": -512.2778930664062, + "loss": 10.9806, + "rewards/chosen": 11.048113822937012, + "rewards/margins": -1.0858529806137085, + "rewards/rejected": 12.133966445922852, + "step": 3110 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.78, + "learning_rate": 8.434239731248493e-06, + "logps/chosen": -471.6160583496094, + "logps/margins": 42.40312957763672, + "logps/rejected": -514.0191650390625, + "loss": 9.8894, + "rewards/chosen": 19.61908531188965, + "rewards/margins": 2.6063990592956543, + "rewards/rejected": 17.012685775756836, + "step": 3120 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.78, + "learning_rate": 8.424714089000873e-06, + "logps/chosen": -521.9314575195312, + "logps/margins": -44.80106735229492, + "logps/rejected": -477.13037109375, + "loss": 12.5412, + "rewards/chosen": 23.353687286376953, + "rewards/margins": -1.6675012111663818, + "rewards/rejected": 25.021188735961914, + "step": 3130 + }, + { + "accuracy": 0.4000000059604645, + "epoch": 0.79, + "learning_rate": 8.415164973755136e-06, + "logps/chosen": -580.8997802734375, + "logps/margins": 21.875686645507812, + "logps/rejected": -602.7755126953125, + "loss": 11.6823, + "rewards/chosen": 22.068866729736328, + "rewards/margins": -8.161616325378418, + "rewards/rejected": 30.230484008789062, + "step": 3140 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.79, + "learning_rate": 8.40559245096094e-06, + "logps/chosen": -515.0301513671875, + "logps/margins": 58.92683029174805, + "logps/rejected": -573.9570922851562, + "loss": 9.9466, + "rewards/chosen": 8.593412399291992, + "rewards/margins": 1.2842706441879272, + "rewards/rejected": 7.309141635894775, + "step": 3150 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.79, + "learning_rate": 8.395996586228377e-06, + "logps/chosen": -589.3844604492188, + "logps/margins": -54.964515686035156, + "logps/rejected": -534.4199829101562, + "loss": 9.6959, + "rewards/chosen": 13.812200546264648, + "rewards/margins": 2.2055344581604004, + "rewards/rejected": 11.606666564941406, + "step": 3160 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.79, + "learning_rate": 8.386377445327525e-06, + "logps/chosen": -694.5712890625, + "logps/margins": -50.960975646972656, + "logps/rejected": -643.6103515625, + "loss": 10.5496, + "rewards/chosen": 22.736196517944336, + "rewards/margins": 5.509345054626465, + "rewards/rejected": 17.226852416992188, + "step": 3170 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.8, + "learning_rate": 8.376735094187998e-06, + "logps/chosen": -550.1765747070312, + "logps/margins": -35.95713424682617, + "logps/rejected": -514.219482421875, + "loss": 9.7045, + "rewards/chosen": 13.028892517089844, + "rewards/margins": 5.049592018127441, + "rewards/rejected": 7.979300498962402, + "step": 3180 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.8, + "learning_rate": 8.367069598898493e-06, + "logps/chosen": -484.75640869140625, + "logps/margins": 64.71224975585938, + "logps/rejected": -549.4686279296875, + "loss": 10.271, + "rewards/chosen": 5.908952713012695, + "rewards/margins": -1.0636565685272217, + "rewards/rejected": 6.9726104736328125, + "step": 3190 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.8, + "learning_rate": 8.357381025706336e-06, + "logps/chosen": -574.8518676757812, + "logps/margins": -39.20204162597656, + "logps/rejected": -535.6498413085938, + "loss": 11.0216, + "rewards/chosen": 3.8692448139190674, + "rewards/margins": 2.738515853881836, + "rewards/rejected": 1.1307283639907837, + "step": 3200 + }, + { + "accuracy": 0.5, + "epoch": 0.8, + "learning_rate": 8.34766944101703e-06, + "logps/chosen": -614.4434814453125, + "logps/margins": -4.9648661613464355, + "logps/rejected": -609.4785766601562, + "loss": 11.2176, + "rewards/chosen": 10.441901206970215, + "rewards/margins": 0.7612373232841492, + "rewards/rejected": 9.680663108825684, + "step": 3210 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.81, + "learning_rate": 8.337934911393797e-06, + "logps/chosen": -597.3021240234375, + "logps/margins": -90.88215637207031, + "logps/rejected": -506.4200134277344, + "loss": 11.4757, + "rewards/chosen": 17.063451766967773, + "rewards/margins": -1.1545127630233765, + "rewards/rejected": 18.21796417236328, + "step": 3220 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.81, + "learning_rate": 8.32817750355713e-06, + "logps/chosen": -585.353759765625, + "logps/margins": -40.85610580444336, + "logps/rejected": -544.4976196289062, + "loss": 10.9561, + "rewards/chosen": 10.614065170288086, + "rewards/margins": 0.4825889468193054, + "rewards/rejected": 10.131476402282715, + "step": 3230 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.81, + "learning_rate": 8.318397284384317e-06, + "logps/chosen": -488.92169189453125, + "logps/margins": 32.99739456176758, + "logps/rejected": -521.9191284179688, + "loss": 7.6803, + "rewards/chosen": 13.133671760559082, + "rewards/margins": 8.195175170898438, + "rewards/rejected": 4.9384965896606445, + "step": 3240 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.81, + "learning_rate": 8.308594320909005e-06, + "logps/chosen": -747.7303466796875, + "logps/margins": -149.6534423828125, + "logps/rejected": -598.0768432617188, + "loss": 12.027, + "rewards/chosen": 14.671236991882324, + "rewards/margins": -2.223201274871826, + "rewards/rejected": 16.894439697265625, + "step": 3250 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.81, + "learning_rate": 8.29876868032073e-06, + "logps/chosen": -611.6815185546875, + "logps/margins": -42.70147705078125, + "logps/rejected": -568.9801025390625, + "loss": 11.3528, + "rewards/chosen": 16.898107528686523, + "rewards/margins": 1.2483265399932861, + "rewards/rejected": 15.649778366088867, + "step": 3260 + }, + { + "accuracy": 0.5625, + "epoch": 0.82, + "learning_rate": 8.288920429964455e-06, + "logps/chosen": -558.2926635742188, + "logps/margins": 40.71219253540039, + "logps/rejected": -599.0048217773438, + "loss": 9.3924, + "rewards/chosen": 7.422599792480469, + "rewards/margins": -0.16577453911304474, + "rewards/rejected": 7.588374137878418, + "step": 3270 + }, + { + "accuracy": 0.5, + "epoch": 0.82, + "learning_rate": 8.27904963734011e-06, + "logps/chosen": -679.0831298828125, + "logps/margins": -78.85713195800781, + "logps/rejected": -600.2259521484375, + "loss": 12.462, + "rewards/chosen": 17.566518783569336, + "rewards/margins": -0.20607924461364746, + "rewards/rejected": 17.772598266601562, + "step": 3280 + }, + { + "accuracy": 0.5625, + "epoch": 0.82, + "learning_rate": 8.269156370102127e-06, + "logps/chosen": -643.384033203125, + "logps/margins": 24.7620792388916, + "logps/rejected": -668.1461181640625, + "loss": 10.6107, + "rewards/chosen": 18.30504035949707, + "rewards/margins": 5.829590797424316, + "rewards/rejected": 12.47545051574707, + "step": 3290 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.82, + "learning_rate": 8.259240696058984e-06, + "logps/chosen": -537.9276733398438, + "logps/margins": -24.31950569152832, + "logps/rejected": -513.6082153320312, + "loss": 9.8458, + "rewards/chosen": 14.890310287475586, + "rewards/margins": 0.35617581009864807, + "rewards/rejected": 14.534136772155762, + "step": 3300 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.83, + "learning_rate": 8.249302683172734e-06, + "logps/chosen": -668.287109375, + "logps/margins": -114.14908599853516, + "logps/rejected": -554.1380615234375, + "loss": 8.9063, + "rewards/chosen": 8.8367338180542, + "rewards/margins": 3.824545383453369, + "rewards/rejected": 5.012188911437988, + "step": 3310 + }, + { + "accuracy": 0.5625, + "epoch": 0.83, + "learning_rate": 8.239342399558539e-06, + "logps/chosen": -635.1751708984375, + "logps/margins": -28.71160888671875, + "logps/rejected": -606.4635620117188, + "loss": 9.3172, + "rewards/chosen": 28.072551727294922, + "rewards/margins": 3.76531720161438, + "rewards/rejected": 24.307235717773438, + "step": 3320 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.83, + "learning_rate": 8.229359913484206e-06, + "logps/chosen": -564.9771728515625, + "logps/margins": -35.79045867919922, + "logps/rejected": -529.1867065429688, + "loss": 10.5233, + "rewards/chosen": 20.325950622558594, + "rewards/margins": 2.1301403045654297, + "rewards/rejected": 18.195810317993164, + "step": 3330 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.83, + "learning_rate": 8.219355293369715e-06, + "logps/chosen": -795.2503662109375, + "logps/margins": -130.8046875, + "logps/rejected": -664.4456787109375, + "loss": 10.4903, + "rewards/chosen": 9.494401931762695, + "rewards/margins": 1.4555656909942627, + "rewards/rejected": 8.038837432861328, + "step": 3340 + }, + { + "accuracy": 0.5, + "epoch": 0.84, + "learning_rate": 8.209328607786758e-06, + "logps/chosen": -577.583740234375, + "logps/margins": 13.89433765411377, + "logps/rejected": -591.47802734375, + "loss": 8.8606, + "rewards/chosen": 7.97430419921875, + "rewards/margins": 0.7148431539535522, + "rewards/rejected": 7.25946044921875, + "step": 3350 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.84, + "learning_rate": 8.19927992545826e-06, + "logps/chosen": -665.0473022460938, + "logps/margins": -121.62535095214844, + "logps/rejected": -543.4219970703125, + "loss": 9.9774, + "rewards/chosen": 40.83235168457031, + "rewards/margins": 3.930485248565674, + "rewards/rejected": 36.90186309814453, + "step": 3360 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 0.84, + "learning_rate": 8.18920931525791e-06, + "logps/chosen": -658.7816162109375, + "logps/margins": -134.9693603515625, + "logps/rejected": -523.8121948242188, + "loss": 10.0595, + "rewards/chosen": 24.297611236572266, + "rewards/margins": 8.954660415649414, + "rewards/rejected": 15.342951774597168, + "step": 3370 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.84, + "learning_rate": 8.179116846209695e-06, + "logps/chosen": -582.1703491210938, + "logps/margins": -41.33595657348633, + "logps/rejected": -540.8343505859375, + "loss": 8.5278, + "rewards/chosen": 23.78520965576172, + "rewards/margins": 2.8009278774261475, + "rewards/rejected": 20.984283447265625, + "step": 3380 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.85, + "learning_rate": 8.169002587487422e-06, + "logps/chosen": -593.1799926757812, + "logps/margins": -14.60925006866455, + "logps/rejected": -578.5707397460938, + "loss": 11.2245, + "rewards/chosen": 20.195598602294922, + "rewards/margins": 1.767198920249939, + "rewards/rejected": 18.42840003967285, + "step": 3390 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.85, + "learning_rate": 8.158866608414241e-06, + "logps/chosen": -653.7808837890625, + "logps/margins": -44.71619415283203, + "logps/rejected": -609.064697265625, + "loss": 8.0693, + "rewards/chosen": 20.85158348083496, + "rewards/margins": 4.416594505310059, + "rewards/rejected": 16.43499183654785, + "step": 3400 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.85, + "learning_rate": 8.148708978462177e-06, + "logps/chosen": -673.0454711914062, + "logps/margins": -90.73997497558594, + "logps/rejected": -582.3054809570312, + "loss": 10.8492, + "rewards/chosen": 19.65393829345703, + "rewards/margins": 1.4700915813446045, + "rewards/rejected": 18.183847427368164, + "step": 3410 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 0.85, + "learning_rate": 8.138529767251647e-06, + "logps/chosen": -648.8692626953125, + "logps/margins": -120.49897766113281, + "logps/rejected": -528.3702392578125, + "loss": 9.2589, + "rewards/chosen": 16.66043472290039, + "rewards/margins": 11.488256454467773, + "rewards/rejected": 5.172178745269775, + "step": 3420 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.86, + "learning_rate": 8.128329044550986e-06, + "logps/chosen": -708.0987548828125, + "logps/margins": -140.07037353515625, + "logps/rejected": -568.0283203125, + "loss": 10.0804, + "rewards/chosen": 18.884510040283203, + "rewards/margins": 2.3543405532836914, + "rewards/rejected": 16.530170440673828, + "step": 3430 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.86, + "learning_rate": 8.118106880275978e-06, + "logps/chosen": -574.4185180664062, + "logps/margins": -20.684133529663086, + "logps/rejected": -553.7343139648438, + "loss": 11.0103, + "rewards/chosen": 26.078536987304688, + "rewards/margins": 0.745343029499054, + "rewards/rejected": 25.333194732666016, + "step": 3440 + }, + { + "accuracy": 0.4375, + "epoch": 0.86, + "learning_rate": 8.107863344489351e-06, + "logps/chosen": -522.4810791015625, + "logps/margins": 32.531471252441406, + "logps/rejected": -555.0125732421875, + "loss": 9.5404, + "rewards/chosen": 18.130870819091797, + "rewards/margins": 0.8302842378616333, + "rewards/rejected": 17.300586700439453, + "step": 3450 + }, + { + "accuracy": 0.4375, + "epoch": 0.86, + "learning_rate": 8.097598507400328e-06, + "logps/chosen": -558.696533203125, + "logps/margins": 24.14459800720215, + "logps/rejected": -582.8411254882812, + "loss": 10.995, + "rewards/chosen": 7.363114833831787, + "rewards/margins": -3.5262503623962402, + "rewards/rejected": 10.889366149902344, + "step": 3460 + }, + { + "accuracy": 0.625, + "epoch": 0.87, + "learning_rate": 8.087312439364125e-06, + "logps/chosen": -742.0897216796875, + "logps/margins": -26.351852416992188, + "logps/rejected": -715.73779296875, + "loss": 8.9996, + "rewards/chosen": 15.51904582977295, + "rewards/margins": 9.318208694458008, + "rewards/rejected": 6.200835227966309, + "step": 3470 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.87, + "learning_rate": 8.077005210881474e-06, + "logps/chosen": -544.5194702148438, + "logps/margins": -74.82978820800781, + "logps/rejected": -469.68963623046875, + "loss": 10.5624, + "rewards/chosen": 12.441781997680664, + "rewards/margins": -0.5381302833557129, + "rewards/rejected": 12.979913711547852, + "step": 3480 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.87, + "learning_rate": 8.066676892598144e-06, + "logps/chosen": -502.7864685058594, + "logps/margins": 113.5978775024414, + "logps/rejected": -616.3843994140625, + "loss": 11.9636, + "rewards/chosen": 20.935791015625, + "rewards/margins": 0.6394471526145935, + "rewards/rejected": 20.296342849731445, + "step": 3490 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.88, + "learning_rate": 8.056327555304451e-06, + "logps/chosen": -664.3436279296875, + "logps/margins": -215.05886840820312, + "logps/rejected": -449.2847595214844, + "loss": 10.7513, + "rewards/chosen": 6.141355991363525, + "rewards/margins": -0.23536062240600586, + "rewards/rejected": 6.376716613769531, + "step": 3500 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.88, + "learning_rate": 8.045957269934777e-06, + "logps/chosen": -618.7826538085938, + "logps/margins": -41.38749313354492, + "logps/rejected": -577.3951416015625, + "loss": 9.4355, + "rewards/chosen": 18.072790145874023, + "rewards/margins": 5.946843147277832, + "rewards/rejected": 12.125948905944824, + "step": 3510 + }, + { + "accuracy": 0.5, + "epoch": 0.88, + "learning_rate": 8.035566107567079e-06, + "logps/chosen": -577.3445434570312, + "logps/margins": 68.25811004638672, + "logps/rejected": -645.6026611328125, + "loss": 9.7491, + "rewards/chosen": 25.621906280517578, + "rewards/margins": 5.693347930908203, + "rewards/rejected": 19.928556442260742, + "step": 3520 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.88, + "learning_rate": 8.025154139422409e-06, + "logps/chosen": -605.0213012695312, + "logps/margins": 18.198001861572266, + "logps/rejected": -623.21923828125, + "loss": 10.9343, + "rewards/chosen": 8.894143104553223, + "rewards/margins": 2.081423282623291, + "rewards/rejected": 6.812719821929932, + "step": 3530 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.89, + "learning_rate": 8.01472143686442e-06, + "logps/chosen": -490.2281188964844, + "logps/margins": 17.360719680786133, + "logps/rejected": -507.5888671875, + "loss": 9.0982, + "rewards/chosen": 10.083946228027344, + "rewards/margins": -0.07837333530187607, + "rewards/rejected": 10.162318229675293, + "step": 3540 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.89, + "learning_rate": 8.004268071398882e-06, + "logps/chosen": -686.8416137695312, + "logps/margins": -155.02093505859375, + "logps/rejected": -531.8206787109375, + "loss": 9.8422, + "rewards/chosen": 20.610958099365234, + "rewards/margins": 2.7297470569610596, + "rewards/rejected": 17.881210327148438, + "step": 3550 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.89, + "learning_rate": 7.993794114673182e-06, + "logps/chosen": -531.8142700195312, + "logps/margins": 98.95161437988281, + "logps/rejected": -630.765869140625, + "loss": 13.5673, + "rewards/chosen": 15.945837020874023, + "rewards/margins": -3.926853895187378, + "rewards/rejected": 19.872692108154297, + "step": 3560 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.89, + "learning_rate": 7.983299638475844e-06, + "logps/chosen": -672.9358520507812, + "logps/margins": 4.3897247314453125, + "logps/rejected": -677.3255004882812, + "loss": 10.8481, + "rewards/chosen": 10.326391220092773, + "rewards/margins": 4.405461311340332, + "rewards/rejected": 5.920930862426758, + "step": 3570 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.9, + "learning_rate": 7.97278471473603e-06, + "logps/chosen": -569.230224609375, + "logps/margins": -152.9534454345703, + "logps/rejected": -416.27679443359375, + "loss": 11.7097, + "rewards/chosen": 14.2042875289917, + "rewards/margins": -1.8978767395019531, + "rewards/rejected": 16.102163314819336, + "step": 3580 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.9, + "learning_rate": 7.962249415523053e-06, + "logps/chosen": -496.5062561035156, + "logps/margins": 24.402597427368164, + "logps/rejected": -520.9088745117188, + "loss": 9.7405, + "rewards/chosen": 20.112430572509766, + "rewards/margins": 5.830418586730957, + "rewards/rejected": 14.282014846801758, + "step": 3590 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.9, + "learning_rate": 7.951693813045877e-06, + "logps/chosen": -566.36865234375, + "logps/margins": 55.19733810424805, + "logps/rejected": -621.5660400390625, + "loss": 10.7422, + "rewards/chosen": 24.813159942626953, + "rewards/margins": 1.7257187366485596, + "rewards/rejected": 23.087438583374023, + "step": 3600 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.9, + "learning_rate": 7.941117979652627e-06, + "logps/chosen": -524.0512084960938, + "logps/margins": -71.71086120605469, + "logps/rejected": -452.34039306640625, + "loss": 7.3534, + "rewards/chosen": 19.603607177734375, + "rewards/margins": 3.2682807445526123, + "rewards/rejected": 16.335329055786133, + "step": 3610 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.91, + "learning_rate": 7.930521987830086e-06, + "logps/chosen": -691.5386962890625, + "logps/margins": -89.1763916015625, + "logps/rejected": -602.3624267578125, + "loss": 9.1852, + "rewards/chosen": 21.67724609375, + "rewards/margins": 6.021707057952881, + "rewards/rejected": 15.655540466308594, + "step": 3620 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.91, + "learning_rate": 7.91990591020321e-06, + "logps/chosen": -589.9906616210938, + "logps/margins": 30.194122314453125, + "logps/rejected": -620.184814453125, + "loss": 8.8813, + "rewards/chosen": 23.663148880004883, + "rewards/margins": 9.488630294799805, + "rewards/rejected": 14.174517631530762, + "step": 3630 + }, + { + "accuracy": 0.5, + "epoch": 0.91, + "learning_rate": 7.909269819534615e-06, + "logps/chosen": -602.5100708007812, + "logps/margins": -22.3107967376709, + "logps/rejected": -580.1993408203125, + "loss": 8.3438, + "rewards/chosen": 16.662282943725586, + "rewards/margins": 5.950827121734619, + "rewards/rejected": 10.711456298828125, + "step": 3640 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.91, + "learning_rate": 7.898613788724092e-06, + "logps/chosen": -542.5157470703125, + "logps/margins": -95.60443115234375, + "logps/rejected": -446.91131591796875, + "loss": 10.4712, + "rewards/chosen": 22.013347625732422, + "rewards/margins": 0.6519597768783569, + "rewards/rejected": 21.36138916015625, + "step": 3650 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.92, + "learning_rate": 7.88900637253509e-06, + "logps/chosen": -536.93896484375, + "logps/margins": 73.59855651855469, + "logps/rejected": -610.5374755859375, + "loss": 11.1221, + "rewards/chosen": 15.317214965820312, + "rewards/margins": 1.162511944770813, + "rewards/rejected": 14.154703140258789, + "step": 3660 + }, + { + "accuracy": 0.625, + "epoch": 0.92, + "learning_rate": 7.878312656782903e-06, + "logps/chosen": -550.0130004882812, + "logps/margins": 25.890583038330078, + "logps/rejected": -575.9035034179688, + "loss": 8.3827, + "rewards/chosen": 9.671546936035156, + "rewards/margins": 3.9940857887268066, + "rewards/rejected": 5.677460670471191, + "step": 3670 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.92, + "learning_rate": 7.867599213069254e-06, + "logps/chosen": -605.7865600585938, + "logps/margins": 43.248878479003906, + "logps/rejected": -649.0354614257812, + "loss": 11.7397, + "rewards/chosen": 7.296550750732422, + "rewards/margins": -0.5635194778442383, + "rewards/rejected": 7.86007022857666, + "step": 3680 + }, + { + "accuracy": 0.5, + "epoch": 0.92, + "learning_rate": 7.856866114824106e-06, + "logps/chosen": -542.1768798828125, + "logps/margins": -50.091064453125, + "logps/rejected": -492.0858459472656, + "loss": 11.5064, + "rewards/chosen": 30.71941566467285, + "rewards/margins": -0.7555795907974243, + "rewards/rejected": 31.474994659423828, + "step": 3690 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.93, + "learning_rate": 7.846113435612141e-06, + "logps/chosen": -544.1241455078125, + "logps/margins": 0.8223663568496704, + "logps/rejected": -544.9464721679688, + "loss": 9.4594, + "rewards/chosen": 24.787845611572266, + "rewards/margins": 6.067644119262695, + "rewards/rejected": 18.720203399658203, + "step": 3700 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.93, + "learning_rate": 7.835341249132247e-06, + "logps/chosen": -568.6514282226562, + "logps/margins": -30.892887115478516, + "logps/rejected": -537.758544921875, + "loss": 9.9563, + "rewards/chosen": 11.988810539245605, + "rewards/margins": 10.362959861755371, + "rewards/rejected": 1.6258512735366821, + "step": 3710 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.93, + "learning_rate": 7.824549629217014e-06, + "logps/chosen": -647.934814453125, + "logps/margins": -147.85287475585938, + "logps/rejected": -500.0819396972656, + "loss": 10.7803, + "rewards/chosen": 20.397146224975586, + "rewards/margins": 7.69791316986084, + "rewards/rejected": 12.699233055114746, + "step": 3720 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.93, + "learning_rate": 7.81373864983223e-06, + "logps/chosen": -682.5199584960938, + "logps/margins": -29.673885345458984, + "logps/rejected": -652.8460693359375, + "loss": 10.4613, + "rewards/chosen": 27.286357879638672, + "rewards/margins": 0.10265235602855682, + "rewards/rejected": 27.183706283569336, + "step": 3730 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.94, + "learning_rate": 7.802908385076372e-06, + "logps/chosen": -568.5144653320312, + "logps/margins": -114.43055725097656, + "logps/rejected": -454.083984375, + "loss": 10.8957, + "rewards/chosen": 17.90009117126465, + "rewards/margins": -3.0548481941223145, + "rewards/rejected": 20.95494270324707, + "step": 3740 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.94, + "learning_rate": 7.792058909180096e-06, + "logps/chosen": -591.3794555664062, + "logps/margins": -66.30741882324219, + "logps/rejected": -525.072021484375, + "loss": 9.8198, + "rewards/chosen": 20.1241397857666, + "rewards/margins": 5.063544273376465, + "rewards/rejected": 15.06059455871582, + "step": 3750 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.94, + "learning_rate": 7.781190296505738e-06, + "logps/chosen": -518.2606201171875, + "logps/margins": 19.163639068603516, + "logps/rejected": -537.4242553710938, + "loss": 9.6539, + "rewards/chosen": 25.938913345336914, + "rewards/margins": -0.1592981368303299, + "rewards/rejected": 26.09821128845215, + "step": 3760 + }, + { + "accuracy": 0.5625, + "epoch": 0.94, + "learning_rate": 7.770302621546793e-06, + "logps/chosen": -681.4319458007812, + "logps/margins": -100.9528579711914, + "logps/rejected": -580.4790649414062, + "loss": 9.4029, + "rewards/chosen": 19.000181198120117, + "rewards/margins": -1.6768465042114258, + "rewards/rejected": 20.67702865600586, + "step": 3770 + }, + { + "accuracy": 0.5625, + "epoch": 0.94, + "learning_rate": 7.759395958927411e-06, + "logps/chosen": -572.2050170898438, + "logps/margins": 63.0095100402832, + "logps/rejected": -635.2144775390625, + "loss": 11.5559, + "rewards/chosen": 8.42485237121582, + "rewards/margins": 2.8352527618408203, + "rewards/rejected": 5.589601039886475, + "step": 3780 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.95, + "learning_rate": 7.748470383401881e-06, + "logps/chosen": -583.0518188476562, + "logps/margins": -80.54475402832031, + "logps/rejected": -502.50701904296875, + "loss": 11.198, + "rewards/chosen": 17.389463424682617, + "rewards/margins": 6.845282077789307, + "rewards/rejected": 10.544181823730469, + "step": 3790 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.95, + "learning_rate": 7.73752596985412e-06, + "logps/chosen": -482.6484375, + "logps/margins": 129.3907012939453, + "logps/rejected": -612.0391235351562, + "loss": 10.0375, + "rewards/chosen": 16.296772003173828, + "rewards/margins": -0.11539535224437714, + "rewards/rejected": 16.412166595458984, + "step": 3800 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.95, + "learning_rate": 7.726562793297166e-06, + "logps/chosen": -675.8967895507812, + "logps/margins": -238.5634307861328, + "logps/rejected": -437.33331298828125, + "loss": 9.1438, + "rewards/chosen": 22.535127639770508, + "rewards/margins": 7.849436283111572, + "rewards/rejected": 14.685691833496094, + "step": 3810 + }, + { + "accuracy": 0.4375, + "epoch": 0.95, + "learning_rate": 7.715580928872657e-06, + "logps/chosen": -565.17724609375, + "logps/margins": 75.16026306152344, + "logps/rejected": -640.3375244140625, + "loss": 9.7283, + "rewards/chosen": 29.549388885498047, + "rewards/margins": 3.121112585067749, + "rewards/rejected": 26.42827796936035, + "step": 3820 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.96, + "learning_rate": 7.704580451850315e-06, + "logps/chosen": -535.541259765625, + "logps/margins": -38.62874984741211, + "logps/rejected": -496.91253662109375, + "loss": 7.9549, + "rewards/chosen": 25.085689544677734, + "rewards/margins": 0.287175714969635, + "rewards/rejected": 24.79851531982422, + "step": 3830 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.96, + "learning_rate": 7.693561437627433e-06, + "logps/chosen": -596.3238525390625, + "logps/margins": 44.347381591796875, + "logps/rejected": -640.6712646484375, + "loss": 12.0001, + "rewards/chosen": 17.400714874267578, + "rewards/margins": 2.5862326622009277, + "rewards/rejected": 14.814483642578125, + "step": 3840 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.96, + "learning_rate": 7.682523961728362e-06, + "logps/chosen": -559.55810546875, + "logps/margins": -9.524127006530762, + "logps/rejected": -550.0339965820312, + "loss": 8.4152, + "rewards/chosen": 3.2484123706817627, + "rewards/margins": 1.4683513641357422, + "rewards/rejected": 1.7800607681274414, + "step": 3850 + }, + { + "accuracy": 0.5, + "epoch": 0.96, + "learning_rate": 7.671468099803985e-06, + "logps/chosen": -724.8496704101562, + "logps/margins": -46.546661376953125, + "logps/rejected": -678.3030395507812, + "loss": 11.1863, + "rewards/chosen": 22.058523178100586, + "rewards/margins": 6.375931739807129, + "rewards/rejected": 15.682592391967773, + "step": 3860 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.97, + "learning_rate": 7.660393927631206e-06, + "logps/chosen": -525.0152587890625, + "logps/margins": 31.793914794921875, + "logps/rejected": -556.8091430664062, + "loss": 9.6478, + "rewards/chosen": 14.582748413085938, + "rewards/margins": 1.3341197967529297, + "rewards/rejected": 13.248629570007324, + "step": 3870 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.97, + "learning_rate": 7.649301521112427e-06, + "logps/chosen": -572.9917602539062, + "logps/margins": 19.38045883178711, + "logps/rejected": -592.3721923828125, + "loss": 10.016, + "rewards/chosen": 14.786270141601562, + "rewards/margins": 2.7644567489624023, + "rewards/rejected": 12.021815299987793, + "step": 3880 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.97, + "learning_rate": 7.638190956275024e-06, + "logps/chosen": -664.0778198242188, + "logps/margins": -107.64323425292969, + "logps/rejected": -556.4345703125, + "loss": 10.1195, + "rewards/chosen": 17.066375732421875, + "rewards/margins": 8.49321460723877, + "rewards/rejected": 8.573160171508789, + "step": 3890 + }, + { + "accuracy": 0.5, + "epoch": 0.97, + "learning_rate": 7.627062309270836e-06, + "logps/chosen": -509.6859436035156, + "logps/margins": 26.752639770507812, + "logps/rejected": -536.4385375976562, + "loss": 9.2987, + "rewards/chosen": 20.353092193603516, + "rewards/margins": 1.8782964944839478, + "rewards/rejected": 18.474794387817383, + "step": 3900 + }, + { + "accuracy": 0.5, + "epoch": 0.98, + "learning_rate": 7.615915656375634e-06, + "logps/chosen": -520.7156372070312, + "logps/margins": -30.83820152282715, + "logps/rejected": -489.87744140625, + "loss": 8.9334, + "rewards/chosen": 12.524681091308594, + "rewards/margins": 0.7851442098617554, + "rewards/rejected": 11.739537239074707, + "step": 3910 + }, + { + "accuracy": 0.5, + "epoch": 0.98, + "learning_rate": 7.604751073988602e-06, + "logps/chosen": -559.7516479492188, + "logps/margins": -65.13895416259766, + "logps/rejected": -494.61273193359375, + "loss": 10.9305, + "rewards/chosen": 17.97171974182129, + "rewards/margins": 1.1420542001724243, + "rewards/rejected": 16.829662322998047, + "step": 3920 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.98, + "learning_rate": 7.593568638631814e-06, + "logps/chosen": -516.7072143554688, + "logps/margins": 134.2766876220703, + "logps/rejected": -650.9840087890625, + "loss": 8.8826, + "rewards/chosen": 27.66353988647461, + "rewards/margins": 2.9184353351593018, + "rewards/rejected": 24.745101928710938, + "step": 3930 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.98, + "learning_rate": 7.582368426949707e-06, + "logps/chosen": -681.2191772460938, + "logps/margins": -72.19258117675781, + "logps/rejected": -609.0265502929688, + "loss": 10.8405, + "rewards/chosen": 12.949435234069824, + "rewards/margins": 5.107350826263428, + "rewards/rejected": 7.842083930969238, + "step": 3940 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.99, + "learning_rate": 7.571150515708556e-06, + "logps/chosen": -578.3388671875, + "logps/margins": -11.005200386047363, + "logps/rejected": -567.3336791992188, + "loss": 9.119, + "rewards/chosen": 14.62934684753418, + "rewards/margins": 2.3392677307128906, + "rewards/rejected": 12.290080070495605, + "step": 3950 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.99, + "learning_rate": 7.55991498179595e-06, + "logps/chosen": -733.1796875, + "logps/margins": -62.109886169433594, + "logps/rejected": -671.0697631835938, + "loss": 10.3119, + "rewards/chosen": 15.53846549987793, + "rewards/margins": 4.128052234649658, + "rewards/rejected": 11.410415649414062, + "step": 3960 + }, + { + "accuracy": 0.4375, + "epoch": 0.99, + "learning_rate": 7.548661902220267e-06, + "logps/chosen": -558.9464111328125, + "logps/margins": 28.542200088500977, + "logps/rejected": -587.488525390625, + "loss": 11.5539, + "rewards/chosen": 11.556836128234863, + "rewards/margins": -1.6085067987442017, + "rewards/rejected": 13.165342330932617, + "step": 3970 + }, + { + "accuracy": 0.4375, + "epoch": 0.99, + "learning_rate": 7.537391354110135e-06, + "logps/chosen": -596.0733642578125, + "logps/margins": -36.80451202392578, + "logps/rejected": -559.2688598632812, + "loss": 11.0467, + "rewards/chosen": 14.827234268188477, + "rewards/margins": 0.7807717323303223, + "rewards/rejected": 14.04646110534668, + "step": 3980 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.0, + "learning_rate": 7.5261034147139214e-06, + "logps/chosen": -503.9693908691406, + "logps/margins": -66.37969970703125, + "logps/rejected": -437.5897521972656, + "loss": 11.8516, + "rewards/chosen": 18.21736717224121, + "rewards/margins": -0.6564952731132507, + "rewards/rejected": 18.873859405517578, + "step": 3990 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.0, + "learning_rate": 7.514798161399186e-06, + "logps/chosen": -701.7164916992188, + "logps/margins": -80.7412109375, + "logps/rejected": -620.9752197265625, + "loss": 9.513, + "rewards/chosen": 12.667195320129395, + "rewards/margins": 5.035970687866211, + "rewards/rejected": 7.631224155426025, + "step": 4000 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.0, + "learning_rate": 7.503475671652158e-06, + "logps/chosen": -657.3594360351562, + "logps/margins": 35.20092010498047, + "logps/rejected": -692.5603637695312, + "loss": 12.0115, + "rewards/chosen": 22.099361419677734, + "rewards/margins": -3.3416831493377686, + "rewards/rejected": 25.441043853759766, + "step": 4010 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.0, + "learning_rate": 7.492136023077211e-06, + "logps/chosen": -656.546630859375, + "logps/margins": 20.269567489624023, + "logps/rejected": -676.8162231445312, + "loss": 8.255, + "rewards/chosen": 14.197685241699219, + "rewards/margins": 4.022665977478027, + "rewards/rejected": 10.175020217895508, + "step": 4020 + }, + { + "accuracy": 0.5, + "epoch": 1.01, + "learning_rate": 7.4807792933963205e-06, + "logps/chosen": -595.1652221679688, + "logps/margins": -35.4486198425293, + "logps/rejected": -559.716552734375, + "loss": 10.0572, + "rewards/chosen": 16.891971588134766, + "rewards/margins": -2.3584752082824707, + "rewards/rejected": 19.250446319580078, + "step": 4030 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.01, + "learning_rate": 7.469405560448539e-06, + "logps/chosen": -634.9166870117188, + "logps/margins": -2.9178192615509033, + "logps/rejected": -631.9989013671875, + "loss": 8.0821, + "rewards/chosen": 26.616100311279297, + "rewards/margins": 5.1511430740356445, + "rewards/rejected": 21.4649600982666, + "step": 4040 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.01, + "learning_rate": 7.458014902189459e-06, + "logps/chosen": -664.1004638671875, + "logps/margins": -25.032140731811523, + "logps/rejected": -639.0682373046875, + "loss": 9.822, + "rewards/chosen": 18.148151397705078, + "rewards/margins": 2.4134926795959473, + "rewards/rejected": 15.734660148620605, + "step": 4050 + }, + { + "accuracy": 0.5625, + "epoch": 1.01, + "learning_rate": 7.446607396690674e-06, + "logps/chosen": -620.4410400390625, + "logps/margins": -5.398178577423096, + "logps/rejected": -615.0428466796875, + "loss": 8.6786, + "rewards/chosen": 10.08648681640625, + "rewards/margins": 2.590729236602783, + "rewards/rejected": 7.495758056640625, + "step": 4060 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.02, + "learning_rate": 7.435183122139255e-06, + "logps/chosen": -637.9634399414062, + "logps/margins": 20.559955596923828, + "logps/rejected": -658.5233764648438, + "loss": 11.0632, + "rewards/chosen": 13.691388130187988, + "rewards/margins": -1.3703029155731201, + "rewards/rejected": 15.061689376831055, + "step": 4070 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.02, + "learning_rate": 7.423742156837204e-06, + "logps/chosen": -674.7283935546875, + "logps/margins": -70.2494125366211, + "logps/rejected": -604.47900390625, + "loss": 9.8457, + "rewards/chosen": 14.415226936340332, + "rewards/margins": 6.195303916931152, + "rewards/rejected": 8.219922065734863, + "step": 4080 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.02, + "learning_rate": 7.412284579200925e-06, + "logps/chosen": -552.8629150390625, + "logps/margins": -51.17779541015625, + "logps/rejected": -501.68505859375, + "loss": 9.68, + "rewards/chosen": 12.8196439743042, + "rewards/margins": -4.233913898468018, + "rewards/rejected": 17.053558349609375, + "step": 4090 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.02, + "learning_rate": 7.40081046776068e-06, + "logps/chosen": -594.5017700195312, + "logps/margins": -62.89166259765625, + "logps/rejected": -531.610107421875, + "loss": 10.6221, + "rewards/chosen": 9.387157440185547, + "rewards/margins": -0.9759442210197449, + "rewards/rejected": 10.3631010055542, + "step": 4100 + }, + { + "accuracy": 0.625, + "epoch": 1.03, + "learning_rate": 7.389319901160056e-06, + "logps/chosen": -584.715576171875, + "logps/margins": -84.28900146484375, + "logps/rejected": -500.42657470703125, + "loss": 8.1936, + "rewards/chosen": 20.205387115478516, + "rewards/margins": 7.967074394226074, + "rewards/rejected": 12.238309860229492, + "step": 4110 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.03, + "learning_rate": 7.37781295815542e-06, + "logps/chosen": -624.7756958007812, + "logps/margins": -27.46091079711914, + "logps/rejected": -597.3148193359375, + "loss": 9.8127, + "rewards/chosen": 20.323345184326172, + "rewards/margins": 0.1769283264875412, + "rewards/rejected": 20.146419525146484, + "step": 4120 + }, + { + "accuracy": 0.5625, + "epoch": 1.03, + "learning_rate": 7.366289717615388e-06, + "logps/chosen": -641.2994995117188, + "logps/margins": -22.580509185791016, + "logps/rejected": -618.718994140625, + "loss": 8.293, + "rewards/chosen": 12.823104858398438, + "rewards/margins": 1.9570577144622803, + "rewards/rejected": 10.866048812866211, + "step": 4130 + }, + { + "accuracy": 0.625, + "epoch": 1.03, + "learning_rate": 7.354750258520276e-06, + "logps/chosen": -667.0498046875, + "logps/margins": 24.271678924560547, + "logps/rejected": -691.3214721679688, + "loss": 8.3521, + "rewards/chosen": 16.209936141967773, + "rewards/margins": 7.602850437164307, + "rewards/rejected": 8.607085227966309, + "step": 4140 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.04, + "learning_rate": 7.34319465996156e-06, + "logps/chosen": -666.50146484375, + "logps/margins": -186.0122833251953, + "logps/rejected": -480.4891662597656, + "loss": 9.727, + "rewards/chosen": 16.487300872802734, + "rewards/margins": 1.9370126724243164, + "rewards/rejected": 14.550287246704102, + "step": 4150 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.04, + "learning_rate": 7.331623001141343e-06, + "logps/chosen": -550.5108642578125, + "logps/margins": -51.222557067871094, + "logps/rejected": -499.28826904296875, + "loss": 9.8795, + "rewards/chosen": 21.38043785095215, + "rewards/margins": 2.106489658355713, + "rewards/rejected": 19.273948669433594, + "step": 4160 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.04, + "learning_rate": 7.320035361371799e-06, + "logps/chosen": -571.89453125, + "logps/margins": 0.5586913824081421, + "logps/rejected": -572.4532470703125, + "loss": 9.5857, + "rewards/chosen": 14.386314392089844, + "rewards/margins": 1.6044563055038452, + "rewards/rejected": 12.781856536865234, + "step": 4170 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.04, + "learning_rate": 7.308431820074637e-06, + "logps/chosen": -553.707275390625, + "logps/margins": -17.948810577392578, + "logps/rejected": -535.758544921875, + "loss": 10.0361, + "rewards/chosen": 13.450729370117188, + "rewards/margins": -2.5351874828338623, + "rewards/rejected": 15.985916137695312, + "step": 4180 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.05, + "learning_rate": 7.296812456780554e-06, + "logps/chosen": -628.6981201171875, + "logps/margins": -116.25621032714844, + "logps/rejected": -512.44189453125, + "loss": 9.4908, + "rewards/chosen": 17.452648162841797, + "rewards/margins": 2.067063808441162, + "rewards/rejected": 15.385584831237793, + "step": 4190 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.05, + "learning_rate": 7.285177351128695e-06, + "logps/chosen": -672.6380004882812, + "logps/margins": -116.74320220947266, + "logps/rejected": -555.894775390625, + "loss": 9.6434, + "rewards/chosen": 13.330810546875, + "rewards/margins": -0.11802148818969727, + "rewards/rejected": 13.448832511901855, + "step": 4200 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.05, + "learning_rate": 7.273526582866098e-06, + "logps/chosen": -531.7286376953125, + "logps/margins": 22.184986114501953, + "logps/rejected": -553.91357421875, + "loss": 9.579, + "rewards/chosen": 18.254589080810547, + "rewards/margins": 9.311025619506836, + "rewards/rejected": 8.943562507629395, + "step": 4210 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.05, + "learning_rate": 7.261860231847158e-06, + "logps/chosen": -611.2303466796875, + "logps/margins": -37.31039047241211, + "logps/rejected": -573.9200439453125, + "loss": 8.9149, + "rewards/chosen": 28.57184410095215, + "rewards/margins": 13.256256103515625, + "rewards/rejected": 15.315587043762207, + "step": 4220 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.06, + "learning_rate": 7.25017837803307e-06, + "logps/chosen": -643.6930541992188, + "logps/margins": -47.72447204589844, + "logps/rejected": -595.9685668945312, + "loss": 11.6042, + "rewards/chosen": 14.718179702758789, + "rewards/margins": 2.758030652999878, + "rewards/rejected": 11.960149765014648, + "step": 4230 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.06, + "learning_rate": 7.238481101491283e-06, + "logps/chosen": -481.87042236328125, + "logps/margins": 25.723400115966797, + "logps/rejected": -507.5938415527344, + "loss": 9.7055, + "rewards/chosen": 6.210371971130371, + "rewards/margins": -0.05915398523211479, + "rewards/rejected": 6.269526481628418, + "step": 4240 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.06, + "learning_rate": 7.226768482394961e-06, + "logps/chosen": -509.94146728515625, + "logps/margins": 84.38920593261719, + "logps/rejected": -594.3306884765625, + "loss": 9.8299, + "rewards/chosen": 11.366849899291992, + "rewards/margins": 1.3220767974853516, + "rewards/rejected": 10.04477310180664, + "step": 4250 + }, + { + "accuracy": 0.5625, + "epoch": 1.06, + "learning_rate": 7.215040601022421e-06, + "logps/chosen": -588.5303955078125, + "logps/margins": -132.66171264648438, + "logps/rejected": -455.86871337890625, + "loss": 9.5092, + "rewards/chosen": 18.962696075439453, + "rewards/margins": 6.089469909667969, + "rewards/rejected": 12.873225212097168, + "step": 4260 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.07, + "learning_rate": 7.2032975377565885e-06, + "logps/chosen": -619.9846801757812, + "logps/margins": -60.08556365966797, + "logps/rejected": -559.8990478515625, + "loss": 8.0341, + "rewards/chosen": 12.389080047607422, + "rewards/margins": 6.451897621154785, + "rewards/rejected": 5.937182426452637, + "step": 4270 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.07, + "learning_rate": 7.191539373084444e-06, + "logps/chosen": -475.9000549316406, + "logps/margins": 31.109455108642578, + "logps/rejected": -507.00946044921875, + "loss": 9.3528, + "rewards/chosen": 14.103042602539062, + "rewards/margins": 4.521309852600098, + "rewards/rejected": 9.581731796264648, + "step": 4280 + }, + { + "accuracy": 0.5625, + "epoch": 1.07, + "learning_rate": 7.179766187596478e-06, + "logps/chosen": -667.6243286132812, + "logps/margins": -161.53176879882812, + "logps/rejected": -506.09259033203125, + "loss": 7.8611, + "rewards/chosen": 20.534860610961914, + "rewards/margins": 8.234492301940918, + "rewards/rejected": 12.300365447998047, + "step": 4290 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.07, + "learning_rate": 7.1679780619861265e-06, + "logps/chosen": -627.7063598632812, + "logps/margins": -27.023029327392578, + "logps/rejected": -600.6832885742188, + "loss": 10.4372, + "rewards/chosen": 30.828487396240234, + "rewards/margins": 4.551738262176514, + "rewards/rejected": 26.276752471923828, + "step": 4300 + }, + { + "accuracy": 0.5, + "epoch": 1.08, + "learning_rate": 7.156175077049232e-06, + "logps/chosen": -575.5839233398438, + "logps/margins": -38.87522506713867, + "logps/rejected": -536.7086791992188, + "loss": 8.4652, + "rewards/chosen": 10.572076797485352, + "rewards/margins": 0.4772973656654358, + "rewards/rejected": 10.094779014587402, + "step": 4310 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.08, + "learning_rate": 7.144357313683479e-06, + "logps/chosen": -554.1640625, + "logps/margins": -45.89463424682617, + "logps/rejected": -508.2693786621094, + "loss": 11.1403, + "rewards/chosen": 2.8936798572540283, + "rewards/margins": -5.526806831359863, + "rewards/rejected": 8.420485496520996, + "step": 4320 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.08, + "learning_rate": 7.132524852887845e-06, + "logps/chosen": -655.2017211914062, + "logps/margins": -0.14141693711280823, + "logps/rejected": -655.0603637695312, + "loss": 8.9998, + "rewards/chosen": 34.9572639465332, + "rewards/margins": 1.4154398441314697, + "rewards/rejected": 33.54182052612305, + "step": 4330 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.08, + "learning_rate": 7.120677775762044e-06, + "logps/chosen": -601.6637573242188, + "logps/margins": -12.083990097045898, + "logps/rejected": -589.5797729492188, + "loss": 9.8975, + "rewards/chosen": 30.282150268554688, + "rewards/margins": 2.765232563018799, + "rewards/rejected": 27.516918182373047, + "step": 4340 + }, + { + "accuracy": 0.4375, + "epoch": 1.09, + "learning_rate": 7.108816163505965e-06, + "logps/chosen": -604.1182250976562, + "logps/margins": 9.5507173538208, + "logps/rejected": -613.6688842773438, + "loss": 11.7085, + "rewards/chosen": 9.133367538452148, + "rewards/margins": -9.697785377502441, + "rewards/rejected": 18.831153869628906, + "step": 4350 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.09, + "learning_rate": 7.0969400974191295e-06, + "logps/chosen": -580.3289794921875, + "logps/margins": -51.98638916015625, + "logps/rejected": -528.3425903320312, + "loss": 9.6027, + "rewards/chosen": 21.71453285217285, + "rewards/margins": 0.5898569822311401, + "rewards/rejected": 21.124675750732422, + "step": 4360 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.09, + "learning_rate": 7.08504965890012e-06, + "logps/chosen": -639.2381591796875, + "logps/margins": -117.7774429321289, + "logps/rejected": -521.460693359375, + "loss": 11.7807, + "rewards/chosen": 23.28557014465332, + "rewards/margins": 1.9887611865997314, + "rewards/rejected": 21.296810150146484, + "step": 4370 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.09, + "learning_rate": 7.07314492944603e-06, + "logps/chosen": -530.3563232421875, + "logps/margins": -110.5643539428711, + "logps/rejected": -419.7919006347656, + "loss": 7.9143, + "rewards/chosen": 11.660337448120117, + "rewards/margins": -0.6639792919158936, + "rewards/rejected": 12.32431697845459, + "step": 4380 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.1, + "learning_rate": 7.061225990651902e-06, + "logps/chosen": -587.0665283203125, + "logps/margins": -51.5008544921875, + "logps/rejected": -535.5657348632812, + "loss": 10.0063, + "rewards/chosen": 15.772730827331543, + "rewards/margins": 9.559380531311035, + "rewards/rejected": 6.213352680206299, + "step": 4390 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.1, + "learning_rate": 7.049292924210167e-06, + "logps/chosen": -699.37646484375, + "logps/margins": -97.90028381347656, + "logps/rejected": -601.4762573242188, + "loss": 9.3751, + "rewards/chosen": 18.023426055908203, + "rewards/margins": 6.826565742492676, + "rewards/rejected": 11.196860313415527, + "step": 4400 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.1, + "learning_rate": 7.037345811910091e-06, + "logps/chosen": -595.02978515625, + "logps/margins": -60.354461669921875, + "logps/rejected": -534.67529296875, + "loss": 9.924, + "rewards/chosen": 18.885034561157227, + "rewards/margins": 3.7846381664276123, + "rewards/rejected": 15.100395202636719, + "step": 4410 + }, + { + "accuracy": 0.5625, + "epoch": 1.1, + "learning_rate": 7.025384735637209e-06, + "logps/chosen": -635.20068359375, + "logps/margins": -133.60401916503906, + "logps/rejected": -501.59674072265625, + "loss": 10.3811, + "rewards/chosen": 23.001201629638672, + "rewards/margins": 4.048158168792725, + "rewards/rejected": 18.953044891357422, + "step": 4420 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.11, + "learning_rate": 7.013409777372765e-06, + "logps/chosen": -588.6281127929688, + "logps/margins": -25.535118103027344, + "logps/rejected": -563.093017578125, + "loss": 8.3709, + "rewards/chosen": 17.534225463867188, + "rewards/margins": 0.699182391166687, + "rewards/rejected": 16.83504295349121, + "step": 4430 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.11, + "learning_rate": 7.001421019193151e-06, + "logps/chosen": -542.7562255859375, + "logps/margins": -99.9024429321289, + "logps/rejected": -442.853759765625, + "loss": 10.6212, + "rewards/chosen": 5.329944610595703, + "rewards/margins": 1.178192377090454, + "rewards/rejected": 4.151752471923828, + "step": 4440 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.11, + "learning_rate": 6.989418543269341e-06, + "logps/chosen": -712.9873657226562, + "logps/margins": -176.8262481689453, + "logps/rejected": -536.1611328125, + "loss": 9.2662, + "rewards/chosen": 1.5166842937469482, + "rewards/margins": -0.8680270910263062, + "rewards/rejected": 2.3847110271453857, + "step": 4450 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.11, + "learning_rate": 6.977402431866331e-06, + "logps/chosen": -551.7241821289062, + "logps/margins": 30.57217025756836, + "logps/rejected": -582.2962646484375, + "loss": 8.3379, + "rewards/chosen": 18.142868041992188, + "rewards/margins": 1.03926420211792, + "rewards/rejected": 17.10360336303711, + "step": 4460 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.12, + "learning_rate": 6.9653727673425785e-06, + "logps/chosen": -682.8448486328125, + "logps/margins": -73.8694076538086, + "logps/rejected": -608.9754028320312, + "loss": 10.0019, + "rewards/chosen": 17.409618377685547, + "rewards/margins": -0.07768688350915909, + "rewards/rejected": 17.4873046875, + "step": 4470 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.12, + "learning_rate": 6.95332963214943e-06, + "logps/chosen": -598.482421875, + "logps/margins": 4.308962345123291, + "logps/rejected": -602.7913208007812, + "loss": 9.2766, + "rewards/chosen": 14.323320388793945, + "rewards/margins": 3.396200656890869, + "rewards/rejected": 10.92712116241455, + "step": 4480 + }, + { + "accuracy": 0.38749998807907104, + "epoch": 1.12, + "learning_rate": 6.941273108830563e-06, + "logps/chosen": -584.4751586914062, + "logps/margins": -90.8985824584961, + "logps/rejected": -493.5765686035156, + "loss": 10.1344, + "rewards/chosen": 11.961258888244629, + "rewards/margins": -4.7644805908203125, + "rewards/rejected": 16.725740432739258, + "step": 4490 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.12, + "learning_rate": 6.9292032800214135e-06, + "logps/chosen": -774.3654174804688, + "logps/margins": -108.67464447021484, + "logps/rejected": -665.6907958984375, + "loss": 10.2131, + "rewards/chosen": 10.611570358276367, + "rewards/margins": -2.9416213035583496, + "rewards/rejected": 13.553192138671875, + "step": 4500 + }, + { + "accuracy": 0.5, + "epoch": 1.13, + "learning_rate": 6.917120228448615e-06, + "logps/chosen": -593.4318237304688, + "logps/margins": -133.0118408203125, + "logps/rejected": -460.4200134277344, + "loss": 10.0714, + "rewards/chosen": 18.744373321533203, + "rewards/margins": 0.2943130433559418, + "rewards/rejected": 18.450061798095703, + "step": 4510 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.13, + "learning_rate": 6.905024036929433e-06, + "logps/chosen": -557.03125, + "logps/margins": -36.74125289916992, + "logps/rejected": -520.2899780273438, + "loss": 10.1787, + "rewards/chosen": 24.1259822845459, + "rewards/margins": -1.671494722366333, + "rewards/rejected": 25.797475814819336, + "step": 4520 + }, + { + "accuracy": 0.7250000238418579, + "epoch": 1.13, + "learning_rate": 6.892914788371189e-06, + "logps/chosen": -635.5712890625, + "logps/margins": -70.38050842285156, + "logps/rejected": -565.1907958984375, + "loss": 7.4005, + "rewards/chosen": 16.646474838256836, + "rewards/margins": 11.042746543884277, + "rewards/rejected": 5.603727340698242, + "step": 4530 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.14, + "learning_rate": 6.880792565770701e-06, + "logps/chosen": -514.6046142578125, + "logps/margins": -85.4898452758789, + "logps/rejected": -429.1148376464844, + "loss": 7.8817, + "rewards/chosen": 11.575087547302246, + "rewards/margins": 5.041314125061035, + "rewards/rejected": 6.533771514892578, + "step": 4540 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.14, + "learning_rate": 6.868657452213712e-06, + "logps/chosen": -581.8590698242188, + "logps/margins": -91.28489685058594, + "logps/rejected": -490.5741271972656, + "loss": 7.3327, + "rewards/chosen": 12.486246109008789, + "rewards/margins": 5.500300884246826, + "rewards/rejected": 6.985945701599121, + "step": 4550 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 1.14, + "learning_rate": 6.856509530874315e-06, + "logps/chosen": -523.0977172851562, + "logps/margins": 85.36385345458984, + "logps/rejected": -608.4615478515625, + "loss": 9.4152, + "rewards/chosen": 15.65026569366455, + "rewards/margins": -0.6000126600265503, + "rewards/rejected": 16.25027847290039, + "step": 4560 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.14, + "learning_rate": 6.844348885014391e-06, + "logps/chosen": -570.0127563476562, + "logps/margins": -116.9560546875, + "logps/rejected": -453.05670166015625, + "loss": 9.1722, + "rewards/chosen": 23.635826110839844, + "rewards/margins": 6.348434925079346, + "rewards/rejected": 17.287389755249023, + "step": 4570 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.15, + "learning_rate": 6.832175597983035e-06, + "logps/chosen": -586.2013549804688, + "logps/margins": -8.127923011779785, + "logps/rejected": -578.073486328125, + "loss": 8.3949, + "rewards/chosen": 15.957934379577637, + "rewards/margins": 0.7728201150894165, + "rewards/rejected": 15.185113906860352, + "step": 4580 + }, + { + "accuracy": 0.5625, + "epoch": 1.15, + "learning_rate": 6.819989753215983e-06, + "logps/chosen": -590.9134521484375, + "logps/margins": -77.81314849853516, + "logps/rejected": -513.1002807617188, + "loss": 9.9174, + "rewards/chosen": 10.421762466430664, + "rewards/margins": 2.14864444732666, + "rewards/rejected": 8.27311897277832, + "step": 4590 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.15, + "learning_rate": 6.8077914342350435e-06, + "logps/chosen": -588.4954223632812, + "logps/margins": -10.75060749053955, + "logps/rejected": -577.744873046875, + "loss": 8.7883, + "rewards/chosen": 21.301668167114258, + "rewards/margins": 4.65571928024292, + "rewards/rejected": 16.645950317382812, + "step": 4600 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.15, + "learning_rate": 6.795580724647523e-06, + "logps/chosen": -616.9813232421875, + "logps/margins": -82.67198181152344, + "logps/rejected": -534.3093872070312, + "loss": 8.8517, + "rewards/chosen": 18.223691940307617, + "rewards/margins": 5.144467353820801, + "rewards/rejected": 13.079225540161133, + "step": 4610 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.16, + "learning_rate": 6.78335770814565e-06, + "logps/chosen": -621.3577880859375, + "logps/margins": -52.59630584716797, + "logps/rejected": -568.7615356445312, + "loss": 9.007, + "rewards/chosen": 10.830148696899414, + "rewards/margins": -1.3421748876571655, + "rewards/rejected": 12.172323226928711, + "step": 4620 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.16, + "learning_rate": 6.771122468506011e-06, + "logps/chosen": -574.4246826171875, + "logps/margins": 58.8470344543457, + "logps/rejected": -633.2716674804688, + "loss": 10.4864, + "rewards/chosen": 8.39540958404541, + "rewards/margins": -0.6201708316802979, + "rewards/rejected": 9.015580177307129, + "step": 4630 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.16, + "learning_rate": 6.758875089588961e-06, + "logps/chosen": -587.578857421875, + "logps/margins": -101.94442749023438, + "logps/rejected": -485.63446044921875, + "loss": 8.118, + "rewards/chosen": 9.850728988647461, + "rewards/margins": -3.9947686195373535, + "rewards/rejected": 13.845499038696289, + "step": 4640 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.16, + "learning_rate": 6.746615655338068e-06, + "logps/chosen": -565.5685424804688, + "logps/margins": -51.834083557128906, + "logps/rejected": -513.7344970703125, + "loss": 7.5751, + "rewards/chosen": 19.51181411743164, + "rewards/margins": 2.237417221069336, + "rewards/rejected": 17.274394989013672, + "step": 4650 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.17, + "learning_rate": 6.734344249779521e-06, + "logps/chosen": -665.9302368164062, + "logps/margins": -138.86166381835938, + "logps/rejected": -527.068603515625, + "loss": 9.1127, + "rewards/chosen": 20.64487075805664, + "rewards/margins": 3.5906853675842285, + "rewards/rejected": 17.054187774658203, + "step": 4660 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.17, + "learning_rate": 6.72206095702156e-06, + "logps/chosen": -654.910888671875, + "logps/margins": -72.11922454833984, + "logps/rejected": -582.7916259765625, + "loss": 7.9438, + "rewards/chosen": 19.023317337036133, + "rewards/margins": 2.796055793762207, + "rewards/rejected": 16.227262496948242, + "step": 4670 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.17, + "learning_rate": 6.709765861253901e-06, + "logps/chosen": -621.2952270507812, + "logps/margins": -103.27718353271484, + "logps/rejected": -518.0179443359375, + "loss": 8.9843, + "rewards/chosen": 14.968803405761719, + "rewards/margins": 0.555211067199707, + "rewards/rejected": 14.413592338562012, + "step": 4680 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.17, + "learning_rate": 6.697459046747159e-06, + "logps/chosen": -540.6524658203125, + "logps/margins": -74.23405456542969, + "logps/rejected": -466.41839599609375, + "loss": 9.2012, + "rewards/chosen": 18.80353355407715, + "rewards/margins": 4.802393436431885, + "rewards/rejected": 14.001141548156738, + "step": 4690 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.18, + "learning_rate": 6.685140597852269e-06, + "logps/chosen": -592.5223388671875, + "logps/margins": 5.995031833648682, + "logps/rejected": -598.517333984375, + "loss": 7.6956, + "rewards/chosen": 28.734050750732422, + "rewards/margins": 2.1288490295410156, + "rewards/rejected": 26.605199813842773, + "step": 4700 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.18, + "learning_rate": 6.672810598999908e-06, + "logps/chosen": -556.445068359375, + "logps/margins": 12.520895004272461, + "logps/rejected": -568.9659423828125, + "loss": 9.1733, + "rewards/chosen": 17.679698944091797, + "rewards/margins": 3.316242218017578, + "rewards/rejected": 14.363456726074219, + "step": 4710 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.18, + "learning_rate": 6.660469134699915e-06, + "logps/chosen": -577.5178833007812, + "logps/margins": -53.476036071777344, + "logps/rejected": -524.0418701171875, + "loss": 9.1504, + "rewards/chosen": 12.82477855682373, + "rewards/margins": -1.5666865110397339, + "rewards/rejected": 14.391467094421387, + "step": 4720 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.18, + "learning_rate": 6.648116289540714e-06, + "logps/chosen": -666.3054809570312, + "logps/margins": 44.45446014404297, + "logps/rejected": -710.7598876953125, + "loss": 9.4696, + "rewards/chosen": 20.359203338623047, + "rewards/margins": 2.9928441047668457, + "rewards/rejected": 17.36635971069336, + "step": 4730 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.19, + "learning_rate": 6.635752148188733e-06, + "logps/chosen": -570.4151000976562, + "logps/margins": -77.20024108886719, + "logps/rejected": -493.21490478515625, + "loss": 9.6335, + "rewards/chosen": 15.249404907226562, + "rewards/margins": 3.4817185401916504, + "rewards/rejected": 11.767684936523438, + "step": 4740 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.19, + "learning_rate": 6.623376795387827e-06, + "logps/chosen": -626.0394897460938, + "logps/margins": -40.56245040893555, + "logps/rejected": -585.47705078125, + "loss": 7.5903, + "rewards/chosen": 9.297224044799805, + "rewards/margins": 4.475728511810303, + "rewards/rejected": 4.821494102478027, + "step": 4750 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.19, + "learning_rate": 6.610990315958692e-06, + "logps/chosen": -582.8409423828125, + "logps/margins": -75.4446792602539, + "logps/rejected": -507.39630126953125, + "loss": 10.3544, + "rewards/chosen": 17.548702239990234, + "rewards/margins": 5.71473503112793, + "rewards/rejected": 11.833967208862305, + "step": 4760 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.19, + "learning_rate": 6.598592794798284e-06, + "logps/chosen": -645.3778686523438, + "logps/margins": -91.62894439697266, + "logps/rejected": -553.7489013671875, + "loss": 10.41, + "rewards/chosen": 14.960217475891113, + "rewards/margins": -2.8089938163757324, + "rewards/rejected": 17.769214630126953, + "step": 4770 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.2, + "learning_rate": 6.586184316879244e-06, + "logps/chosen": -541.2896118164062, + "logps/margins": -94.1109619140625, + "logps/rejected": -447.1786193847656, + "loss": 9.8738, + "rewards/chosen": 12.0540189743042, + "rewards/margins": -1.291947603225708, + "rewards/rejected": 13.345967292785645, + "step": 4780 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.2, + "learning_rate": 6.573764967249304e-06, + "logps/chosen": -598.0709838867188, + "logps/margins": 19.031848907470703, + "logps/rejected": -617.1028442382812, + "loss": 9.5922, + "rewards/chosen": 18.679536819458008, + "rewards/margins": 2.871368885040283, + "rewards/rejected": 15.80816650390625, + "step": 4790 + }, + { + "accuracy": 0.5625, + "epoch": 1.2, + "learning_rate": 6.561334831030717e-06, + "logps/chosen": -546.0628662109375, + "logps/margins": 43.303443908691406, + "logps/rejected": -589.3663330078125, + "loss": 10.4067, + "rewards/chosen": 15.189065933227539, + "rewards/margins": 3.5714049339294434, + "rewards/rejected": 11.617659568786621, + "step": 4800 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.2, + "learning_rate": 6.548893993419664e-06, + "logps/chosen": -594.3072509765625, + "logps/margins": 8.140060424804688, + "logps/rejected": -602.4472045898438, + "loss": 9.8144, + "rewards/chosen": 20.143295288085938, + "rewards/margins": 6.580397129058838, + "rewards/rejected": 13.562899589538574, + "step": 4810 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.21, + "learning_rate": 6.536442539685675e-06, + "logps/chosen": -605.498779296875, + "logps/margins": -69.20304870605469, + "logps/rejected": -536.2957153320312, + "loss": 8.8796, + "rewards/chosen": 13.871084213256836, + "rewards/margins": -1.137249231338501, + "rewards/rejected": 15.008334159851074, + "step": 4820 + }, + { + "accuracy": 0.625, + "epoch": 1.21, + "learning_rate": 6.523980555171041e-06, + "logps/chosen": -603.6917114257812, + "logps/margins": -75.93836212158203, + "logps/rejected": -527.7533569335938, + "loss": 8.7251, + "rewards/chosen": 17.992172241210938, + "rewards/margins": 5.426838397979736, + "rewards/rejected": 12.565335273742676, + "step": 4830 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.21, + "learning_rate": 6.511508125290233e-06, + "logps/chosen": -540.4547119140625, + "logps/margins": 9.098905563354492, + "logps/rejected": -549.5535278320312, + "loss": 9.6261, + "rewards/chosen": 13.338488578796387, + "rewards/margins": -0.4812222421169281, + "rewards/rejected": 13.819711685180664, + "step": 4840 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.21, + "learning_rate": 6.499025335529312e-06, + "logps/chosen": -717.9833984375, + "logps/margins": -201.05059814453125, + "logps/rejected": -516.9328002929688, + "loss": 9.8242, + "rewards/chosen": 27.516876220703125, + "rewards/margins": 0.5229851007461548, + "rewards/rejected": 26.993886947631836, + "step": 4850 + }, + { + "accuracy": 0.5625, + "epoch": 1.22, + "learning_rate": 6.486532271445349e-06, + "logps/chosen": -578.3492431640625, + "logps/margins": -57.5036506652832, + "logps/rejected": -520.8455810546875, + "loss": 7.1879, + "rewards/chosen": 20.576129913330078, + "rewards/margins": 3.5093276500701904, + "rewards/rejected": 17.066801071166992, + "step": 4860 + }, + { + "accuracy": 0.675000011920929, + "epoch": 1.22, + "learning_rate": 6.474029018665834e-06, + "logps/chosen": -585.490966796875, + "logps/margins": 62.590728759765625, + "logps/rejected": -648.0817260742188, + "loss": 9.4463, + "rewards/chosen": 18.430753707885742, + "rewards/margins": 6.359074592590332, + "rewards/rejected": 12.071680068969727, + "step": 4870 + }, + { + "accuracy": 0.625, + "epoch": 1.22, + "learning_rate": 6.46151566288809e-06, + "logps/chosen": -596.1143798828125, + "logps/margins": 59.162986755371094, + "logps/rejected": -655.27734375, + "loss": 8.391, + "rewards/chosen": 18.687786102294922, + "rewards/margins": 5.970137596130371, + "rewards/rejected": 12.717645645141602, + "step": 4880 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.22, + "learning_rate": 6.4489922898786805e-06, + "logps/chosen": -572.008544921875, + "logps/margins": -124.23038482666016, + "logps/rejected": -447.7782287597656, + "loss": 7.8696, + "rewards/chosen": 23.004793167114258, + "rewards/margins": 5.3413238525390625, + "rewards/rejected": 17.663467407226562, + "step": 4890 + }, + { + "accuracy": 0.5625, + "epoch": 1.23, + "learning_rate": 6.436458985472839e-06, + "logps/chosen": -639.0369873046875, + "logps/margins": -163.1796875, + "logps/rejected": -475.85723876953125, + "loss": 7.6772, + "rewards/chosen": 28.317569732666016, + "rewards/margins": 5.713476657867432, + "rewards/rejected": 22.604093551635742, + "step": 4900 + }, + { + "accuracy": 0.625, + "epoch": 1.23, + "learning_rate": 6.4239158355738584e-06, + "logps/chosen": -535.2227783203125, + "logps/margins": 23.124067306518555, + "logps/rejected": -558.3468017578125, + "loss": 8.9844, + "rewards/chosen": 18.002239227294922, + "rewards/margins": 4.318982124328613, + "rewards/rejected": 13.683255195617676, + "step": 4910 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.23, + "learning_rate": 6.41136292615252e-06, + "logps/chosen": -638.65380859375, + "logps/margins": -51.215919494628906, + "logps/rejected": -587.4378662109375, + "loss": 9.4449, + "rewards/chosen": 9.911506652832031, + "rewards/margins": 1.9219672679901123, + "rewards/rejected": 7.989540100097656, + "step": 4920 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.23, + "learning_rate": 6.39880034324649e-06, + "logps/chosen": -488.660400390625, + "logps/margins": 22.69782066345215, + "logps/rejected": -511.3582458496094, + "loss": 8.1251, + "rewards/chosen": 20.555299758911133, + "rewards/margins": 3.1678054332733154, + "rewards/rejected": 17.387493133544922, + "step": 4930 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.23, + "learning_rate": 6.386228172959742e-06, + "logps/chosen": -461.69659423828125, + "logps/margins": 27.260986328125, + "logps/rejected": -488.95751953125, + "loss": 9.1284, + "rewards/chosen": 10.884035110473633, + "rewards/margins": 4.314483165740967, + "rewards/rejected": 6.569552421569824, + "step": 4940 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.24, + "learning_rate": 6.373646501461958e-06, + "logps/chosen": -560.2979125976562, + "logps/margins": 37.86301803588867, + "logps/rejected": -598.1609497070312, + "loss": 10.1888, + "rewards/chosen": 12.691306114196777, + "rewards/margins": 2.8200879096984863, + "rewards/rejected": 9.871216773986816, + "step": 4950 + }, + { + "accuracy": 0.4124999940395355, + "epoch": 1.24, + "learning_rate": 6.361055414987945e-06, + "logps/chosen": -662.9562377929688, + "logps/margins": -78.11106872558594, + "logps/rejected": -584.8451538085938, + "loss": 8.3135, + "rewards/chosen": 14.116917610168457, + "rewards/margins": -0.8237252235412598, + "rewards/rejected": 14.940643310546875, + "step": 4960 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.24, + "learning_rate": 6.348454999837035e-06, + "logps/chosen": -462.7723693847656, + "logps/margins": 45.37617111206055, + "logps/rejected": -508.1485900878906, + "loss": 10.4542, + "rewards/chosen": 21.325868606567383, + "rewards/margins": 0.28819942474365234, + "rewards/rejected": 21.037670135498047, + "step": 4970 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.25, + "learning_rate": 6.335845342372506e-06, + "logps/chosen": -689.0484619140625, + "logps/margins": -56.80048370361328, + "logps/rejected": -632.2479858398438, + "loss": 9.3076, + "rewards/chosen": 23.148082733154297, + "rewards/margins": 2.0666050910949707, + "rewards/rejected": 21.08147621154785, + "step": 4980 + }, + { + "accuracy": 0.5, + "epoch": 1.25, + "learning_rate": 6.323226529020978e-06, + "logps/chosen": -530.1307373046875, + "logps/margins": -9.259374618530273, + "logps/rejected": -520.8714599609375, + "loss": 10.4226, + "rewards/chosen": 14.855291366577148, + "rewards/margins": 0.6108319163322449, + "rewards/rejected": 14.24445915222168, + "step": 4990 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.25, + "learning_rate": 6.310598646271823e-06, + "logps/chosen": -502.6156311035156, + "logps/margins": -25.10276985168457, + "logps/rejected": -477.51287841796875, + "loss": 8.7818, + "rewards/chosen": 11.72206974029541, + "rewards/margins": 0.3785064220428467, + "rewards/rejected": 11.3435640335083, + "step": 5000 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.25, + "learning_rate": 6.297961780676581e-06, + "logps/chosen": -576.6988525390625, + "logps/margins": -45.04465103149414, + "logps/rejected": -531.6541137695312, + "loss": 10.4381, + "rewards/chosen": 15.58830738067627, + "rewards/margins": 3.9738717079162598, + "rewards/rejected": 11.614436149597168, + "step": 5010 + }, + { + "accuracy": 0.5, + "epoch": 1.25, + "learning_rate": 6.285316018848355e-06, + "logps/chosen": -637.6673583984375, + "logps/margins": -51.02132797241211, + "logps/rejected": -586.6460571289062, + "loss": 9.2374, + "rewards/chosen": 19.482568740844727, + "rewards/margins": 3.575634717941284, + "rewards/rejected": 15.906933784484863, + "step": 5020 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.26, + "learning_rate": 6.272661447461228e-06, + "logps/chosen": -569.5038452148438, + "logps/margins": -44.189422607421875, + "logps/rejected": -525.3143310546875, + "loss": 11.1555, + "rewards/chosen": 15.374031066894531, + "rewards/margins": -2.158506393432617, + "rewards/rejected": 17.53253746032715, + "step": 5030 + }, + { + "accuracy": 0.4375, + "epoch": 1.26, + "learning_rate": 6.259998153249659e-06, + "logps/chosen": -694.3445434570312, + "logps/margins": -235.1769561767578, + "logps/rejected": -459.1676330566406, + "loss": 10.2687, + "rewards/chosen": 7.7294020652771, + "rewards/margins": -1.8063020706176758, + "rewards/rejected": 9.535703659057617, + "step": 5040 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.26, + "learning_rate": 6.247326223007897e-06, + "logps/chosen": -482.3675842285156, + "logps/margins": 20.76206398010254, + "logps/rejected": -503.129638671875, + "loss": 7.413, + "rewards/chosen": 12.757905006408691, + "rewards/margins": 3.5557334423065186, + "rewards/rejected": 9.202171325683594, + "step": 5050 + }, + { + "accuracy": 0.625, + "epoch": 1.27, + "learning_rate": 6.234645743589378e-06, + "logps/chosen": -621.7166137695312, + "logps/margins": -67.03638458251953, + "logps/rejected": -554.68017578125, + "loss": 8.5834, + "rewards/chosen": 18.655439376831055, + "rewards/margins": 5.51022481918335, + "rewards/rejected": 13.14521598815918, + "step": 5060 + }, + { + "accuracy": 0.675000011920929, + "epoch": 1.27, + "learning_rate": 6.221956801906138e-06, + "logps/chosen": -662.2034301757812, + "logps/margins": -104.49214935302734, + "logps/rejected": -557.7111206054688, + "loss": 8.4527, + "rewards/chosen": 16.873844146728516, + "rewards/margins": 4.916099548339844, + "rewards/rejected": 11.957746505737305, + "step": 5070 + }, + { + "accuracy": 0.5625, + "epoch": 1.27, + "learning_rate": 6.2092594849282126e-06, + "logps/chosen": -582.021240234375, + "logps/margins": 15.251180648803711, + "logps/rejected": -597.2723999023438, + "loss": 8.8049, + "rewards/chosen": 30.59884262084961, + "rewards/margins": 3.2912240028381348, + "rewards/rejected": 27.307621002197266, + "step": 5080 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.27, + "learning_rate": 6.196553879683041e-06, + "logps/chosen": -645.8094482421875, + "logps/margins": 77.44926452636719, + "logps/rejected": -723.2587280273438, + "loss": 10.7296, + "rewards/chosen": 13.802746772766113, + "rewards/margins": -1.3899497985839844, + "rewards/rejected": 15.192697525024414, + "step": 5090 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.27, + "learning_rate": 6.18384007325487e-06, + "logps/chosen": -662.3619995117188, + "logps/margins": -95.2347183227539, + "logps/rejected": -567.1272583007812, + "loss": 10.1673, + "rewards/chosen": 14.71519947052002, + "rewards/margins": 2.5730817317962646, + "rewards/rejected": 12.142117500305176, + "step": 5100 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.28, + "learning_rate": 6.171118152784156e-06, + "logps/chosen": -599.5924072265625, + "logps/margins": 3.4024322032928467, + "logps/rejected": -602.9948120117188, + "loss": 9.2015, + "rewards/chosen": 26.340229034423828, + "rewards/margins": 4.131651878356934, + "rewards/rejected": 22.208576202392578, + "step": 5110 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.28, + "learning_rate": 6.158388205466972e-06, + "logps/chosen": -597.4902954101562, + "logps/margins": -63.1513671875, + "logps/rejected": -534.3389282226562, + "loss": 10.6256, + "rewards/chosen": 18.185733795166016, + "rewards/margins": 3.3370628356933594, + "rewards/rejected": 14.848670959472656, + "step": 5120 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.28, + "learning_rate": 6.145650318554407e-06, + "logps/chosen": -599.203369140625, + "logps/margins": -40.07122802734375, + "logps/rejected": -559.1321411132812, + "loss": 9.0515, + "rewards/chosen": 12.657954216003418, + "rewards/margins": 5.486607551574707, + "rewards/rejected": 7.171347141265869, + "step": 5130 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.28, + "learning_rate": 6.132904579351965e-06, + "logps/chosen": -699.0057373046875, + "logps/margins": -147.8622589111328, + "logps/rejected": -551.1434326171875, + "loss": 9.4247, + "rewards/chosen": 11.453560829162598, + "rewards/margins": 1.7000868320465088, + "rewards/rejected": 9.753473281860352, + "step": 5140 + }, + { + "accuracy": 0.5, + "epoch": 1.29, + "learning_rate": 6.1201510752189715e-06, + "logps/chosen": -569.2718505859375, + "logps/margins": 39.02416229248047, + "logps/rejected": -608.2960205078125, + "loss": 9.2579, + "rewards/chosen": 16.639400482177734, + "rewards/margins": -1.4143325090408325, + "rewards/rejected": 18.05373191833496, + "step": 5150 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.29, + "learning_rate": 6.107389893567974e-06, + "logps/chosen": -542.501708984375, + "logps/margins": -37.06194305419922, + "logps/rejected": -505.4398498535156, + "loss": 8.3507, + "rewards/chosen": 15.917261123657227, + "rewards/margins": 6.689794063568115, + "rewards/rejected": 9.22746753692627, + "step": 5160 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.29, + "learning_rate": 6.0946211218641395e-06, + "logps/chosen": -546.2916259765625, + "logps/margins": -72.44542694091797, + "logps/rejected": -473.84625244140625, + "loss": 7.3508, + "rewards/chosen": 18.775968551635742, + "rewards/margins": 4.878602027893066, + "rewards/rejected": 13.897366523742676, + "step": 5170 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.29, + "learning_rate": 6.081844847624657e-06, + "logps/chosen": -621.3878784179688, + "logps/margins": -150.66204833984375, + "logps/rejected": -470.725830078125, + "loss": 9.4796, + "rewards/chosen": 18.46845054626465, + "rewards/margins": -1.2344127893447876, + "rewards/rejected": 19.702861785888672, + "step": 5180 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.3, + "learning_rate": 6.069061158418141e-06, + "logps/chosen": -571.2891845703125, + "logps/margins": -53.815956115722656, + "logps/rejected": -517.47314453125, + "loss": 10.2586, + "rewards/chosen": 15.404231071472168, + "rewards/margins": -5.064778804779053, + "rewards/rejected": 20.469011306762695, + "step": 5190 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.3, + "learning_rate": 6.056270141864026e-06, + "logps/chosen": -650.2880249023438, + "logps/margins": -75.55158996582031, + "logps/rejected": -574.7364501953125, + "loss": 9.3863, + "rewards/chosen": 15.167839050292969, + "rewards/margins": 5.13944149017334, + "rewards/rejected": 10.02839469909668, + "step": 5200 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.3, + "learning_rate": 6.043471885631968e-06, + "logps/chosen": -658.7298583984375, + "logps/margins": -199.4250030517578, + "logps/rejected": -459.3048400878906, + "loss": 9.3192, + "rewards/chosen": 43.64255905151367, + "rewards/margins": 3.9382617473602295, + "rewards/rejected": 39.70429229736328, + "step": 5210 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.3, + "learning_rate": 6.030666477441244e-06, + "logps/chosen": -522.2306518554688, + "logps/margins": -69.74696350097656, + "logps/rejected": -452.48370361328125, + "loss": 11.4314, + "rewards/chosen": 20.27651023864746, + "rewards/margins": 4.411519527435303, + "rewards/rejected": 15.864992141723633, + "step": 5220 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.31, + "learning_rate": 6.01785400506015e-06, + "logps/chosen": -543.7600708007812, + "logps/margins": -94.32052612304688, + "logps/rejected": -449.43951416015625, + "loss": 10.5824, + "rewards/chosen": 25.475189208984375, + "rewards/margins": 0.2732888162136078, + "rewards/rejected": 25.2018985748291, + "step": 5230 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.31, + "learning_rate": 6.0050345563054025e-06, + "logps/chosen": -688.3558959960938, + "logps/margins": -30.88446044921875, + "logps/rejected": -657.4713745117188, + "loss": 9.3251, + "rewards/chosen": 29.716205596923828, + "rewards/margins": 2.6770129203796387, + "rewards/rejected": 27.039196014404297, + "step": 5240 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.31, + "learning_rate": 5.992208219041531e-06, + "logps/chosen": -546.9627685546875, + "logps/margins": -10.580477714538574, + "logps/rejected": -536.38232421875, + "loss": 8.983, + "rewards/chosen": 22.22104835510254, + "rewards/margins": 2.0042996406555176, + "rewards/rejected": 20.21674919128418, + "step": 5250 + }, + { + "accuracy": 0.5625, + "epoch": 1.31, + "learning_rate": 5.9793750811802795e-06, + "logps/chosen": -624.0933837890625, + "logps/margins": -18.649133682250977, + "logps/rejected": -605.4442138671875, + "loss": 8.7351, + "rewards/chosen": 17.549222946166992, + "rewards/margins": 6.494621276855469, + "rewards/rejected": 11.05460262298584, + "step": 5260 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.32, + "learning_rate": 5.966535230680006e-06, + "logps/chosen": -554.544921875, + "logps/margins": -3.8857269287109375, + "logps/rejected": -550.6591796875, + "loss": 8.1353, + "rewards/chosen": 12.27543830871582, + "rewards/margins": 6.414697170257568, + "rewards/rejected": 5.86074161529541, + "step": 5270 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.32, + "learning_rate": 5.9536887555450736e-06, + "logps/chosen": -510.53125, + "logps/margins": -25.178665161132812, + "logps/rejected": -485.3526306152344, + "loss": 9.2118, + "rewards/chosen": 11.298933029174805, + "rewards/margins": -3.0604729652404785, + "rewards/rejected": 14.359407424926758, + "step": 5280 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.32, + "learning_rate": 5.940835743825253e-06, + "logps/chosen": -602.4078369140625, + "logps/margins": -53.994468688964844, + "logps/rejected": -548.4133911132812, + "loss": 8.4706, + "rewards/chosen": 20.938392639160156, + "rewards/margins": 5.43187141418457, + "rewards/rejected": 15.506521224975586, + "step": 5290 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.32, + "learning_rate": 5.927976283615113e-06, + "logps/chosen": -600.4595947265625, + "logps/margins": -106.0486831665039, + "logps/rejected": -494.41094970703125, + "loss": 9.1988, + "rewards/chosen": 23.6138858795166, + "rewards/margins": 5.459677696228027, + "rewards/rejected": 18.154207229614258, + "step": 5300 + }, + { + "accuracy": 0.5, + "epoch": 1.33, + "learning_rate": 5.915110463053425e-06, + "logps/chosen": -542.3008422851562, + "logps/margins": 52.20063018798828, + "logps/rejected": -594.5014038085938, + "loss": 9.0755, + "rewards/chosen": 12.555248260498047, + "rewards/margins": 0.7145259976387024, + "rewards/rejected": 11.84072208404541, + "step": 5310 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.33, + "learning_rate": 5.902238370322554e-06, + "logps/chosen": -511.56317138671875, + "logps/margins": -27.505260467529297, + "logps/rejected": -484.0579528808594, + "loss": 10.0381, + "rewards/chosen": 16.923397064208984, + "rewards/margins": 7.804965972900391, + "rewards/rejected": 9.118431091308594, + "step": 5320 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.33, + "learning_rate": 5.889360093647848e-06, + "logps/chosen": -635.9285278320312, + "logps/margins": 88.66679382324219, + "logps/rejected": -724.5953369140625, + "loss": 8.7897, + "rewards/chosen": 12.007619857788086, + "rewards/margins": 7.555914402008057, + "rewards/rejected": 4.451704502105713, + "step": 5330 + }, + { + "accuracy": 0.5, + "epoch": 1.33, + "learning_rate": 5.876475721297048e-06, + "logps/chosen": -532.5311279296875, + "logps/margins": -79.0572738647461, + "logps/rejected": -453.47393798828125, + "loss": 8.3428, + "rewards/chosen": 13.332174301147461, + "rewards/margins": 3.3372626304626465, + "rewards/rejected": 9.994913101196289, + "step": 5340 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.34, + "learning_rate": 5.863585341579671e-06, + "logps/chosen": -622.5350341796875, + "logps/margins": 61.635337829589844, + "logps/rejected": -684.17041015625, + "loss": 9.8799, + "rewards/chosen": 14.19207763671875, + "rewards/margins": 2.8413100242614746, + "rewards/rejected": 11.350767135620117, + "step": 5350 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.34, + "learning_rate": 5.850689042846408e-06, + "logps/chosen": -712.9805908203125, + "logps/margins": -35.60800552368164, + "logps/rejected": -677.37255859375, + "loss": 10.4476, + "rewards/chosen": 14.118307113647461, + "rewards/margins": -3.564350128173828, + "rewards/rejected": 17.68265724182129, + "step": 5360 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.34, + "learning_rate": 5.83778691348852e-06, + "logps/chosen": -645.9959716796875, + "logps/margins": -45.06897735595703, + "logps/rejected": -600.927001953125, + "loss": 9.0866, + "rewards/chosen": 22.170251846313477, + "rewards/margins": 6.442114353179932, + "rewards/rejected": 15.728137016296387, + "step": 5370 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.34, + "learning_rate": 5.824879041937231e-06, + "logps/chosen": -509.568359375, + "logps/margins": 97.16014099121094, + "logps/rejected": -606.7284545898438, + "loss": 9.0765, + "rewards/chosen": 12.925369262695312, + "rewards/margins": 4.865277290344238, + "rewards/rejected": 8.060091972351074, + "step": 5380 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.35, + "learning_rate": 5.81196551666312e-06, + "logps/chosen": -644.4435424804688, + "logps/margins": -160.2688751220703, + "logps/rejected": -484.17474365234375, + "loss": 9.2804, + "rewards/chosen": 17.82694435119629, + "rewards/margins": 4.105551242828369, + "rewards/rejected": 13.721392631530762, + "step": 5390 + }, + { + "accuracy": 0.5625, + "epoch": 1.35, + "learning_rate": 5.799046426175523e-06, + "logps/chosen": -493.23870849609375, + "logps/margins": -45.02393341064453, + "logps/rejected": -448.2147521972656, + "loss": 7.2645, + "rewards/chosen": 12.509767532348633, + "rewards/margins": 3.5316402912139893, + "rewards/rejected": 8.978126525878906, + "step": 5400 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.35, + "learning_rate": 5.78612185902191e-06, + "logps/chosen": -713.7047729492188, + "logps/margins": -114.13802337646484, + "logps/rejected": -599.5667724609375, + "loss": 9.0185, + "rewards/chosen": 13.844782829284668, + "rewards/margins": 2.7414114475250244, + "rewards/rejected": 11.103372573852539, + "step": 5410 + }, + { + "accuracy": 0.5625, + "epoch": 1.35, + "learning_rate": 5.773191903787296e-06, + "logps/chosen": -709.2800903320312, + "logps/margins": -144.23629760742188, + "logps/rejected": -565.0438232421875, + "loss": 10.6033, + "rewards/chosen": 15.678532600402832, + "rewards/margins": -0.17481489479541779, + "rewards/rejected": 15.853347778320312, + "step": 5420 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.36, + "learning_rate": 5.760256649093625e-06, + "logps/chosen": -473.309814453125, + "logps/margins": 114.12580871582031, + "logps/rejected": -587.4356079101562, + "loss": 8.455, + "rewards/chosen": 18.673664093017578, + "rewards/margins": 4.170456409454346, + "rewards/rejected": 14.503207206726074, + "step": 5430 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.36, + "learning_rate": 5.747316183599159e-06, + "logps/chosen": -665.8555297851562, + "logps/margins": -68.9659194946289, + "logps/rejected": -596.8895263671875, + "loss": 9.1602, + "rewards/chosen": 15.628732681274414, + "rewards/margins": 3.0700385570526123, + "rewards/rejected": 12.558693885803223, + "step": 5440 + }, + { + "accuracy": 0.5, + "epoch": 1.36, + "learning_rate": 5.73437059599788e-06, + "logps/chosen": -630.9876098632812, + "logps/margins": 82.4388656616211, + "logps/rejected": -713.426513671875, + "loss": 10.1671, + "rewards/chosen": 22.473468780517578, + "rewards/margins": 1.397131323814392, + "rewards/rejected": 21.076335906982422, + "step": 5450 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.36, + "learning_rate": 5.721419975018874e-06, + "logps/chosen": -654.8243408203125, + "logps/margins": -21.526996612548828, + "logps/rejected": -633.2973022460938, + "loss": 9.8882, + "rewards/chosen": 16.257015228271484, + "rewards/margins": 0.9529422521591187, + "rewards/rejected": 15.304071426391602, + "step": 5460 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.37, + "learning_rate": 5.708464409425724e-06, + "logps/chosen": -771.3359375, + "logps/margins": -170.65481567382812, + "logps/rejected": -600.6810302734375, + "loss": 11.3577, + "rewards/chosen": 15.04347038269043, + "rewards/margins": 1.9322478771209717, + "rewards/rejected": 13.111224174499512, + "step": 5470 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.37, + "learning_rate": 5.69550398801591e-06, + "logps/chosen": -538.9473266601562, + "logps/margins": 35.34078598022461, + "logps/rejected": -574.2880859375, + "loss": 8.8204, + "rewards/chosen": 15.893794059753418, + "rewards/margins": 1.282684087753296, + "rewards/rejected": 14.611111640930176, + "step": 5480 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.37, + "learning_rate": 5.68253879962019e-06, + "logps/chosen": -536.8682250976562, + "logps/margins": 33.18367004394531, + "logps/rejected": -570.0519409179688, + "loss": 11.4695, + "rewards/chosen": 18.266429901123047, + "rewards/margins": 5.812760829925537, + "rewards/rejected": 12.453669548034668, + "step": 5490 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.38, + "learning_rate": 5.66956893310199e-06, + "logps/chosen": -602.8007202148438, + "logps/margins": -13.274798393249512, + "logps/rejected": -589.5259399414062, + "loss": 10.9558, + "rewards/chosen": 22.650348663330078, + "rewards/margins": 5.6598076820373535, + "rewards/rejected": 16.990543365478516, + "step": 5500 + }, + { + "accuracy": 0.36250001192092896, + "epoch": 1.38, + "learning_rate": 5.656594477356807e-06, + "logps/chosen": -606.2086181640625, + "logps/margins": -12.599712371826172, + "logps/rejected": -593.6088256835938, + "loss": 10.4782, + "rewards/chosen": 7.609148979187012, + "rewards/margins": -10.671507835388184, + "rewards/rejected": 18.280658721923828, + "step": 5510 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.38, + "learning_rate": 5.643615521311591e-06, + "logps/chosen": -775.8338623046875, + "logps/margins": -127.74928283691406, + "logps/rejected": -648.0845947265625, + "loss": 9.7576, + "rewards/chosen": 17.71401596069336, + "rewards/margins": -0.41762199997901917, + "rewards/rejected": 18.13163948059082, + "step": 5520 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.38, + "learning_rate": 5.630632153924135e-06, + "logps/chosen": -682.3362426757812, + "logps/margins": -30.616466522216797, + "logps/rejected": -651.7197265625, + "loss": 11.1383, + "rewards/chosen": 12.430808067321777, + "rewards/margins": -1.9352922439575195, + "rewards/rejected": 14.366101264953613, + "step": 5530 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.39, + "learning_rate": 5.617644464182469e-06, + "logps/chosen": -555.5802001953125, + "logps/margins": -9.702635765075684, + "logps/rejected": -545.8775634765625, + "loss": 10.5042, + "rewards/chosen": 11.953606605529785, + "rewards/margins": 3.2359375953674316, + "rewards/rejected": 8.717669486999512, + "step": 5540 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.39, + "learning_rate": 5.6046525411042465e-06, + "logps/chosen": -588.6121826171875, + "logps/margins": 18.677902221679688, + "logps/rejected": -607.2901611328125, + "loss": 10.0873, + "rewards/chosen": 15.800477981567383, + "rewards/margins": 3.7223923206329346, + "rewards/rejected": 12.078083992004395, + "step": 5550 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 1.39, + "learning_rate": 5.59165647373614e-06, + "logps/chosen": -559.7953491210938, + "logps/margins": -32.83938980102539, + "logps/rejected": -526.9559326171875, + "loss": 8.7824, + "rewards/chosen": 17.1368350982666, + "rewards/margins": 5.214023590087891, + "rewards/rejected": 11.922811508178711, + "step": 5560 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.39, + "learning_rate": 5.5786563511532225e-06, + "logps/chosen": -487.2350158691406, + "logps/margins": 22.16974639892578, + "logps/rejected": -509.40472412109375, + "loss": 8.1786, + "rewards/chosen": 16.21988296508789, + "rewards/margins": 4.8656206130981445, + "rewards/rejected": 11.354262351989746, + "step": 5570 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.4, + "learning_rate": 5.565652262458363e-06, + "logps/chosen": -571.356201171875, + "logps/margins": -50.59857177734375, + "logps/rejected": -520.7576904296875, + "loss": 8.9853, + "rewards/chosen": 28.402347564697266, + "rewards/margins": 3.8751564025878906, + "rewards/rejected": 24.52718734741211, + "step": 5580 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.4, + "learning_rate": 5.55264429678162e-06, + "logps/chosen": -521.6007690429688, + "logps/margins": 3.263327121734619, + "logps/rejected": -524.8641357421875, + "loss": 8.7059, + "rewards/chosen": 11.137941360473633, + "rewards/margins": 4.151110649108887, + "rewards/rejected": 6.9868316650390625, + "step": 5590 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.4, + "learning_rate": 5.539632543279613e-06, + "logps/chosen": -646.941162109375, + "logps/margins": -100.841796875, + "logps/rejected": -546.099365234375, + "loss": 10.3559, + "rewards/chosen": 18.68703842163086, + "rewards/margins": 7.440060615539551, + "rewards/rejected": 11.24697494506836, + "step": 5600 + }, + { + "accuracy": 0.5, + "epoch": 1.4, + "learning_rate": 5.526617091134935e-06, + "logps/chosen": -655.107666015625, + "logps/margins": -138.97891235351562, + "logps/rejected": -516.1287841796875, + "loss": 10.0755, + "rewards/chosen": 12.616865158081055, + "rewards/margins": -0.6074798703193665, + "rewards/rejected": 13.224342346191406, + "step": 5610 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.41, + "learning_rate": 5.51359802955552e-06, + "logps/chosen": -531.0446166992188, + "logps/margins": -10.549161911010742, + "logps/rejected": -520.4954833984375, + "loss": 10.2467, + "rewards/chosen": 14.446090698242188, + "rewards/margins": 1.5576660633087158, + "rewards/rejected": 12.88842487335205, + "step": 5620 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.41, + "learning_rate": 5.500575447774049e-06, + "logps/chosen": -562.3323364257812, + "logps/margins": -58.555419921875, + "logps/rejected": -503.77691650390625, + "loss": 10.3725, + "rewards/chosen": 11.060080528259277, + "rewards/margins": -2.869616985321045, + "rewards/rejected": 13.92969799041748, + "step": 5630 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 1.41, + "learning_rate": 5.487549435047326e-06, + "logps/chosen": -693.4885864257812, + "logps/margins": -118.33091735839844, + "logps/rejected": -575.1577758789062, + "loss": 10.2271, + "rewards/chosen": 8.816314697265625, + "rewards/margins": -4.0319743156433105, + "rewards/rejected": 12.848287582397461, + "step": 5640 + }, + { + "accuracy": 0.5625, + "epoch": 1.41, + "learning_rate": 5.474520080655673e-06, + "logps/chosen": -594.1282958984375, + "logps/margins": -73.54985046386719, + "logps/rejected": -520.5784912109375, + "loss": 7.3061, + "rewards/chosen": 17.72264289855957, + "rewards/margins": 3.0925276279449463, + "rewards/rejected": 14.630114555358887, + "step": 5650 + }, + { + "accuracy": 0.5, + "epoch": 1.42, + "learning_rate": 5.461487473902312e-06, + "logps/chosen": -608.9813232421875, + "logps/margins": -39.725135803222656, + "logps/rejected": -569.2562255859375, + "loss": 10.3679, + "rewards/chosen": 9.870391845703125, + "rewards/margins": 0.5336726903915405, + "rewards/rejected": 9.336718559265137, + "step": 5660 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 1.42, + "learning_rate": 5.44845170411276e-06, + "logps/chosen": -631.4370727539062, + "logps/margins": -45.59686279296875, + "logps/rejected": -585.8402099609375, + "loss": 11.4957, + "rewards/chosen": 7.915677070617676, + "rewards/margins": -5.90485954284668, + "rewards/rejected": 13.820536613464355, + "step": 5670 + }, + { + "accuracy": 0.4124999940395355, + "epoch": 1.42, + "learning_rate": 5.4354128606342135e-06, + "logps/chosen": -599.5980224609375, + "logps/margins": 2.1444976329803467, + "logps/rejected": -601.7425537109375, + "loss": 9.7061, + "rewards/chosen": 14.73759651184082, + "rewards/margins": -4.230515480041504, + "rewards/rejected": 18.96811294555664, + "step": 5680 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.42, + "learning_rate": 5.422371032834935e-06, + "logps/chosen": -575.2979125976562, + "logps/margins": -58.373291015625, + "logps/rejected": -516.924560546875, + "loss": 9.7919, + "rewards/chosen": 23.29046630859375, + "rewards/margins": 2.105541944503784, + "rewards/rejected": 21.18492889404297, + "step": 5690 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.43, + "learning_rate": 5.409326310103641e-06, + "logps/chosen": -604.4918823242188, + "logps/margins": -51.045616149902344, + "logps/rejected": -553.4461669921875, + "loss": 9.4196, + "rewards/chosen": 16.40767478942871, + "rewards/margins": 2.130031108856201, + "rewards/rejected": 14.277644157409668, + "step": 5700 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.43, + "learning_rate": 5.396278781848892e-06, + "logps/chosen": -606.3555908203125, + "logps/margins": -71.03301239013672, + "logps/rejected": -535.3226318359375, + "loss": 8.2967, + "rewards/chosen": 17.800174713134766, + "rewards/margins": 6.916959285736084, + "rewards/rejected": 10.88321304321289, + "step": 5710 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.43, + "learning_rate": 5.383228537498474e-06, + "logps/chosen": -574.6397094726562, + "logps/margins": -44.88056182861328, + "logps/rejected": -529.7591552734375, + "loss": 9.769, + "rewards/chosen": 22.354421615600586, + "rewards/margins": 6.549884796142578, + "rewards/rejected": 15.804539680480957, + "step": 5720 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.43, + "learning_rate": 5.370175666498793e-06, + "logps/chosen": -505.1705627441406, + "logps/margins": -57.75910568237305, + "logps/rejected": -447.4114685058594, + "loss": 7.6743, + "rewards/chosen": 18.98794174194336, + "rewards/margins": 5.107205390930176, + "rewards/rejected": 13.880739212036133, + "step": 5730 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.44, + "learning_rate": 5.3571202583142555e-06, + "logps/chosen": -505.0125427246094, + "logps/margins": -61.940528869628906, + "logps/rejected": -443.07196044921875, + "loss": 8.5057, + "rewards/chosen": 20.454761505126953, + "rewards/margins": -0.0825420394539833, + "rewards/rejected": 20.537303924560547, + "step": 5740 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.44, + "learning_rate": 5.34406240242666e-06, + "logps/chosen": -625.8734741210938, + "logps/margins": -112.40678405761719, + "logps/rejected": -513.4666748046875, + "loss": 9.2881, + "rewards/chosen": 15.25017261505127, + "rewards/margins": 2.814181089401245, + "rewards/rejected": 12.435991287231445, + "step": 5750 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.44, + "learning_rate": 5.33100218833458e-06, + "logps/chosen": -543.5582885742188, + "logps/margins": 34.492881774902344, + "logps/rejected": -578.0512084960938, + "loss": 8.9673, + "rewards/chosen": 15.383227348327637, + "rewards/margins": 2.7522222995758057, + "rewards/rejected": 12.63100528717041, + "step": 5760 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.44, + "learning_rate": 5.3179397055527515e-06, + "logps/chosen": -563.4320068359375, + "logps/margins": 40.9635009765625, + "logps/rejected": -604.3955078125, + "loss": 8.1638, + "rewards/chosen": 13.930071830749512, + "rewards/margins": 0.11591318994760513, + "rewards/rejected": 13.814160346984863, + "step": 5770 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.45, + "learning_rate": 5.30487504361146e-06, + "logps/chosen": -569.4702758789062, + "logps/margins": -51.24161911010742, + "logps/rejected": -518.2286376953125, + "loss": 8.7509, + "rewards/chosen": 18.283912658691406, + "rewards/margins": 1.2649528980255127, + "rewards/rejected": 17.018959045410156, + "step": 5780 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.45, + "learning_rate": 5.291808292055931e-06, + "logps/chosen": -604.3680419921875, + "logps/margins": -86.90727233886719, + "logps/rejected": -517.460693359375, + "loss": 7.7287, + "rewards/chosen": 19.640851974487305, + "rewards/margins": 4.991347312927246, + "rewards/rejected": 14.649505615234375, + "step": 5790 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.45, + "learning_rate": 5.278739540445708e-06, + "logps/chosen": -604.7346801757812, + "logps/margins": -114.20124816894531, + "logps/rejected": -490.53338623046875, + "loss": 7.3915, + "rewards/chosen": 18.88345718383789, + "rewards/margins": 11.091729164123535, + "rewards/rejected": 7.791729927062988, + "step": 5800 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.45, + "learning_rate": 5.265668878354043e-06, + "logps/chosen": -656.7084350585938, + "logps/margins": -18.626996994018555, + "logps/rejected": -638.0814208984375, + "loss": 8.5605, + "rewards/chosen": 22.845172882080078, + "rewards/margins": 4.749499320983887, + "rewards/rejected": 18.095672607421875, + "step": 5810 + }, + { + "accuracy": 0.5625, + "epoch": 1.46, + "learning_rate": 5.252596395367286e-06, + "logps/chosen": -678.775634765625, + "logps/margins": -120.8924789428711, + "logps/rejected": -557.8831176757812, + "loss": 8.059, + "rewards/chosen": 19.470434188842773, + "rewards/margins": 9.666606903076172, + "rewards/rejected": 9.803827285766602, + "step": 5820 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.46, + "learning_rate": 5.239522181084262e-06, + "logps/chosen": -609.0624389648438, + "logps/margins": -29.084224700927734, + "logps/rejected": -579.9781494140625, + "loss": 8.7638, + "rewards/chosen": 19.86415672302246, + "rewards/margins": 4.145925521850586, + "rewards/rejected": 15.718233108520508, + "step": 5830 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.46, + "learning_rate": 5.226446325115667e-06, + "logps/chosen": -612.7992553710938, + "logps/margins": -64.93578338623047, + "logps/rejected": -547.8634643554688, + "loss": 8.4809, + "rewards/chosen": 23.024892807006836, + "rewards/margins": 4.364573001861572, + "rewards/rejected": 18.660320281982422, + "step": 5840 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.46, + "learning_rate": 5.213368917083447e-06, + "logps/chosen": -577.4593505859375, + "logps/margins": 24.49588394165039, + "logps/rejected": -601.9552001953125, + "loss": 8.5876, + "rewards/chosen": 24.33335304260254, + "rewards/margins": -0.9923039674758911, + "rewards/rejected": 25.32565689086914, + "step": 5850 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 1.47, + "learning_rate": 5.200290046620187e-06, + "logps/chosen": -604.6259155273438, + "logps/margins": -41.174522399902344, + "logps/rejected": -563.4514770507812, + "loss": 8.5483, + "rewards/chosen": 16.271068572998047, + "rewards/margins": 3.201953887939453, + "rewards/rejected": 13.069112777709961, + "step": 5860 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.47, + "learning_rate": 5.187209803368493e-06, + "logps/chosen": -513.3167724609375, + "logps/margins": 45.782691955566406, + "logps/rejected": -559.0994873046875, + "loss": 9.7576, + "rewards/chosen": 15.635335922241211, + "rewards/margins": -0.11521463096141815, + "rewards/rejected": 15.75054931640625, + "step": 5870 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.47, + "learning_rate": 5.1741282769803845e-06, + "logps/chosen": -581.5233154296875, + "logps/margins": -115.37931060791016, + "logps/rejected": -466.14410400390625, + "loss": 8.7089, + "rewards/chosen": 20.026020050048828, + "rewards/margins": 2.274528980255127, + "rewards/rejected": 17.75149154663086, + "step": 5880 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.47, + "learning_rate": 5.1610455571166705e-06, + "logps/chosen": -630.3762817382812, + "logps/margins": -126.6087417602539, + "logps/rejected": -503.76751708984375, + "loss": 11.1987, + "rewards/chosen": 22.12693214416504, + "rewards/margins": 5.534392356872559, + "rewards/rejected": 16.592538833618164, + "step": 5890 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.48, + "learning_rate": 5.147961733446344e-06, + "logps/chosen": -576.0491943359375, + "logps/margins": 38.260963439941406, + "logps/rejected": -614.3102416992188, + "loss": 10.2234, + "rewards/chosen": 18.371767044067383, + "rewards/margins": 0.8409388661384583, + "rewards/rejected": 17.530826568603516, + "step": 5900 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.48, + "learning_rate": 5.13487689564596e-06, + "logps/chosen": -597.911376953125, + "logps/margins": 36.217529296875, + "logps/rejected": -634.12890625, + "loss": 9.3014, + "rewards/chosen": 15.341076850891113, + "rewards/margins": 4.3141350746154785, + "rewards/rejected": 11.026944160461426, + "step": 5910 + }, + { + "accuracy": 0.4124999940395355, + "epoch": 1.48, + "learning_rate": 5.12179113339903e-06, + "logps/chosen": -643.5623779296875, + "logps/margins": -129.94371032714844, + "logps/rejected": -513.61865234375, + "loss": 7.8447, + "rewards/chosen": 9.581153869628906, + "rewards/margins": -3.8336825370788574, + "rewards/rejected": 13.414834976196289, + "step": 5920 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.48, + "learning_rate": 5.108704536395397e-06, + "logps/chosen": -537.8453979492188, + "logps/margins": -61.53409957885742, + "logps/rejected": -476.31134033203125, + "loss": 9.5914, + "rewards/chosen": 17.39281463623047, + "rewards/margins": 2.0330333709716797, + "rewards/rejected": 15.359779357910156, + "step": 5930 + }, + { + "accuracy": 0.5, + "epoch": 1.48, + "learning_rate": 5.0956171943306235e-06, + "logps/chosen": -619.1099853515625, + "logps/margins": -70.60470581054688, + "logps/rejected": -548.5052490234375, + "loss": 9.0155, + "rewards/chosen": 16.391908645629883, + "rewards/margins": 2.4468541145324707, + "rewards/rejected": 13.945055961608887, + "step": 5940 + }, + { + "accuracy": 0.5625, + "epoch": 1.49, + "learning_rate": 5.082529196905382e-06, + "logps/chosen": -664.9713134765625, + "logps/margins": -36.02037811279297, + "logps/rejected": -628.9508666992188, + "loss": 9.5638, + "rewards/chosen": 27.331918716430664, + "rewards/margins": 7.168522834777832, + "rewards/rejected": 20.163394927978516, + "step": 5950 + }, + { + "accuracy": 0.5625, + "epoch": 1.49, + "learning_rate": 5.06944063382484e-06, + "logps/chosen": -573.1370849609375, + "logps/margins": -52.665321350097656, + "logps/rejected": -520.4718017578125, + "loss": 8.4449, + "rewards/chosen": 15.177358627319336, + "rewards/margins": 2.899728775024414, + "rewards/rejected": 12.277630805969238, + "step": 5960 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.49, + "learning_rate": 5.056351594798037e-06, + "logps/chosen": -614.9654541015625, + "logps/margins": 5.376377105712891, + "logps/rejected": -620.3419189453125, + "loss": 10.8618, + "rewards/chosen": 14.915060043334961, + "rewards/margins": -1.0871585607528687, + "rewards/rejected": 16.00221824645996, + "step": 5970 + }, + { + "accuracy": 0.4375, + "epoch": 1.5, + "learning_rate": 5.0432621695372755e-06, + "logps/chosen": -539.8976440429688, + "logps/margins": -64.4924545288086, + "logps/rejected": -475.40521240234375, + "loss": 10.4358, + "rewards/chosen": 9.932031631469727, + "rewards/margins": -0.47447139024734497, + "rewards/rejected": 10.406502723693848, + "step": 5980 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.5, + "learning_rate": 5.030172447757506e-06, + "logps/chosen": -569.9727172851562, + "logps/margins": 46.77895736694336, + "logps/rejected": -616.751708984375, + "loss": 9.0982, + "rewards/chosen": 13.128868103027344, + "rewards/margins": -1.7962459325790405, + "rewards/rejected": 14.925114631652832, + "step": 5990 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.5, + "learning_rate": 5.0170825191757115e-06, + "logps/chosen": -564.9503173828125, + "logps/margins": 6.816415309906006, + "logps/rejected": -571.7666625976562, + "loss": 10.1059, + "rewards/chosen": 17.72634506225586, + "rewards/margins": 4.551650524139404, + "rewards/rejected": 13.174695014953613, + "step": 6000 + }, + { + "epoch": 1.5, + "eval_accuracy": 0.5252290345313602, + "eval_logps/chosen": -601.7291870117188, + "eval_logps/margins": -39.9495964050293, + "eval_logps/rejected": -561.7796020507812, + "eval_loss": 9.37901496887207, + "eval_rewards/chosen": 9.320639610290527, + "eval_rewards/margins": 1.7325608730316162, + "eval_rewards/rejected": 7.588078022003174, + "eval_runtime": 1180.1863, + "eval_samples_per_second": 12.024, + "eval_steps_per_second": 1.503, + "step": 6000 + } + ], + "logging_steps": 10, + "max_steps": 12000, + "num_train_epochs": 3, + "save_steps": 3000, + "total_flos": 4.4912043125626634e+18, + "trial_name": null, + "trial_params": null +}