Training in progress, step 2527, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step2527/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step2527/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +278 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 48680136
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8346ab13f4b7b21d2b29112b8902325a29dd3b846c062714d1a52cc8e3529039
|
3 |
size 48680136
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6430755e4d771a1d655da6a1122a88c06170b3283533cff22655ca0c300bc051
|
3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28ca477b3d667f9bd120a3f5a0027056380a000cb2c785e3fe48573dfbadae75
|
3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4143120b3fd3d0d24bbb06c958d63707e92aa4c5608c462cc256587b07bd48ae
|
3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:099aadae436f7329944571a6f95df04b3350e34b231b01a5b18f510dcb0825d6
|
3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d92d1db126a6c00c0911057ddc047de4f28b675d5ccd74f1c4acf86ffede2051
|
3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bff6ba32ab439063f5a6f5d43a0235bd6ded54cc7f5ed534b621ca96385b3c0
|
3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa6eab5768c985a15d15daec830906bf954e42edc06c6d64dfb78b6745f51c5b
|
3 |
+
size 36474352
|
last-checkpoint/global_step2527/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:556d2e56ffee6c65efb42cecab9b558ae4d59b452762ba18ad76882392357d22
|
3 |
+
size 36474352
|
last-checkpoint/global_step2527/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a899397f8e1be9eece0d2cff42a99a02b88d1d3919acf5627d8b45ec803faf9c
|
3 |
+
size 390451
|
last-checkpoint/global_step2527/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0db1c1c054d19eb3996ec805796d485095f52b86ec41bef01e1836f441978533
|
3 |
+
size 390387
|
last-checkpoint/global_step2527/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2ebe7aef19a19af80ff1ee3b138d90ac19ded2df55cf1b4f48a7ff1bc200945
|
3 |
+
size 390387
|
last-checkpoint/global_step2527/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:626ab93bae5148e7ec4008808d2d0bd4e5f6b1218e8c6fc74862d0c2eea59369
|
3 |
+
size 390387
|
last-checkpoint/global_step2527/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f1e277faa2b654951b677e101231e6ef09a2f3aff4c548c70549d90dfff4993
|
3 |
+
size 390387
|
last-checkpoint/global_step2527/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d86545118cda5ad95577530de8b0298285184172fe3fd4ef69b743506cd581ca
|
3 |
+
size 390387
|
last-checkpoint/global_step2527/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80a94252d81267a9b39b875517331b81ed9ff36646278c02ed653bb435373d66
|
3 |
+
size 390387
|
last-checkpoint/global_step2527/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa1e7083f8aee635a411566a4eba90d63697b0c3943fef588ac35000614a4f80
|
3 |
+
size 390387
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step2527
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c02adaeb104a456cce73f3dfce7ef82bd1d39537f6f938fef85bbda510bd32a
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f975a10d97cf0c5780ff024524346681750a4672878c083063f7246a341e0fa2
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6fb704c634e883f9e3881266a3dcb5ba23374c8ebf181812c9f96a130e61f66
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bc646933e05a0c4234556cd6f0c7075bab2d33a5950ff4bbe972166c1092320
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b495a6c499c1edc6102a33cf75929a4f32fe1644e38ef1d57d162fb64bfb915f
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f8a3798560394c1c82cce7ebec75141fc923441919268c7c53e22a06fb17656
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b1aedf073e7fd48123a8e0decc6dd465a3bcde85b1242e59856358cf0587a7d
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26c6ae9753603f5ca51b6367b59f3afc788d1cceb974eb84386f65a6283806de
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba1417a4512f0520215fff01c82d65455bbbf8bb1817ff1d5e6b2e4fc2d3773d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 5,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -26024,6 +26024,280 @@
|
|
26024 |
"eval_samples_per_second": 6.589,
|
26025 |
"eval_steps_per_second": 0.22,
|
26026 |
"step": 2500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26027 |
}
|
26028 |
],
|
26029 |
"logging_steps": 1,
|
@@ -26038,12 +26312,12 @@
|
|
26038 |
"should_evaluate": false,
|
26039 |
"should_log": false,
|
26040 |
"should_save": true,
|
26041 |
-
"should_training_stop":
|
26042 |
},
|
26043 |
"attributes": {}
|
26044 |
}
|
26045 |
},
|
26046 |
-
"total_flos":
|
26047 |
"train_batch_size": 2,
|
26048 |
"trial_name": null,
|
26049 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 5,
|
6 |
+
"global_step": 2527,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
26024 |
"eval_samples_per_second": 6.589,
|
26025 |
"eval_steps_per_second": 0.22,
|
26026 |
"step": 2500
|
26027 |
+
},
|
26028 |
+
{
|
26029 |
+
"epoch": 0.9897111199050257,
|
26030 |
+
"grad_norm": 0.7640975936543402,
|
26031 |
+
"learning_rate": 3.225216210623327e-08,
|
26032 |
+
"loss": 0.1595,
|
26033 |
+
"step": 2501
|
26034 |
+
},
|
26035 |
+
{
|
26036 |
+
"epoch": 0.9901068460625247,
|
26037 |
+
"grad_norm": 0.6935968775183601,
|
26038 |
+
"learning_rate": 2.981917874453344e-08,
|
26039 |
+
"loss": 0.147,
|
26040 |
+
"step": 2502
|
26041 |
+
},
|
26042 |
+
{
|
26043 |
+
"epoch": 0.9905025722200237,
|
26044 |
+
"grad_norm": 0.671927431715776,
|
26045 |
+
"learning_rate": 2.7481569312381995e-08,
|
26046 |
+
"loss": 0.0938,
|
26047 |
+
"step": 2503
|
26048 |
+
},
|
26049 |
+
{
|
26050 |
+
"epoch": 0.9908982983775227,
|
26051 |
+
"grad_norm": 0.7349943492731037,
|
26052 |
+
"learning_rate": 2.52393382713767e-08,
|
26053 |
+
"loss": 0.1265,
|
26054 |
+
"step": 2504
|
26055 |
+
},
|
26056 |
+
{
|
26057 |
+
"epoch": 0.9912940245350218,
|
26058 |
+
"grad_norm": 0.5079188246341098,
|
26059 |
+
"learning_rate": 2.3092489901083148e-08,
|
26060 |
+
"loss": 0.1685,
|
26061 |
+
"step": 2505
|
26062 |
+
},
|
26063 |
+
{
|
26064 |
+
"epoch": 0.9912940245350218,
|
26065 |
+
"eval_PRM Accuracy": 0.9047619047619048,
|
26066 |
+
"eval_PRM F1": 0.9464285714285714,
|
26067 |
+
"eval_PRM F1 AUC": 0.7058534185932119,
|
26068 |
+
"eval_PRM F1 AUC (fixed)": 0.8566158386620757,
|
26069 |
+
"eval_PRM F1 Neg": 0.5714285714285714,
|
26070 |
+
"eval_PRM NPV": 0.8888888888888888,
|
26071 |
+
"eval_PRM Precision": 0.905982905982906,
|
26072 |
+
"eval_PRM Recall": 0.9906542056074766,
|
26073 |
+
"eval_PRM Specificty": 0.42105263157894735,
|
26074 |
+
"eval_loss": 0.3131347596645355,
|
26075 |
+
"eval_runtime": 4.2821,
|
26076 |
+
"eval_samples_per_second": 7.006,
|
26077 |
+
"eval_steps_per_second": 0.234,
|
26078 |
+
"step": 2505
|
26079 |
+
},
|
26080 |
+
{
|
26081 |
+
"epoch": 0.9916897506925207,
|
26082 |
+
"grad_norm": 0.7550093866508485,
|
26083 |
+
"learning_rate": 2.1041028299012555e-08,
|
26084 |
+
"loss": 0.1852,
|
26085 |
+
"step": 2506
|
26086 |
+
},
|
26087 |
+
{
|
26088 |
+
"epoch": 0.9920854768500198,
|
26089 |
+
"grad_norm": 0.84761330923606,
|
26090 |
+
"learning_rate": 1.908495738061067e-08,
|
26091 |
+
"loss": 0.15,
|
26092 |
+
"step": 2507
|
26093 |
+
},
|
26094 |
+
{
|
26095 |
+
"epoch": 0.9924812030075187,
|
26096 |
+
"grad_norm": 0.5014629514556076,
|
26097 |
+
"learning_rate": 1.7224280879279964e-08,
|
26098 |
+
"loss": 0.1174,
|
26099 |
+
"step": 2508
|
26100 |
+
},
|
26101 |
+
{
|
26102 |
+
"epoch": 0.9928769291650178,
|
26103 |
+
"grad_norm": 0.7739608773483745,
|
26104 |
+
"learning_rate": 1.5459002346324135e-08,
|
26105 |
+
"loss": 0.1267,
|
26106 |
+
"step": 2509
|
26107 |
+
},
|
26108 |
+
{
|
26109 |
+
"epoch": 0.9932726553225169,
|
26110 |
+
"grad_norm": 0.6174389258151092,
|
26111 |
+
"learning_rate": 1.3789125150998061e-08,
|
26112 |
+
"loss": 0.1471,
|
26113 |
+
"step": 2510
|
26114 |
+
},
|
26115 |
+
{
|
26116 |
+
"epoch": 0.9932726553225169,
|
26117 |
+
"eval_PRM Accuracy": 0.9047619047619048,
|
26118 |
+
"eval_PRM F1": 0.9464285714285714,
|
26119 |
+
"eval_PRM F1 AUC": 0.7058534185932119,
|
26120 |
+
"eval_PRM F1 AUC (fixed)": 0.8541564190850959,
|
26121 |
+
"eval_PRM F1 Neg": 0.5714285714285714,
|
26122 |
+
"eval_PRM NPV": 0.8888888888888888,
|
26123 |
+
"eval_PRM Precision": 0.905982905982906,
|
26124 |
+
"eval_PRM Recall": 0.9906542056074766,
|
26125 |
+
"eval_PRM Specificty": 0.42105263157894735,
|
26126 |
+
"eval_loss": 0.3146809935569763,
|
26127 |
+
"eval_runtime": 5.0007,
|
26128 |
+
"eval_samples_per_second": 5.999,
|
26129 |
+
"eval_steps_per_second": 0.2,
|
26130 |
+
"step": 2510
|
26131 |
+
},
|
26132 |
+
{
|
26133 |
+
"epoch": 0.9936683814800158,
|
26134 |
+
"grad_norm": 0.8881221027776471,
|
26135 |
+
"learning_rate": 1.2214652480452282e-08,
|
26136 |
+
"loss": 0.1881,
|
26137 |
+
"step": 2511
|
26138 |
+
},
|
26139 |
+
{
|
26140 |
+
"epoch": 0.9940641076375148,
|
26141 |
+
"grad_norm": 0.6711036576482892,
|
26142 |
+
"learning_rate": 1.0735587339749665e-08,
|
26143 |
+
"loss": 0.1136,
|
26144 |
+
"step": 2512
|
26145 |
+
},
|
26146 |
+
{
|
26147 |
+
"epoch": 0.9944598337950139,
|
26148 |
+
"grad_norm": 0.7811317531255666,
|
26149 |
+
"learning_rate": 9.351932551854292e-09,
|
26150 |
+
"loss": 0.1778,
|
26151 |
+
"step": 2513
|
26152 |
+
},
|
26153 |
+
{
|
26154 |
+
"epoch": 0.9948555599525128,
|
26155 |
+
"grad_norm": 0.8843034018155372,
|
26156 |
+
"learning_rate": 8.063690757642572e-09,
|
26157 |
+
"loss": 0.2367,
|
26158 |
+
"step": 2514
|
26159 |
+
},
|
26160 |
+
{
|
26161 |
+
"epoch": 0.9952512861100119,
|
26162 |
+
"grad_norm": 0.8913039576623297,
|
26163 |
+
"learning_rate": 6.8708644158754775e-09,
|
26164 |
+
"loss": 0.1645,
|
26165 |
+
"step": 2515
|
26166 |
+
},
|
26167 |
+
{
|
26168 |
+
"epoch": 0.9952512861100119,
|
26169 |
+
"eval_PRM Accuracy": 0.9047619047619048,
|
26170 |
+
"eval_PRM F1": 0.9464285714285714,
|
26171 |
+
"eval_PRM F1 AUC": 0.7058534185932119,
|
26172 |
+
"eval_PRM F1 AUC (fixed)": 0.8553861288735858,
|
26173 |
+
"eval_PRM F1 Neg": 0.5714285714285714,
|
26174 |
+
"eval_PRM NPV": 0.8888888888888888,
|
26175 |
+
"eval_PRM Precision": 0.905982905982906,
|
26176 |
+
"eval_PRM Recall": 0.9906542056074766,
|
26177 |
+
"eval_PRM Specificty": 0.42105263157894735,
|
26178 |
+
"eval_loss": 0.3146321475505829,
|
26179 |
+
"eval_runtime": 4.3747,
|
26180 |
+
"eval_samples_per_second": 6.858,
|
26181 |
+
"eval_steps_per_second": 0.229,
|
26182 |
+
"step": 2515
|
26183 |
+
},
|
26184 |
+
{
|
26185 |
+
"epoch": 0.9956470122675108,
|
26186 |
+
"grad_norm": 0.6541800097659761,
|
26187 |
+
"learning_rate": 5.773455803187444e-09,
|
26188 |
+
"loss": 0.1339,
|
26189 |
+
"step": 2516
|
26190 |
+
},
|
26191 |
+
{
|
26192 |
+
"epoch": 0.9960427384250099,
|
26193 |
+
"grad_norm": 0.5875809598684076,
|
26194 |
+
"learning_rate": 4.771467014125231e-09,
|
26195 |
+
"loss": 0.1672,
|
26196 |
+
"step": 2517
|
26197 |
+
},
|
26198 |
+
{
|
26199 |
+
"epoch": 0.996438464582509,
|
26200 |
+
"grad_norm": 0.7159822425927463,
|
26201 |
+
"learning_rate": 3.864899961097956e-09,
|
26202 |
+
"loss": 0.1658,
|
26203 |
+
"step": 2518
|
26204 |
+
},
|
26205 |
+
{
|
26206 |
+
"epoch": 0.9968341907400079,
|
26207 |
+
"grad_norm": 1.0406525491292937,
|
26208 |
+
"learning_rate": 3.053756374393757e-09,
|
26209 |
+
"loss": 0.2251,
|
26210 |
+
"step": 2519
|
26211 |
+
},
|
26212 |
+
{
|
26213 |
+
"epoch": 0.997229916897507,
|
26214 |
+
"grad_norm": 0.6871265678261859,
|
26215 |
+
"learning_rate": 2.338037802174231e-09,
|
26216 |
+
"loss": 0.1341,
|
26217 |
+
"step": 2520
|
26218 |
+
},
|
26219 |
+
{
|
26220 |
+
"epoch": 0.997229916897507,
|
26221 |
+
"eval_PRM Accuracy": 0.9047619047619048,
|
26222 |
+
"eval_PRM F1": 0.9464285714285714,
|
26223 |
+
"eval_PRM F1 AUC": 0.7058534185932119,
|
26224 |
+
"eval_PRM F1 AUC (fixed)": 0.8566158386620757,
|
26225 |
+
"eval_PRM F1 Neg": 0.5714285714285714,
|
26226 |
+
"eval_PRM NPV": 0.8888888888888888,
|
26227 |
+
"eval_PRM Precision": 0.905982905982906,
|
26228 |
+
"eval_PRM Recall": 0.9906542056074766,
|
26229 |
+
"eval_PRM Specificty": 0.42105263157894735,
|
26230 |
+
"eval_loss": 0.31285807490348816,
|
26231 |
+
"eval_runtime": 4.6229,
|
26232 |
+
"eval_samples_per_second": 6.489,
|
26233 |
+
"eval_steps_per_second": 0.216,
|
26234 |
+
"step": 2520
|
26235 |
+
},
|
26236 |
+
{
|
26237 |
+
"epoch": 0.9976256430550059,
|
26238 |
+
"grad_norm": 0.6838195939717506,
|
26239 |
+
"learning_rate": 1.7177456104688905e-09,
|
26240 |
+
"loss": 0.185,
|
26241 |
+
"step": 2521
|
26242 |
+
},
|
26243 |
+
{
|
26244 |
+
"epoch": 0.998021369212505,
|
26245 |
+
"grad_norm": 0.9441501978794823,
|
26246 |
+
"learning_rate": 1.1928809831807108e-09,
|
26247 |
+
"loss": 0.2185,
|
26248 |
+
"step": 2522
|
26249 |
+
},
|
26250 |
+
{
|
26251 |
+
"epoch": 0.998417095370004,
|
26252 |
+
"grad_norm": 0.6337380468158322,
|
26253 |
+
"learning_rate": 7.634449220805806e-10,
|
26254 |
+
"loss": 0.1319,
|
26255 |
+
"step": 2523
|
26256 |
+
},
|
26257 |
+
{
|
26258 |
+
"epoch": 0.998812821527503,
|
26259 |
+
"grad_norm": 0.48889023343641086,
|
26260 |
+
"learning_rate": 4.294382467906477e-10,
|
26261 |
+
"loss": 0.11,
|
26262 |
+
"step": 2524
|
26263 |
+
},
|
26264 |
+
{
|
26265 |
+
"epoch": 0.999208547685002,
|
26266 |
+
"grad_norm": 0.898426655651998,
|
26267 |
+
"learning_rate": 1.9086159480097287e-10,
|
26268 |
+
"loss": 0.1946,
|
26269 |
+
"step": 2525
|
26270 |
+
},
|
26271 |
+
{
|
26272 |
+
"epoch": 0.999208547685002,
|
26273 |
+
"eval_PRM Accuracy": 0.9047619047619048,
|
26274 |
+
"eval_PRM F1": 0.9464285714285714,
|
26275 |
+
"eval_PRM F1 AUC": 0.7058534185932119,
|
26276 |
+
"eval_PRM F1 AUC (fixed)": 0.854402361042794,
|
26277 |
+
"eval_PRM F1 Neg": 0.5714285714285714,
|
26278 |
+
"eval_PRM NPV": 0.8888888888888888,
|
26279 |
+
"eval_PRM Precision": 0.905982905982906,
|
26280 |
+
"eval_PRM Recall": 0.9906542056074766,
|
26281 |
+
"eval_PRM Specificty": 0.42105263157894735,
|
26282 |
+
"eval_loss": 0.3133951723575592,
|
26283 |
+
"eval_runtime": 4.4087,
|
26284 |
+
"eval_samples_per_second": 6.805,
|
26285 |
+
"eval_steps_per_second": 0.227,
|
26286 |
+
"step": 2525
|
26287 |
+
},
|
26288 |
+
{
|
26289 |
+
"epoch": 0.999604273842501,
|
26290 |
+
"grad_norm": 0.6412635302679868,
|
26291 |
+
"learning_rate": 4.771542146952967e-11,
|
26292 |
+
"loss": 0.1794,
|
26293 |
+
"step": 2526
|
26294 |
+
},
|
26295 |
+
{
|
26296 |
+
"epoch": 1.0,
|
26297 |
+
"grad_norm": 0.7990569727457182,
|
26298 |
+
"learning_rate": 0.0,
|
26299 |
+
"loss": 0.1372,
|
26300 |
+
"step": 2527
|
26301 |
}
|
26302 |
],
|
26303 |
"logging_steps": 1,
|
|
|
26312 |
"should_evaluate": false,
|
26313 |
"should_log": false,
|
26314 |
"should_save": true,
|
26315 |
+
"should_training_stop": true
|
26316 |
},
|
26317 |
"attributes": {}
|
26318 |
}
|
26319 |
},
|
26320 |
+
"total_flos": 2549072128245760.0,
|
26321 |
"train_batch_size": 2,
|
26322 |
"trial_name": null,
|
26323 |
"trial_params": null
|