rkumar1999 committed on
Commit
6dbd991
·
verified ·
1 Parent(s): ced6fc0

Model save

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. all_results.json +4 -4
  3. train_results.json +4 -4
  4. trainer_state.json +23 -13
README.md CHANGED
@@ -27,15 +27,15 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/rohanbayya1205-san-jose-state-university/huggingface/runs/wf116seg)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
34
 
35
  ### Framework versions
36
 
37
- - TRL: 0.16.0.dev0
38
- - Transformers: 4.49.0
39
  - Pytorch: 2.5.1
40
  - Datasets: 3.3.2
41
  - Tokenizers: 0.21.0
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/rohanbayya1205-san-jose-state-university/huggingface/runs/setdmiv0)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
34
 
35
  ### Framework versions
36
 
37
+ - TRL: 0.16.0
38
+ - Transformers: 4.50.0
39
  - Pytorch: 2.5.1
40
  - Datasets: 3.3.2
41
  - Tokenizers: 0.21.0
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.010941930115222931,
4
- "train_runtime": 4898.9111,
5
- "train_samples": 800,
6
- "train_samples_per_second": 0.163,
7
  "train_steps_per_second": 0.0
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.025846945121884346,
4
+ "train_runtime": 5654.5918,
5
+ "train_samples": 512,
6
+ "train_samples_per_second": 0.091,
7
  "train_steps_per_second": 0.0
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.010941930115222931,
4
- "train_runtime": 4898.9111,
5
- "train_samples": 800,
6
- "train_samples_per_second": 0.163,
7
  "train_steps_per_second": 0.0
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.025846945121884346,
4
+ "train_runtime": 5654.5918,
5
+ "train_samples": 512,
6
+ "train_samples_per_second": 0.091,
7
  "train_steps_per_second": 0.0
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,9 @@
1
  {
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.64,
5
- "eval_steps": 100,
6
  "global_step": 1,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
@@ -10,18 +11,27 @@
10
  "log_history": [
11
  {
12
  "clip_ratio": 0.0,
13
- "completion_length": 678.875,
14
- "epoch": 0.64,
 
15
  "kl": 0.0,
16
- "reward": 0.2314453125,
17
- "reward_std": 0.20854125346522778,
18
- "rewards/accuracy_reward": 0.0703125,
19
- "rewards/format_reward": 0.1611328125,
 
 
 
 
 
 
 
 
20
  "step": 1,
21
  "total_flos": 0.0,
22
- "train_loss": 0.010941930115222931,
23
- "train_runtime": 4898.9111,
24
- "train_samples_per_second": 0.163,
25
  "train_steps_per_second": 0.0
26
  }
27
  ],
@@ -36,8 +46,8 @@
36
  "should_epoch_stop": false,
37
  "should_evaluate": false,
38
  "should_log": false,
39
- "should_save": false,
40
- "should_training_stop": false
41
  },
42
  "attributes": {}
43
  }
 
1
  {
2
+ "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
  "global_step": 1,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
 
11
  "log_history": [
12
  {
13
  "clip_ratio": 0.0,
14
+ "completion_length": 675.4326171875,
15
+ "epoch": 1.0,
16
+ "grad_norm": 0.011617695912718773,
17
  "kl": 0.0,
18
+ "learning_rate": 2e-05,
19
+ "loss": 0.0258,
20
+ "num_tokens": 875773.0,
21
+ "reward": 0.6923828125,
22
+ "reward_std": 0.33076575957238674,
23
+ "rewards/accuracy_reward": 0.0634765625,
24
+ "rewards/format_reward": 0.107421875,
25
+ "rewards/tag_count_reward": 0.521484375,
26
+ "step": 1
27
+ },
28
+ {
29
+ "epoch": 1.0,
30
  "step": 1,
31
  "total_flos": 0.0,
32
+ "train_loss": 0.025846945121884346,
33
+ "train_runtime": 5654.5918,
34
+ "train_samples_per_second": 0.091,
35
  "train_steps_per_second": 0.0
36
  }
37
  ],
 
46
  "should_epoch_stop": false,
47
  "should_evaluate": false,
48
  "should_log": false,
49
+ "should_save": true,
50
+ "should_training_stop": true
51
  },
52
  "attributes": {}
53
  }