alex-treebeard committed on
Commit
7c1b437
·
verified ·
1 Parent(s): 0faef13

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
5
+
6
+ ' -%}
7
+ {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
+
10
+ ' -%}
11
+ {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
+ {%- endif -%}
17
+ {%- for message in loop_messages -%}
18
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
+ {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
+ {%- endif -%}
21
+ {%- if (message['role'] == 'assistant') -%}
22
+ {%- set role = "model" -%}
23
+ {%- else -%}
24
+ {%- set role = message['role'] -%}
25
+ {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
+ {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
30
+ {%- elif message['content'] is iterable -%}
31
+ {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
34
+ {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{ raise_exception("Invalid content type") }}
40
+ {%- endif -%}
41
+ {{ '<end_of_turn>
42
+ ' }}
43
+ {%- endfor -%}
44
+ {%- if add_generation_prompt -%}
45
+ {{'<start_of_turn>model
46
+ '}}
47
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_sliding_window_pattern": 6,
3
+ "architectures": [
4
+ "Gemma3ForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "attn_logit_softcapping": null,
9
+ "bos_token_id": 2,
10
+ "eos_token_id": 1,
11
+ "final_logit_softcapping": null,
12
+ "head_dim": 256,
13
+ "hidden_activation": "gelu_pytorch_tanh",
14
+ "hidden_size": 640,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 2048,
17
+ "layer_types": [
18
+ "sliding_attention",
19
+ "sliding_attention",
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "full_attention",
24
+ "sliding_attention",
25
+ "sliding_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "full_attention",
30
+ "sliding_attention",
31
+ "sliding_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "full_attention"
36
+ ],
37
+ "max_position_embeddings": 32768,
38
+ "model_type": "gemma3_text",
39
+ "num_attention_heads": 4,
40
+ "num_hidden_layers": 18,
41
+ "num_key_value_heads": 1,
42
+ "pad_token_id": 0,
43
+ "query_pre_attn_scalar": 256,
44
+ "rms_norm_eps": 1e-06,
45
+ "rope_local_base_freq": 10000.0,
46
+ "rope_scaling": null,
47
+ "rope_theta": 1000000.0,
48
+ "sliding_window": 512,
49
+ "torch_dtype": "float32",
50
+ "transformers_version": "4.55.4",
51
+ "use_bidirectional_attention": false,
52
+ "use_cache": true,
53
+ "vocab_size": 262144
54
+ }
generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cache_implementation": "hybrid",
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 1,
6
+ 106
7
+ ],
8
+ "top_k": 64,
9
+ "top_p": 0.95,
10
+ "transformers_version": "4.55.4"
11
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60c13d690aaa7a3ce54956d34775a0db42472adadca75fd21e0dd4905f81f1ac
3
+ size 1072419256
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:507f1ecb754d7fbfbbfbc1bf73683608983bc7595fc2da776b53a3eea47e6a02
3
+ size 2144983819
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ed02e18c5d5c9a8c21145e4329bb76c2742650b4e8d26d41784ff106106232d
3
+ size 14645
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e8bef6dda512071503ea3bab68f0960919f4ba9156b465cc1853aaa448a81f7
3
+ size 1383
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f676bddb701813c0517f7a76e0d3092f6615fcb710950ed9352f8f6a5da99e52
3
+ size 1465
special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<eos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d786405177734910d7a3db625c2826640964a0b4e5cdbbd70620ae3313a01bef
3
+ size 33384722
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
@@ -0,0 +1,554 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.2,
6
+ "eval_steps": 500,
7
+ "global_step": 200,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "clip_ratio/high_max": 0.0,
14
+ "clip_ratio/high_mean": 0.0,
15
+ "clip_ratio/low_mean": 0.0,
16
+ "clip_ratio/low_min": 0.0,
17
+ "clip_ratio/region_mean": 0.0,
18
+ "completions/clipped_ratio": 1.0,
19
+ "completions/max_length": 256.0,
20
+ "completions/max_terminated_length": 0.0,
21
+ "completions/mean_length": 256.0,
22
+ "completions/mean_terminated_length": 0.0,
23
+ "completions/min_length": 256.0,
24
+ "completions/min_terminated_length": 0.0,
25
+ "entropy": 0.8932701796293259,
26
+ "epoch": 0.01,
27
+ "frac_reward_zero_std": 0.0,
28
+ "grad_norm": 17.141883850097656,
29
+ "learning_rate": 9.649999999999999e-07,
30
+ "loss": -0.0,
31
+ "num_tokens": 49344.0,
32
+ "reward": -459.925,
33
+ "reward_std": 173.0284324645996,
34
+ "rewards/reward_func/mean": -459.925,
35
+ "rewards/reward_func/std": 173.02844009399413,
36
+ "step": 10
37
+ },
38
+ {
39
+ "clip_ratio/high_max": 0.0,
40
+ "clip_ratio/high_mean": 0.0,
41
+ "clip_ratio/low_mean": 0.0,
42
+ "clip_ratio/low_min": 0.0,
43
+ "clip_ratio/region_mean": 0.0,
44
+ "completions/clipped_ratio": 1.0,
45
+ "completions/max_length": 256.0,
46
+ "completions/max_terminated_length": 0.0,
47
+ "completions/mean_length": 256.0,
48
+ "completions/mean_terminated_length": 0.0,
49
+ "completions/min_length": 256.0,
50
+ "completions/min_terminated_length": 0.0,
51
+ "entropy": 0.5676407814025879,
52
+ "epoch": 0.02,
53
+ "frac_reward_zero_std": 0.0,
54
+ "grad_norm": 12.152915000915527,
55
+ "learning_rate": 9.15e-07,
56
+ "loss": 0.0,
57
+ "num_tokens": 104592.0,
58
+ "reward": -394.4625,
59
+ "reward_std": 221.84682922363282,
60
+ "rewards/reward_func/mean": -394.4625,
61
+ "rewards/reward_func/std": 221.84682998657226,
62
+ "step": 20
63
+ },
64
+ {
65
+ "clip_ratio/high_max": 0.0,
66
+ "clip_ratio/high_mean": 0.0,
67
+ "clip_ratio/low_mean": 0.0,
68
+ "clip_ratio/low_min": 0.0,
69
+ "clip_ratio/region_mean": 0.0,
70
+ "completions/clipped_ratio": 1.0,
71
+ "completions/max_length": 256.0,
72
+ "completions/max_terminated_length": 0.0,
73
+ "completions/mean_length": 256.0,
74
+ "completions/mean_terminated_length": 0.0,
75
+ "completions/min_length": 256.0,
76
+ "completions/min_terminated_length": 0.0,
77
+ "entropy": 0.20221070423722268,
78
+ "epoch": 0.03,
79
+ "frac_reward_zero_std": 0.0,
80
+ "grad_norm": 6.0900750160217285,
81
+ "learning_rate": 8.65e-07,
82
+ "loss": 0.0,
83
+ "num_tokens": 152208.0,
84
+ "reward": -79.8125,
85
+ "reward_std": 144.8038761138916,
86
+ "rewards/reward_func/mean": -79.8125,
87
+ "rewards/reward_func/std": 144.80387077331542,
88
+ "step": 30
89
+ },
90
+ {
91
+ "clip_ratio/high_max": 0.0,
92
+ "clip_ratio/high_mean": 0.0,
93
+ "clip_ratio/low_mean": 0.0,
94
+ "clip_ratio/low_min": 0.0,
95
+ "clip_ratio/region_mean": 0.0,
96
+ "completions/clipped_ratio": 1.0,
97
+ "completions/max_length": 256.0,
98
+ "completions/max_terminated_length": 0.0,
99
+ "completions/mean_length": 256.0,
100
+ "completions/mean_terminated_length": 0.0,
101
+ "completions/min_length": 256.0,
102
+ "completions/min_terminated_length": 0.0,
103
+ "entropy": 0.09748994875699282,
104
+ "epoch": 0.04,
105
+ "frac_reward_zero_std": 0.0,
106
+ "grad_norm": 3.9179723262786865,
107
+ "learning_rate": 8.149999999999999e-07,
108
+ "loss": -0.0,
109
+ "num_tokens": 201216.0,
110
+ "reward": 9.3625,
111
+ "reward_std": 81.3893858909607,
112
+ "rewards/reward_func/mean": 9.3625,
113
+ "rewards/reward_func/std": 81.38938302993775,
114
+ "step": 40
115
+ },
116
+ {
117
+ "clip_ratio/high_max": 0.0,
118
+ "clip_ratio/high_mean": 0.0,
119
+ "clip_ratio/low_mean": 0.0,
120
+ "clip_ratio/low_min": 0.0,
121
+ "clip_ratio/region_mean": 0.0,
122
+ "completions/clipped_ratio": 1.0,
123
+ "completions/max_length": 256.0,
124
+ "completions/max_terminated_length": 0.0,
125
+ "completions/mean_length": 256.0,
126
+ "completions/mean_terminated_length": 0.0,
127
+ "completions/min_length": 256.0,
128
+ "completions/min_terminated_length": 0.0,
129
+ "entropy": 0.08117130994796753,
130
+ "epoch": 0.05,
131
+ "frac_reward_zero_std": 0.0,
132
+ "grad_norm": 3.0272433757781982,
133
+ "learning_rate": 7.65e-07,
134
+ "loss": -0.0,
135
+ "num_tokens": 252344.0,
136
+ "reward": 60.8,
137
+ "reward_std": 22.109106731414794,
138
+ "rewards/reward_func/mean": 60.8,
139
+ "rewards/reward_func/std": 22.109107208251952,
140
+ "step": 50
141
+ },
142
+ {
143
+ "clip_ratio/high_max": 0.0,
144
+ "clip_ratio/high_mean": 0.0,
145
+ "clip_ratio/low_mean": 0.0,
146
+ "clip_ratio/low_min": 0.0,
147
+ "clip_ratio/region_mean": 0.0,
148
+ "completions/clipped_ratio": 1.0,
149
+ "completions/max_length": 256.0,
150
+ "completions/max_terminated_length": 0.0,
151
+ "completions/mean_length": 256.0,
152
+ "completions/mean_terminated_length": 0.0,
153
+ "completions/min_length": 256.0,
154
+ "completions/min_terminated_length": 0.0,
155
+ "entropy": 0.0658429590985179,
156
+ "epoch": 0.06,
157
+ "frac_reward_zero_std": 0.0,
158
+ "grad_norm": 3.603912115097046,
159
+ "learning_rate": 7.149999999999999e-07,
160
+ "loss": 0.0,
161
+ "num_tokens": 303800.0,
162
+ "reward": 65.1875,
163
+ "reward_std": 14.921637725830077,
164
+ "rewards/reward_func/mean": 65.1875,
165
+ "rewards/reward_func/std": 14.921638202667236,
166
+ "step": 60
167
+ },
168
+ {
169
+ "clip_ratio/high_max": 0.0,
170
+ "clip_ratio/high_mean": 0.0,
171
+ "clip_ratio/low_mean": 0.0,
172
+ "clip_ratio/low_min": 0.0,
173
+ "clip_ratio/region_mean": 0.0,
174
+ "completions/clipped_ratio": 1.0,
175
+ "completions/max_length": 256.0,
176
+ "completions/max_terminated_length": 0.0,
177
+ "completions/mean_length": 256.0,
178
+ "completions/mean_terminated_length": 0.0,
179
+ "completions/min_length": 256.0,
180
+ "completions/min_terminated_length": 0.0,
181
+ "entropy": 0.08412722386419773,
182
+ "epoch": 0.07,
183
+ "frac_reward_zero_std": 0.0,
184
+ "grad_norm": 3.701263189315796,
185
+ "learning_rate": 6.65e-07,
186
+ "loss": 0.0,
187
+ "num_tokens": 355936.0,
188
+ "reward": 74.2625,
189
+ "reward_std": 19.148268938064575,
190
+ "rewards/reward_func/mean": 74.2625,
191
+ "rewards/reward_func/std": 19.14826898574829,
192
+ "step": 70
193
+ },
194
+ {
195
+ "clip_ratio/high_max": 0.0,
196
+ "clip_ratio/high_mean": 0.0,
197
+ "clip_ratio/low_mean": 0.0,
198
+ "clip_ratio/low_min": 0.0,
199
+ "clip_ratio/region_mean": 0.0,
200
+ "completions/clipped_ratio": 1.0,
201
+ "completions/max_length": 256.0,
202
+ "completions/max_terminated_length": 0.0,
203
+ "completions/mean_length": 256.0,
204
+ "completions/mean_terminated_length": 0.0,
205
+ "completions/min_length": 256.0,
206
+ "completions/min_terminated_length": 0.0,
207
+ "entropy": 0.08717511333525181,
208
+ "epoch": 0.08,
209
+ "frac_reward_zero_std": 0.0,
210
+ "grad_norm": 3.643070936203003,
211
+ "learning_rate": 6.149999999999999e-07,
212
+ "loss": -0.0,
213
+ "num_tokens": 409632.0,
214
+ "reward": 71.3,
215
+ "reward_std": 19.00277919769287,
216
+ "rewards/reward_func/mean": 71.3,
217
+ "rewards/reward_func/std": 19.0027795791626,
218
+ "step": 80
219
+ },
220
+ {
221
+ "clip_ratio/high_max": 0.0,
222
+ "clip_ratio/high_mean": 0.0,
223
+ "clip_ratio/low_mean": 0.0,
224
+ "clip_ratio/low_min": 0.0,
225
+ "clip_ratio/region_mean": 0.0,
226
+ "completions/clipped_ratio": 1.0,
227
+ "completions/max_length": 256.0,
228
+ "completions/max_terminated_length": 0.0,
229
+ "completions/mean_length": 256.0,
230
+ "completions/mean_terminated_length": 0.0,
231
+ "completions/min_length": 256.0,
232
+ "completions/min_terminated_length": 0.0,
233
+ "entropy": 0.10131141170859337,
234
+ "epoch": 0.09,
235
+ "frac_reward_zero_std": 0.0,
236
+ "grad_norm": 4.0365071296691895,
237
+ "learning_rate": 5.649999999999999e-07,
238
+ "loss": 0.0,
239
+ "num_tokens": 457048.0,
240
+ "reward": 81.2875,
241
+ "reward_std": 12.263180470466613,
242
+ "rewards/reward_func/mean": 81.2875,
243
+ "rewards/reward_func/std": 12.263180875778199,
244
+ "step": 90
245
+ },
246
+ {
247
+ "clip_ratio/high_max": 0.0,
248
+ "clip_ratio/high_mean": 0.0,
249
+ "clip_ratio/low_mean": 0.0,
250
+ "clip_ratio/low_min": 0.0,
251
+ "clip_ratio/region_mean": 0.0,
252
+ "completions/clipped_ratio": 1.0,
253
+ "completions/max_length": 256.0,
254
+ "completions/max_terminated_length": 0.0,
255
+ "completions/mean_length": 256.0,
256
+ "completions/mean_terminated_length": 0.0,
257
+ "completions/min_length": 256.0,
258
+ "completions/min_terminated_length": 0.0,
259
+ "entropy": 0.07999873682856559,
260
+ "epoch": 0.1,
261
+ "frac_reward_zero_std": 0.0,
262
+ "grad_norm": 4.262997150421143,
263
+ "learning_rate": 5.149999999999999e-07,
264
+ "loss": 0.0,
265
+ "num_tokens": 506584.0,
266
+ "reward": 73.05,
267
+ "reward_std": 16.826302528381348,
268
+ "rewards/reward_func/mean": 73.05,
269
+ "rewards/reward_func/std": 16.82630310058594,
270
+ "step": 100
271
+ },
272
+ {
273
+ "clip_ratio/high_max": 0.0,
274
+ "clip_ratio/high_mean": 0.0,
275
+ "clip_ratio/low_mean": 0.0,
276
+ "clip_ratio/low_min": 0.0,
277
+ "clip_ratio/region_mean": 0.0,
278
+ "completions/clipped_ratio": 1.0,
279
+ "completions/max_length": 256.0,
280
+ "completions/max_terminated_length": 0.0,
281
+ "completions/mean_length": 256.0,
282
+ "completions/mean_terminated_length": 0.0,
283
+ "completions/min_length": 256.0,
284
+ "completions/min_terminated_length": 0.0,
285
+ "entropy": 0.07205904349684715,
286
+ "epoch": 0.11,
287
+ "frac_reward_zero_std": 0.0,
288
+ "grad_norm": 3.98404598236084,
289
+ "learning_rate": 4.65e-07,
290
+ "loss": 0.0,
291
+ "num_tokens": 556800.0,
292
+ "reward": 80.7875,
293
+ "reward_std": 11.600197219848633,
294
+ "rewards/reward_func/mean": 80.7875,
295
+ "rewards/reward_func/std": 11.600197505950927,
296
+ "step": 110
297
+ },
298
+ {
299
+ "clip_ratio/high_max": 0.0,
300
+ "clip_ratio/high_mean": 0.0,
301
+ "clip_ratio/low_mean": 0.0,
302
+ "clip_ratio/low_min": 0.0,
303
+ "clip_ratio/region_mean": 0.0,
304
+ "completions/clipped_ratio": 1.0,
305
+ "completions/max_length": 256.0,
306
+ "completions/max_terminated_length": 0.0,
307
+ "completions/mean_length": 256.0,
308
+ "completions/mean_terminated_length": 0.0,
309
+ "completions/min_length": 256.0,
310
+ "completions/min_terminated_length": 0.0,
311
+ "entropy": 0.08184156678617001,
312
+ "epoch": 0.12,
313
+ "frac_reward_zero_std": 0.0,
314
+ "grad_norm": 2.9209563732147217,
315
+ "learning_rate": 4.1499999999999994e-07,
316
+ "loss": -0.0,
317
+ "num_tokens": 605280.0,
318
+ "reward": 82.5625,
319
+ "reward_std": 13.004734754562378,
320
+ "rewards/reward_func/mean": 82.5625,
321
+ "rewards/reward_func/std": 13.004735040664674,
322
+ "step": 120
323
+ },
324
+ {
325
+ "clip_ratio/high_max": 0.0,
326
+ "clip_ratio/high_mean": 0.0,
327
+ "clip_ratio/low_mean": 0.0,
328
+ "clip_ratio/low_min": 0.0,
329
+ "clip_ratio/region_mean": 0.0,
330
+ "completions/clipped_ratio": 1.0,
331
+ "completions/max_length": 256.0,
332
+ "completions/max_terminated_length": 0.0,
333
+ "completions/mean_length": 256.0,
334
+ "completions/mean_terminated_length": 0.0,
335
+ "completions/min_length": 256.0,
336
+ "completions/min_terminated_length": 0.0,
337
+ "entropy": 0.07969092782586813,
338
+ "epoch": 0.13,
339
+ "frac_reward_zero_std": 0.0,
340
+ "grad_norm": 4.116878509521484,
341
+ "learning_rate": 3.65e-07,
342
+ "loss": 0.0,
343
+ "num_tokens": 657160.0,
344
+ "reward": 55.6875,
345
+ "reward_std": 24.997920417785643,
346
+ "rewards/reward_func/mean": 55.6875,
347
+ "rewards/reward_func/std": 24.99792127609253,
348
+ "step": 130
349
+ },
350
+ {
351
+ "clip_ratio/high_max": 0.0,
352
+ "clip_ratio/high_mean": 0.0,
353
+ "clip_ratio/low_mean": 0.0,
354
+ "clip_ratio/low_min": 0.0,
355
+ "clip_ratio/region_mean": 0.0,
356
+ "completions/clipped_ratio": 1.0,
357
+ "completions/max_length": 256.0,
358
+ "completions/max_terminated_length": 0.0,
359
+ "completions/mean_length": 256.0,
360
+ "completions/mean_terminated_length": 0.0,
361
+ "completions/min_length": 256.0,
362
+ "completions/min_terminated_length": 0.0,
363
+ "entropy": 0.08586088940501213,
364
+ "epoch": 0.14,
365
+ "frac_reward_zero_std": 0.0,
366
+ "grad_norm": 3.09967041015625,
367
+ "learning_rate": 3.15e-07,
368
+ "loss": 0.0,
369
+ "num_tokens": 707272.0,
370
+ "reward": 78.0375,
371
+ "reward_std": 13.64857816696167,
372
+ "rewards/reward_func/mean": 78.0375,
373
+ "rewards/reward_func/std": 13.64857850074768,
374
+ "step": 140
375
+ },
376
+ {
377
+ "clip_ratio/high_max": 0.0,
378
+ "clip_ratio/high_mean": 0.0,
379
+ "clip_ratio/low_mean": 0.0,
380
+ "clip_ratio/low_min": 0.0,
381
+ "clip_ratio/region_mean": 0.0,
382
+ "completions/clipped_ratio": 1.0,
383
+ "completions/max_length": 256.0,
384
+ "completions/max_terminated_length": 0.0,
385
+ "completions/mean_length": 256.0,
386
+ "completions/mean_terminated_length": 0.0,
387
+ "completions/min_length": 256.0,
388
+ "completions/min_terminated_length": 0.0,
389
+ "entropy": 0.07407695688307285,
390
+ "epoch": 0.15,
391
+ "frac_reward_zero_std": 0.0,
392
+ "grad_norm": 3.6434996128082275,
393
+ "learning_rate": 2.65e-07,
394
+ "loss": 0.0,
395
+ "num_tokens": 755712.0,
396
+ "reward": 62.6875,
397
+ "reward_std": 29.81237063407898,
398
+ "rewards/reward_func/mean": 62.6875,
399
+ "rewards/reward_func/std": 29.812370777130127,
400
+ "step": 150
401
+ },
402
+ {
403
+ "clip_ratio/high_max": 0.0,
404
+ "clip_ratio/high_mean": 0.0,
405
+ "clip_ratio/low_mean": 0.0,
406
+ "clip_ratio/low_min": 0.0,
407
+ "clip_ratio/region_mean": 0.0,
408
+ "completions/clipped_ratio": 1.0,
409
+ "completions/max_length": 256.0,
410
+ "completions/max_terminated_length": 0.0,
411
+ "completions/mean_length": 256.0,
412
+ "completions/mean_terminated_length": 0.0,
413
+ "completions/min_length": 256.0,
414
+ "completions/min_terminated_length": 0.0,
415
+ "entropy": 0.08506322149187326,
416
+ "epoch": 0.16,
417
+ "frac_reward_zero_std": 0.0,
418
+ "grad_norm": 3.3035037517547607,
419
+ "learning_rate": 2.1499999999999998e-07,
420
+ "loss": 0.0,
421
+ "num_tokens": 806232.0,
422
+ "reward": 80.3875,
423
+ "reward_std": 12.722473907470704,
424
+ "rewards/reward_func/mean": 80.3875,
425
+ "rewards/reward_func/std": 12.722473907470704,
426
+ "step": 160
427
+ },
428
+ {
429
+ "clip_ratio/high_max": 0.0,
430
+ "clip_ratio/high_mean": 0.0,
431
+ "clip_ratio/low_mean": 0.0,
432
+ "clip_ratio/low_min": 0.0,
433
+ "clip_ratio/region_mean": 0.0,
434
+ "completions/clipped_ratio": 1.0,
435
+ "completions/max_length": 256.0,
436
+ "completions/max_terminated_length": 0.0,
437
+ "completions/mean_length": 256.0,
438
+ "completions/mean_terminated_length": 0.0,
439
+ "completions/min_length": 256.0,
440
+ "completions/min_terminated_length": 0.0,
441
+ "entropy": 0.07220943029969931,
442
+ "epoch": 0.17,
443
+ "frac_reward_zero_std": 0.0,
444
+ "grad_norm": 4.2111358642578125,
445
+ "learning_rate": 1.65e-07,
446
+ "loss": 0.0,
447
+ "num_tokens": 858016.0,
448
+ "reward": 81.125,
449
+ "reward_std": 11.847685623168946,
450
+ "rewards/reward_func/mean": 81.125,
451
+ "rewards/reward_func/std": 11.84768624305725,
452
+ "step": 170
453
+ },
454
+ {
455
+ "clip_ratio/high_max": 0.0,
456
+ "clip_ratio/high_mean": 0.0,
457
+ "clip_ratio/low_mean": 0.0,
458
+ "clip_ratio/low_min": 0.0,
459
+ "clip_ratio/region_mean": 0.0,
460
+ "completions/clipped_ratio": 1.0,
461
+ "completions/max_length": 256.0,
462
+ "completions/max_terminated_length": 0.0,
463
+ "completions/mean_length": 256.0,
464
+ "completions/mean_terminated_length": 0.0,
465
+ "completions/min_length": 256.0,
466
+ "completions/min_terminated_length": 0.0,
467
+ "entropy": 0.07000144328922034,
468
+ "epoch": 0.18,
469
+ "frac_reward_zero_std": 0.0,
470
+ "grad_norm": 3.138835906982422,
471
+ "learning_rate": 1.15e-07,
472
+ "loss": 0.0,
473
+ "num_tokens": 907688.0,
474
+ "reward": 72.55,
475
+ "reward_std": 17.49347562789917,
476
+ "rewards/reward_func/mean": 72.55,
477
+ "rewards/reward_func/std": 17.493476104736327,
478
+ "step": 180
479
+ },
480
+ {
481
+ "clip_ratio/high_max": 0.0,
482
+ "clip_ratio/high_mean": 0.0,
483
+ "clip_ratio/low_mean": 0.0,
484
+ "clip_ratio/low_min": 0.0,
485
+ "clip_ratio/region_mean": 0.0,
486
+ "completions/clipped_ratio": 1.0,
487
+ "completions/max_length": 256.0,
488
+ "completions/max_terminated_length": 0.0,
489
+ "completions/mean_length": 256.0,
490
+ "completions/mean_terminated_length": 0.0,
491
+ "completions/min_length": 256.0,
492
+ "completions/min_terminated_length": 0.0,
493
+ "entropy": 0.06578830443322659,
494
+ "epoch": 0.19,
495
+ "frac_reward_zero_std": 0.0,
496
+ "grad_norm": 4.317309379577637,
497
+ "learning_rate": 6.5e-08,
498
+ "loss": -0.0,
499
+ "num_tokens": 960144.0,
500
+ "reward": 78.3875,
501
+ "reward_std": 13.952195310592652,
502
+ "rewards/reward_func/mean": 78.3875,
503
+ "rewards/reward_func/std": 13.952195501327514,
504
+ "step": 190
505
+ },
506
+ {
507
+ "clip_ratio/high_max": 0.0,
508
+ "clip_ratio/high_mean": 0.0,
509
+ "clip_ratio/low_mean": 0.0,
510
+ "clip_ratio/low_min": 0.0,
511
+ "clip_ratio/region_mean": 0.0,
512
+ "completions/clipped_ratio": 1.0,
513
+ "completions/max_length": 256.0,
514
+ "completions/max_terminated_length": 0.0,
515
+ "completions/mean_length": 256.0,
516
+ "completions/mean_terminated_length": 0.0,
517
+ "completions/min_length": 256.0,
518
+ "completions/min_terminated_length": 0.0,
519
+ "entropy": 0.06528270887210966,
520
+ "epoch": 0.2,
521
+ "frac_reward_zero_std": 0.0,
522
+ "grad_norm": 2.896949291229248,
523
+ "learning_rate": 1.5e-08,
524
+ "loss": -0.0,
525
+ "num_tokens": 1006496.0,
526
+ "reward": 75.8875,
527
+ "reward_std": 12.274623441696168,
528
+ "rewards/reward_func/mean": 75.8875,
529
+ "rewards/reward_func/std": 12.274623727798462,
530
+ "step": 200
531
+ }
532
+ ],
533
+ "logging_steps": 10,
534
+ "max_steps": 200,
535
+ "num_input_tokens_seen": 1006496,
536
+ "num_train_epochs": 1,
537
+ "save_steps": 50,
538
+ "stateful_callbacks": {
539
+ "TrainerControl": {
540
+ "args": {
541
+ "should_epoch_stop": false,
542
+ "should_evaluate": false,
543
+ "should_log": false,
544
+ "should_save": true,
545
+ "should_training_stop": true
546
+ },
547
+ "attributes": {}
548
+ }
549
+ },
550
+ "total_flos": 0.0,
551
+ "train_batch_size": 8,
552
+ "trial_name": null,
553
+ "trial_params": null
554
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5382119339ff62a6f44d73ee51e1f60cd3f50649f7ec6c2eacbaaeea6b72202b
3
+ size 6993