gokulsrinivasagan committed · verified
Commit 4c5a81d · 1 Parent(s): 2b19c43

End of training
README.md CHANGED
@@ -4,11 +4,23 @@ license: apache-2.0
 base_model: google-bert/bert-base-uncased
 tags:
 - generated_from_trainer
+datasets:
+- devgpt-aimotion/the-stack-v2_PlantUML_filtered
 metrics:
 - accuracy
 model-index:
 - name: bert_base_code_uml
-  results: []
+  results:
+  - task:
+      name: Masked Language Modeling
+      type: fill-mask
+    dataset:
+      name: devgpt-aimotion/the-stack-v2_PlantUML_filtered
+      type: devgpt-aimotion/the-stack-v2_PlantUML_filtered
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.829663160408593
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -16,10 +28,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # bert_base_code_uml
 
-This model is a fine-tuned version of [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) on an unknown dataset.
+This model is a fine-tuned version of [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) on the devgpt-aimotion/the-stack-v2_PlantUML_filtered dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.8292
-- Accuracy: 0.8286
+- Loss: 0.8230
+- Accuracy: 0.8297
 
 ## Model description
 
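The updated card tags the checkpoint for the fill-mask task, so the simplest sanity check is the transformers fill-mask pipeline. This is a minimal sketch, not taken from the commit itself: the repository id below is an assumption (committer namespace plus the model name above), and the PlantUML snippet is only an illustrative prompt.

```python
# Hypothetical usage sketch: probe the fine-tuned MLM on a PlantUML-style snippet.
# The repo id is an assumption (committer namespace + model name); adjust as needed.
from transformers import pipeline

fill_mask = pipeline(
    "fill-mask",
    model="gokulsrinivasagan/bert_base_code_uml",  # assumed repo id
)

# bert-base-uncased uses [MASK] as its mask token.
snippet = "@startuml\nclass user {\n  + [MASK]() : void\n}\n@enduml"
for pred in fill_mask(snippet, top_k=5):
    print(f"{pred['token_str']!r}  score={pred['score']:.3f}")
```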
all_results.json ADDED
@@ -0,0 +1,16 @@
+{
+    "epoch": 25.0,
+    "eval_accuracy": 0.829663160408593,
+    "eval_loss": 0.822982132434845,
+    "eval_runtime": 31.502,
+    "eval_samples": 6151,
+    "eval_samples_per_second": 195.258,
+    "eval_steps_per_second": 2.063,
+    "perplexity": 2.2772808746912707,
+    "total_flos": 8.0444602960128e+17,
+    "train_loss": 1.908989332549426,
+    "train_runtime": 23433.9018,
+    "train_samples": 122254,
+    "train_samples_per_second": 130.424,
+    "train_steps_per_second": 1.359
+}
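The perplexity in all_results.json is consistent with being the exponential of the evaluation loss, which is how the reference MLM training scripts typically derive it. A quick check of that relationship:

```python
# Perplexity as exp(eval_loss), with eval_loss the mean cross-entropy in nats.
import math

eval_loss = 0.822982132434845
print(math.exp(eval_loss))  # ~2.2772808746912707, matching the reported perplexity
```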
eval_results.json ADDED
@@ -0,0 +1,10 @@
+{
+    "epoch": 25.0,
+    "eval_accuracy": 0.829663160408593,
+    "eval_loss": 0.822982132434845,
+    "eval_runtime": 31.502,
+    "eval_samples": 6151,
+    "eval_samples_per_second": 195.258,
+    "eval_steps_per_second": 2.063,
+    "perplexity": 2.2772808746912707
+}
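The evaluation throughput figures follow from the sample count and runtime above. The batch size used in this sketch is an assumption, taken from the "train_batch_size" of 96 recorded in trainer_state.json further down.

```python
# Rough consistency check of the eval throughput numbers.
import math

eval_samples, eval_runtime = 6151, 31.502
batch_size = 96  # assumed, from "train_batch_size" in trainer_state.json

print(eval_samples / eval_runtime)                           # ~195.26 samples/s (reported: 195.258)
print(math.ceil(eval_samples / batch_size) / eval_runtime)   # ~2.06 steps/s    (reported: 2.063)
```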
logs/events.out.tfevents.1750880481.ki-g0008.1143286.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e84d5fe95d0d8246931c473747cad9ab697fb1f27b0174608f57b860d69413c0
3
+ size 417
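This added file is a Git LFS pointer rather than the TensorBoard event file itself: per the LFS spec it records the SHA-256 of the stored object and its size in bytes. A small sketch of how one might verify a locally downloaded copy against the pointer (the local path is hypothetical):

```python
# Verify a downloaded event file against the oid/size in the LFS pointer above.
import hashlib
from pathlib import Path

path = Path("logs/events.out.tfevents.1750880481.ki-g0008.1143286.1")  # assumed local path
data = path.read_bytes()

assert len(data) == 417, "size mismatch with the pointer file"
assert hashlib.sha256(data).hexdigest() == (
    "e84d5fe95d0d8246931c473747cad9ab697fb1f27b0174608f57b860d69413c0"
), "sha256 mismatch with the pointer file"
print("event file matches the LFS pointer")
```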
train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 25.0,
+    "total_flos": 8.0444602960128e+17,
+    "train_loss": 1.908989332549426,
+    "train_runtime": 23433.9018,
+    "train_samples": 122254,
+    "train_samples_per_second": 130.424,
+    "train_steps_per_second": 1.359
+}
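The training throughput figures are likewise internally consistent: 122254 samples seen once per epoch for 25 epochs over the 23433.9 s run. The global step count of 31850 used below is taken from trainer_state.json.

```python
# Consistency check of the reported training throughput.
train_samples, epochs, runtime = 122254, 25.0, 23433.9018
global_step = 31850  # from trainer_state.json

print(train_samples * epochs / runtime)  # ~130.42 samples/s (reported: 130.424)
print(global_step / runtime)             # ~1.36 steps/s     (reported: 1.359)
```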
trainer_state.json ADDED
@@ -0,0 +1,511 @@
+{
+  "best_global_step": 30000,
+  "best_metric": 0.8292354941368103,
+  "best_model_checkpoint": "bert_base_code_uml/checkpoint-30000",
+  "epoch": 25.0,
+  "eval_steps": 10000,
+  "global_step": 31850,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.3924646781789639,
+      "grad_norm": 2.2605140209198,
+      "learning_rate": 4.9900000000000005e-06,
+      "loss": 8.1534,
+      "step": 500
+    },
+    {
+      "epoch": 0.7849293563579278,
+      "grad_norm": 1.4277119636535645,
+      "learning_rate": 9.990000000000001e-06,
+      "loss": 5.4981,
+      "step": 1000
+    },
+    {
+      "epoch": 1.1773940345368916,
+      "grad_norm": 1.1594833135604858,
+      "learning_rate": 1.499e-05,
+      "loss": 4.6668,
+      "step": 1500
+    },
+    {
+      "epoch": 1.5698587127158556,
+      "grad_norm": 1.3609659671783447,
+      "learning_rate": 1.999e-05,
+      "loss": 4.4569,
+      "step": 2000
+    },
+    {
+      "epoch": 1.9623233908948194,
+      "grad_norm": 1.4516750574111938,
+      "learning_rate": 2.4990000000000003e-05,
+      "loss": 4.3236,
+      "step": 2500
+    },
+    {
+      "epoch": 2.3547880690737832,
+      "grad_norm": 1.307254672050476,
+      "learning_rate": 2.9990000000000003e-05,
+      "loss": 4.2234,
+      "step": 3000
+    },
+    {
+      "epoch": 2.7472527472527473,
+      "grad_norm": 1.1777299642562866,
+      "learning_rate": 3.499e-05,
+      "loss": 4.1369,
+      "step": 3500
+    },
+    {
+      "epoch": 3.1397174254317113,
+      "grad_norm": 1.277431607246399,
+      "learning_rate": 3.999e-05,
+      "loss": 4.0883,
+      "step": 4000
+    },
+    {
+      "epoch": 3.5321821036106753,
+      "grad_norm": 1.136020302772522,
+      "learning_rate": 4.499e-05,
+      "loss": 4.0251,
+      "step": 4500
+    },
+    {
+      "epoch": 3.924646781789639,
+      "grad_norm": 1.5430645942687988,
+      "learning_rate": 4.999e-05,
+      "loss": 3.7435,
+      "step": 5000
+    },
+    {
+      "epoch": 4.3171114599686025,
+      "grad_norm": 1.1859745979309082,
+      "learning_rate": 5.499000000000001e-05,
+      "loss": 3.5562,
+      "step": 5500
+    },
+    {
+      "epoch": 4.7095761381475665,
+      "grad_norm": 1.1602009534835815,
+      "learning_rate": 5.999e-05,
+      "loss": 3.4409,
+      "step": 6000
+    },
+    {
+      "epoch": 5.1020408163265305,
+      "grad_norm": 1.5617371797561646,
+      "learning_rate": 6.499000000000001e-05,
+      "loss": 3.3426,
+      "step": 6500
+    },
+    {
+      "epoch": 5.4945054945054945,
+      "grad_norm": 1.3554491996765137,
+      "learning_rate": 6.999e-05,
+      "loss": 3.2194,
+      "step": 7000
+    },
+    {
+      "epoch": 5.8869701726844585,
+      "grad_norm": 2.1539087295532227,
+      "learning_rate": 7.499e-05,
+      "loss": 3.1264,
+      "step": 7500
+    },
+    {
+      "epoch": 6.279434850863423,
+      "grad_norm": 1.4375736713409424,
+      "learning_rate": 7.999000000000001e-05,
+      "loss": 3.0421,
+      "step": 8000
+    },
+    {
+      "epoch": 6.671899529042387,
+      "grad_norm": 1.8041514158248901,
+      "learning_rate": 8.499e-05,
+      "loss": 2.9334,
+      "step": 8500
+    },
+    {
+      "epoch": 7.06436420722135,
+      "grad_norm": 2.089439868927002,
+      "learning_rate": 8.999000000000001e-05,
+      "loss": 2.8356,
+      "step": 9000
+    },
+    {
+      "epoch": 7.456828885400314,
+      "grad_norm": 1.8236392736434937,
+      "learning_rate": 9.499e-05,
+      "loss": 2.6914,
+      "step": 9500
+    },
+    {
+      "epoch": 7.849293563579278,
+      "grad_norm": 1.8073580265045166,
+      "learning_rate": 9.999000000000001e-05,
+      "loss": 2.4929,
+      "step": 10000
+    },
+    {
+      "epoch": 7.849293563579278,
+      "eval_accuracy": 0.5692341405099076,
+      "eval_loss": 2.151398181915283,
+      "eval_runtime": 38.2798,
+      "eval_samples_per_second": 160.685,
+      "eval_steps_per_second": 1.698,
+      "step": 10000
+    },
+    {
+      "epoch": 8.241758241758241,
+      "grad_norm": 1.849391222000122,
+      "learning_rate": 9.77162471395881e-05,
+      "loss": 2.1576,
+      "step": 10500
+    },
+    {
+      "epoch": 8.634222919937205,
+      "grad_norm": 1.2290756702423096,
+      "learning_rate": 9.542791762013731e-05,
+      "loss": 1.7337,
+      "step": 11000
+    },
+    {
+      "epoch": 9.026687598116169,
+      "grad_norm": 1.1669484376907349,
+      "learning_rate": 9.313958810068651e-05,
+      "loss": 1.4375,
+      "step": 11500
+    },
+    {
+      "epoch": 9.419152276295133,
+      "grad_norm": 1.0519758462905884,
+      "learning_rate": 9.08512585812357e-05,
+      "loss": 1.3162,
+      "step": 12000
+    },
+    {
+      "epoch": 9.811616954474097,
+      "grad_norm": 1.0862187147140503,
+      "learning_rate": 8.85629290617849e-05,
+      "loss": 1.2368,
+      "step": 12500
+    },
+    {
+      "epoch": 10.204081632653061,
+      "grad_norm": 0.9377219676971436,
+      "learning_rate": 8.62745995423341e-05,
+      "loss": 1.1784,
+      "step": 13000
+    },
+    {
+      "epoch": 10.596546310832025,
+      "grad_norm": 0.9312331676483154,
+      "learning_rate": 8.398627002288329e-05,
+      "loss": 1.1388,
+      "step": 13500
+    },
+    {
+      "epoch": 10.989010989010989,
+      "grad_norm": 0.9040568470954895,
+      "learning_rate": 8.16979405034325e-05,
+      "loss": 1.1097,
+      "step": 14000
+    },
+    {
+      "epoch": 11.381475667189953,
+      "grad_norm": 0.8583242297172546,
+      "learning_rate": 7.94096109839817e-05,
+      "loss": 1.0736,
+      "step": 14500
+    },
+    {
+      "epoch": 11.773940345368917,
+      "grad_norm": 0.8321512937545776,
+      "learning_rate": 7.712128146453089e-05,
+      "loss": 1.0626,
+      "step": 15000
+    },
+    {
+      "epoch": 12.166405023547881,
+      "grad_norm": 0.9143489003181458,
+      "learning_rate": 7.48329519450801e-05,
+      "loss": 1.0358,
+      "step": 15500
+    },
+    {
+      "epoch": 12.558869701726845,
+      "grad_norm": 0.8196631669998169,
+      "learning_rate": 7.25446224256293e-05,
+      "loss": 1.0207,
+      "step": 16000
+    },
+    {
+      "epoch": 12.95133437990581,
+      "grad_norm": 0.7631738781929016,
+      "learning_rate": 7.025629290617849e-05,
+      "loss": 1.004,
+      "step": 16500
+    },
+    {
+      "epoch": 13.343799058084773,
+      "grad_norm": 0.8194634914398193,
+      "learning_rate": 6.79679633867277e-05,
+      "loss": 0.9921,
+      "step": 17000
+    },
+    {
+      "epoch": 13.736263736263737,
+      "grad_norm": 0.7670016884803772,
+      "learning_rate": 6.56796338672769e-05,
+      "loss": 0.9779,
+      "step": 17500
+    },
+    {
+      "epoch": 14.1287284144427,
+      "grad_norm": 0.7673987746238708,
+      "learning_rate": 6.339130434782609e-05,
+      "loss": 0.9608,
+      "step": 18000
+    },
+    {
+      "epoch": 14.521193092621663,
+      "grad_norm": 0.7936846613883972,
+      "learning_rate": 6.110297482837529e-05,
+      "loss": 0.9558,
+      "step": 18500
+    },
+    {
+      "epoch": 14.913657770800627,
+      "grad_norm": 0.7623568177223206,
+      "learning_rate": 5.881464530892449e-05,
+      "loss": 0.9505,
+      "step": 19000
+    },
+    {
+      "epoch": 15.306122448979592,
+      "grad_norm": 0.7214558720588684,
+      "learning_rate": 5.652631578947368e-05,
+      "loss": 0.9402,
+      "step": 19500
+    },
+    {
+      "epoch": 15.698587127158556,
+      "grad_norm": 0.827078640460968,
+      "learning_rate": 5.423798627002289e-05,
+      "loss": 0.9263,
+      "step": 20000
+    },
+    {
+      "epoch": 15.698587127158556,
+      "eval_accuracy": 0.8142541052951258,
+      "eval_loss": 0.9068173170089722,
+      "eval_runtime": 31.5271,
+      "eval_samples_per_second": 195.102,
+      "eval_steps_per_second": 2.062,
+      "step": 20000
+    },
+    {
+      "epoch": 16.09105180533752,
+      "grad_norm": 0.7756440043449402,
+      "learning_rate": 5.1949656750572084e-05,
+      "loss": 0.9185,
+      "step": 20500
+    },
+    {
+      "epoch": 16.483516483516482,
+      "grad_norm": 0.7866923809051514,
+      "learning_rate": 4.966132723112129e-05,
+      "loss": 0.9115,
+      "step": 21000
+    },
+    {
+      "epoch": 16.875981161695446,
+      "grad_norm": 0.7449353337287903,
+      "learning_rate": 4.737299771167048e-05,
+      "loss": 0.9021,
+      "step": 21500
+    },
+    {
+      "epoch": 17.26844583987441,
+      "grad_norm": 0.7738542556762695,
+      "learning_rate": 4.508466819221968e-05,
+      "loss": 0.9021,
+      "step": 22000
+    },
+    {
+      "epoch": 17.660910518053374,
+      "grad_norm": 0.7117587924003601,
+      "learning_rate": 4.279633867276888e-05,
+      "loss": 0.8932,
+      "step": 22500
+    },
+    {
+      "epoch": 18.053375196232338,
+      "grad_norm": 0.6952142715454102,
+      "learning_rate": 4.0508009153318077e-05,
+      "loss": 0.8866,
+      "step": 23000
+    },
+    {
+      "epoch": 18.445839874411302,
+      "grad_norm": 0.6748417615890503,
+      "learning_rate": 3.821967963386728e-05,
+      "loss": 0.8831,
+      "step": 23500
+    },
+    {
+      "epoch": 18.838304552590266,
+      "grad_norm": 0.7013327479362488,
+      "learning_rate": 3.593135011441648e-05,
+      "loss": 0.8714,
+      "step": 24000
+    },
+    {
+      "epoch": 19.23076923076923,
+      "grad_norm": 0.629546046257019,
+      "learning_rate": 3.364302059496568e-05,
+      "loss": 0.8684,
+      "step": 24500
+    },
+    {
+      "epoch": 19.623233908948194,
+      "grad_norm": 0.6739959120750427,
+      "learning_rate": 3.135469107551487e-05,
+      "loss": 0.8664,
+      "step": 25000
+    },
+    {
+      "epoch": 20.015698587127158,
+      "grad_norm": 0.6923867464065552,
+      "learning_rate": 2.9066361556064075e-05,
+      "loss": 0.8613,
+      "step": 25500
+    },
+    {
+      "epoch": 20.408163265306122,
+      "grad_norm": 0.7043192386627197,
+      "learning_rate": 2.677803203661327e-05,
+      "loss": 0.8541,
+      "step": 26000
+    },
+    {
+      "epoch": 20.800627943485086,
+      "grad_norm": 0.6633190512657166,
+      "learning_rate": 2.448970251716247e-05,
+      "loss": 0.8558,
+      "step": 26500
+    },
+    {
+      "epoch": 21.19309262166405,
+      "grad_norm": 0.6382936239242554,
+      "learning_rate": 2.2201372997711673e-05,
+      "loss": 0.8486,
+      "step": 27000
+    },
+    {
+      "epoch": 21.585557299843014,
+      "grad_norm": 0.7126407623291016,
+      "learning_rate": 1.9913043478260872e-05,
+      "loss": 0.8455,
+      "step": 27500
+    },
+    {
+      "epoch": 21.978021978021978,
+      "grad_norm": 0.6809006929397583,
+      "learning_rate": 1.7624713958810068e-05,
+      "loss": 0.8382,
+      "step": 28000
+    },
+    {
+      "epoch": 22.370486656200942,
+      "grad_norm": 0.6693772077560425,
+      "learning_rate": 1.533638443935927e-05,
+      "loss": 0.8377,
+      "step": 28500
+    },
+    {
+      "epoch": 22.762951334379906,
+      "grad_norm": 0.7368608117103577,
+      "learning_rate": 1.3048054919908468e-05,
+      "loss": 0.8346,
+      "step": 29000
+    },
+    {
+      "epoch": 23.15541601255887,
+      "grad_norm": 0.6541247963905334,
+      "learning_rate": 1.0759725400457667e-05,
+      "loss": 0.8298,
+      "step": 29500
+    },
+    {
+      "epoch": 23.547880690737834,
+      "grad_norm": 0.6780161261558533,
+      "learning_rate": 8.471395881006864e-06,
+      "loss": 0.8293,
+      "step": 30000
+    },
+    {
+      "epoch": 23.547880690737834,
+      "eval_accuracy": 0.8285928159953133,
+      "eval_loss": 0.8292354941368103,
+      "eval_runtime": 31.6198,
+      "eval_samples_per_second": 194.53,
+      "eval_steps_per_second": 2.056,
+      "step": 30000
+    },
+    {
+      "epoch": 23.940345368916798,
+      "grad_norm": 0.6645314693450928,
+      "learning_rate": 6.183066361556064e-06,
+      "loss": 0.8306,
+      "step": 30500
+    },
+    {
+      "epoch": 24.332810047095762,
+      "grad_norm": 0.7622667551040649,
+      "learning_rate": 3.894736842105264e-06,
+      "loss": 0.8225,
+      "step": 31000
+    },
+    {
+      "epoch": 24.725274725274726,
+      "grad_norm": 0.6563706398010254,
+      "learning_rate": 1.6064073226544622e-06,
+      "loss": 0.8275,
+      "step": 31500
+    },
+    {
+      "epoch": 25.0,
+      "step": 31850,
+      "total_flos": 8.0444602960128e+17,
+      "train_loss": 1.908989332549426,
+      "train_runtime": 23433.9018,
+      "train_samples_per_second": 130.424,
+      "train_steps_per_second": 1.359
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 31850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 25,
+  "save_steps": 10000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.0444602960128e+17,
+  "train_batch_size": 96,
+  "trial_name": null,
+  "trial_params": null
+}
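The global_step and max_steps of 31850 follow from the other fields in this state file and the sample count in train_results.json, assuming a setup where "train_batch_size" of 96 is the effective per-step batch size (for example a single device with no gradient accumulation). A short worked check:

```python
# How max_steps / global_step = 31850 falls out of the recorded fields,
# assuming train_batch_size (96) is the effective per-step batch size.
import math

train_samples = 122254    # from train_results.json
train_batch_size = 96     # from trainer_state.json
num_train_epochs = 25     # from trainer_state.json

steps_per_epoch = math.ceil(train_samples / train_batch_size)  # 1274
print(steps_per_epoch * num_train_epochs)                      # 31850
```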