NikiBase commited on
Commit
80fe108
·
1 Parent(s): ba860a8

Upload config.json

Browse files
Files changed (1) hide show
  1. config.json +356 -0
config.json ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.4986101388931274,
3
+ "best_model_checkpoint": "BLOOM-alpaca",
4
+ "epoch": 2.5628704148646486,
5
+ "global_step": 1000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 5.9999999999999995e-05,
13
+ "loss": 3.799,
14
+ "step": 20
15
+ },
16
+ {
17
+ "epoch": 0.1,
18
+ "learning_rate": 0.00011999999999999999,
19
+ "loss": 3.1924,
20
+ "step": 40
21
+ },
22
+ {
23
+ "epoch": 0.15,
24
+ "learning_rate": 0.00017999999999999998,
25
+ "loss": 2.1651,
26
+ "step": 60
27
+ },
28
+ {
29
+ "epoch": 0.21,
30
+ "learning_rate": 0.00023999999999999998,
31
+ "loss": 1.7068,
32
+ "step": 80
33
+ },
34
+ {
35
+ "epoch": 0.26,
36
+ "learning_rate": 0.0003,
37
+ "loss": 1.6352,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.31,
42
+ "learning_rate": 0.00029439252336448596,
43
+ "loss": 1.5979,
44
+ "step": 120
45
+ },
46
+ {
47
+ "epoch": 0.36,
48
+ "learning_rate": 0.00028878504672897194,
49
+ "loss": 1.5722,
50
+ "step": 140
51
+ },
52
+ {
53
+ "epoch": 0.41,
54
+ "learning_rate": 0.0002831775700934579,
55
+ "loss": 1.5735,
56
+ "step": 160
57
+ },
58
+ {
59
+ "epoch": 0.46,
60
+ "learning_rate": 0.0002775700934579439,
61
+ "loss": 1.5481,
62
+ "step": 180
63
+ },
64
+ {
65
+ "epoch": 0.51,
66
+ "learning_rate": 0.0002719626168224299,
67
+ "loss": 1.5288,
68
+ "step": 200
69
+ },
70
+ {
71
+ "epoch": 0.51,
72
+ "eval_loss": 1.5642188787460327,
73
+ "eval_runtime": 117.512,
74
+ "eval_samples_per_second": 17.02,
75
+ "eval_steps_per_second": 2.127,
76
+ "step": 200
77
+ },
78
+ {
79
+ "epoch": 0.56,
80
+ "learning_rate": 0.00026635514018691586,
81
+ "loss": 1.5394,
82
+ "step": 220
83
+ },
84
+ {
85
+ "epoch": 0.62,
86
+ "learning_rate": 0.00026074766355140184,
87
+ "loss": 1.5299,
88
+ "step": 240
89
+ },
90
+ {
91
+ "epoch": 0.67,
92
+ "learning_rate": 0.0002551401869158878,
93
+ "loss": 1.5137,
94
+ "step": 260
95
+ },
96
+ {
97
+ "epoch": 0.72,
98
+ "learning_rate": 0.0002495327102803738,
99
+ "loss": 1.5199,
100
+ "step": 280
101
+ },
102
+ {
103
+ "epoch": 0.77,
104
+ "learning_rate": 0.0002439252336448598,
105
+ "loss": 1.5171,
106
+ "step": 300
107
+ },
108
+ {
109
+ "epoch": 0.82,
110
+ "learning_rate": 0.00023831775700934577,
111
+ "loss": 1.5143,
112
+ "step": 320
113
+ },
114
+ {
115
+ "epoch": 0.87,
116
+ "learning_rate": 0.00023271028037383175,
117
+ "loss": 1.5053,
118
+ "step": 340
119
+ },
120
+ {
121
+ "epoch": 0.92,
122
+ "learning_rate": 0.00022710280373831773,
123
+ "loss": 1.5078,
124
+ "step": 360
125
+ },
126
+ {
127
+ "epoch": 0.97,
128
+ "learning_rate": 0.0002214953271028037,
129
+ "loss": 1.5003,
130
+ "step": 380
131
+ },
132
+ {
133
+ "epoch": 1.03,
134
+ "learning_rate": 0.0002158878504672897,
135
+ "loss": 1.5004,
136
+ "step": 400
137
+ },
138
+ {
139
+ "epoch": 1.03,
140
+ "eval_loss": 1.5298347473144531,
141
+ "eval_runtime": 117.6931,
142
+ "eval_samples_per_second": 16.993,
143
+ "eval_steps_per_second": 2.124,
144
+ "step": 400
145
+ },
146
+ {
147
+ "epoch": 1.08,
148
+ "learning_rate": 0.00021028037383177567,
149
+ "loss": 1.4853,
150
+ "step": 420
151
+ },
152
+ {
153
+ "epoch": 1.13,
154
+ "learning_rate": 0.00020467289719626166,
155
+ "loss": 1.511,
156
+ "step": 440
157
+ },
158
+ {
159
+ "epoch": 1.18,
160
+ "learning_rate": 0.00019906542056074764,
161
+ "loss": 1.4976,
162
+ "step": 460
163
+ },
164
+ {
165
+ "epoch": 1.23,
166
+ "learning_rate": 0.00019345794392523362,
167
+ "loss": 1.4804,
168
+ "step": 480
169
+ },
170
+ {
171
+ "epoch": 1.28,
172
+ "learning_rate": 0.0001878504672897196,
173
+ "loss": 1.4795,
174
+ "step": 500
175
+ },
176
+ {
177
+ "epoch": 1.33,
178
+ "learning_rate": 0.00018224299065420558,
179
+ "loss": 1.4876,
180
+ "step": 520
181
+ },
182
+ {
183
+ "epoch": 1.38,
184
+ "learning_rate": 0.00017663551401869156,
185
+ "loss": 1.4791,
186
+ "step": 540
187
+ },
188
+ {
189
+ "epoch": 1.44,
190
+ "learning_rate": 0.00017102803738317754,
191
+ "loss": 1.4903,
192
+ "step": 560
193
+ },
194
+ {
195
+ "epoch": 1.49,
196
+ "learning_rate": 0.00016542056074766352,
197
+ "loss": 1.4817,
198
+ "step": 580
199
+ },
200
+ {
201
+ "epoch": 1.54,
202
+ "learning_rate": 0.0001598130841121495,
203
+ "loss": 1.5033,
204
+ "step": 600
205
+ },
206
+ {
207
+ "epoch": 1.54,
208
+ "eval_loss": 1.513887882232666,
209
+ "eval_runtime": 118.2404,
210
+ "eval_samples_per_second": 16.915,
211
+ "eval_steps_per_second": 2.114,
212
+ "step": 600
213
+ },
214
+ {
215
+ "epoch": 1.59,
216
+ "learning_rate": 0.0001542056074766355,
217
+ "loss": 1.4738,
218
+ "step": 620
219
+ },
220
+ {
221
+ "epoch": 1.64,
222
+ "learning_rate": 0.00014859813084112147,
223
+ "loss": 1.4903,
224
+ "step": 640
225
+ },
226
+ {
227
+ "epoch": 1.69,
228
+ "learning_rate": 0.00014299065420560745,
229
+ "loss": 1.4982,
230
+ "step": 660
231
+ },
232
+ {
233
+ "epoch": 1.74,
234
+ "learning_rate": 0.00013738317757009343,
235
+ "loss": 1.4773,
236
+ "step": 680
237
+ },
238
+ {
239
+ "epoch": 1.79,
240
+ "learning_rate": 0.0001317757009345794,
241
+ "loss": 1.4663,
242
+ "step": 700
243
+ },
244
+ {
245
+ "epoch": 1.85,
246
+ "learning_rate": 0.0001261682242990654,
247
+ "loss": 1.478,
248
+ "step": 720
249
+ },
250
+ {
251
+ "epoch": 1.9,
252
+ "learning_rate": 0.00012056074766355139,
253
+ "loss": 1.4548,
254
+ "step": 740
255
+ },
256
+ {
257
+ "epoch": 1.95,
258
+ "learning_rate": 0.00011495327102803737,
259
+ "loss": 1.4928,
260
+ "step": 760
261
+ },
262
+ {
263
+ "epoch": 2.0,
264
+ "learning_rate": 0.00010934579439252335,
265
+ "loss": 1.4769,
266
+ "step": 780
267
+ },
268
+ {
269
+ "epoch": 2.05,
270
+ "learning_rate": 0.00010373831775700933,
271
+ "loss": 1.4553,
272
+ "step": 800
273
+ },
274
+ {
275
+ "epoch": 2.05,
276
+ "eval_loss": 1.5058649778366089,
277
+ "eval_runtime": 117.6034,
278
+ "eval_samples_per_second": 17.006,
279
+ "eval_steps_per_second": 2.126,
280
+ "step": 800
281
+ },
282
+ {
283
+ "epoch": 2.1,
284
+ "learning_rate": 9.813084112149531e-05,
285
+ "loss": 1.4699,
286
+ "step": 820
287
+ },
288
+ {
289
+ "epoch": 2.15,
290
+ "learning_rate": 9.25233644859813e-05,
291
+ "loss": 1.4596,
292
+ "step": 840
293
+ },
294
+ {
295
+ "epoch": 2.2,
296
+ "learning_rate": 8.691588785046728e-05,
297
+ "loss": 1.4786,
298
+ "step": 860
299
+ },
300
+ {
301
+ "epoch": 2.26,
302
+ "learning_rate": 8.130841121495326e-05,
303
+ "loss": 1.4701,
304
+ "step": 880
305
+ },
306
+ {
307
+ "epoch": 2.31,
308
+ "learning_rate": 7.570093457943924e-05,
309
+ "loss": 1.4608,
310
+ "step": 900
311
+ },
312
+ {
313
+ "epoch": 2.36,
314
+ "learning_rate": 7.009345794392522e-05,
315
+ "loss": 1.47,
316
+ "step": 920
317
+ },
318
+ {
319
+ "epoch": 2.41,
320
+ "learning_rate": 6.44859813084112e-05,
321
+ "loss": 1.4593,
322
+ "step": 940
323
+ },
324
+ {
325
+ "epoch": 2.46,
326
+ "learning_rate": 5.887850467289719e-05,
327
+ "loss": 1.4566,
328
+ "step": 960
329
+ },
330
+ {
331
+ "epoch": 2.51,
332
+ "learning_rate": 5.327102803738317e-05,
333
+ "loss": 1.4624,
334
+ "step": 980
335
+ },
336
+ {
337
+ "epoch": 2.56,
338
+ "learning_rate": 4.766355140186915e-05,
339
+ "loss": 1.4484,
340
+ "step": 1000
341
+ },
342
+ {
343
+ "epoch": 2.56,
344
+ "eval_loss": 1.4986101388931274,
345
+ "eval_runtime": 117.7967,
346
+ "eval_samples_per_second": 16.978,
347
+ "eval_steps_per_second": 2.122,
348
+ "step": 1000
349
+ }
350
+ ],
351
+ "max_steps": 1170,
352
+ "num_train_epochs": 3,
353
+ "total_flos": 5.959000367982182e+16,
354
+ "trial_name": null,
355
+ "trial_params": null
356
+ }