alonb19 commited on
Commit
d2b6db3
·
verified ·
1 Parent(s): 87529f3

Upload folder using huggingface_hub

Browse files
checkpoint-1400/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ASTForAudioClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "frequency_stride": 10,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.0,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "Sound_Guitar",
12
+ "1": "Sound_Drum",
13
+ "2": "Sound_Piano"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 3072,
17
+ "label2id": {
18
+ "Sound_Drum": 1,
19
+ "Sound_Guitar": 0,
20
+ "Sound_Piano": 2
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_length": 1024,
24
+ "model_type": "audio-spectrogram-transformer",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "num_mel_bins": 128,
28
+ "patch_size": 16,
29
+ "problem_type": "single_label_classification",
30
+ "qkv_bias": true,
31
+ "time_stride": 10,
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.53.1"
34
+ }
checkpoint-1400/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4c91a78d1fe12be34e1f18a5440b5c5e8a19dcbcc9729cc1d3e88e46038c185
3
+ size 344793116
checkpoint-1400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ced18c40edf54ecf5c4bf5208acb78876f9365377470c4073374b6fc9f1c63f8
3
+ size 689698682
checkpoint-1400/preprocessor_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "ASTFeatureExtractor",
4
+ "feature_size": 1,
5
+ "max_length": 1024,
6
+ "mean": -4.2677393,
7
+ "num_mel_bins": 128,
8
+ "padding_side": "right",
9
+ "padding_value": 0.0,
10
+ "return_attention_mask": false,
11
+ "sampling_rate": 16000,
12
+ "std": 4.5689974
13
+ }
checkpoint-1400/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55eb9a50a24203e1f00ac6c722a2a24e52518f0efb376c58a3d9a8931f29ae83
3
+ size 14244
checkpoint-1400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f15e7f69b12164656f8a35dffbd37a4f86e04972461e4fb5d8eac4d5f7ff496e
3
+ size 1064
checkpoint-1400/trainer_state.json ADDED
@@ -0,0 +1,594 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 600,
3
+ "best_metric": 0.9974874371859297,
4
+ "best_model_checkpoint": "musical_instrument_detection_improved/checkpoint-600",
5
+ "epoch": 14.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1400,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.25125628140703515,
14
+ "grad_norm": 0.4818384647369385,
15
+ "learning_rate": 1.6000000000000003e-05,
16
+ "loss": 0.4941,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.5025125628140703,
21
+ "grad_norm": 4.753159046173096,
22
+ "learning_rate": 3.266666666666667e-05,
23
+ "loss": 0.0539,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 0.7537688442211056,
28
+ "grad_norm": 15.198661804199219,
29
+ "learning_rate": 4.933333333333334e-05,
30
+ "loss": 0.1284,
31
+ "step": 75
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "grad_norm": 0.40100908279418945,
36
+ "learning_rate": 6.6e-05,
37
+ "loss": 0.1306,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 1.0,
42
+ "eval_accuracy": 0.9748743718592965,
43
+ "eval_f1_macro": 0.9731342037379646,
44
+ "eval_loss": 0.07195427268743515,
45
+ "eval_precision_macro": 0.9712150747238466,
46
+ "eval_recall_macro": 0.9761503412881977,
47
+ "eval_runtime": 12.3998,
48
+ "eval_samples_per_second": 32.097,
49
+ "eval_steps_per_second": 4.032,
50
+ "step": 100
51
+ },
52
+ {
53
+ "epoch": 1.2512562814070352,
54
+ "grad_norm": 0.03644077107310295,
55
+ "learning_rate": 8.266666666666667e-05,
56
+ "loss": 0.0742,
57
+ "step": 125
58
+ },
59
+ {
60
+ "epoch": 1.5025125628140703,
61
+ "grad_norm": 0.0054424116387963295,
62
+ "learning_rate": 9.933333333333334e-05,
63
+ "loss": 0.0654,
64
+ "step": 150
65
+ },
66
+ {
67
+ "epoch": 1.7537688442211055,
68
+ "grad_norm": 0.05749930813908577,
69
+ "learning_rate": 9.822222222222223e-05,
70
+ "loss": 0.0411,
71
+ "step": 175
72
+ },
73
+ {
74
+ "epoch": 2.0,
75
+ "grad_norm": 0.034677326679229736,
76
+ "learning_rate": 9.637037037037037e-05,
77
+ "loss": 0.1626,
78
+ "step": 200
79
+ },
80
+ {
81
+ "epoch": 2.0,
82
+ "eval_accuracy": 0.9773869346733668,
83
+ "eval_f1_macro": 0.9760285913720755,
84
+ "eval_loss": 0.1097569689154625,
85
+ "eval_precision_macro": 0.9750499571795603,
86
+ "eval_recall_macro": 0.9784172661870504,
87
+ "eval_runtime": 12.8602,
88
+ "eval_samples_per_second": 30.948,
89
+ "eval_steps_per_second": 3.888,
90
+ "step": 200
91
+ },
92
+ {
93
+ "epoch": 2.251256281407035,
94
+ "grad_norm": 8.391584396362305,
95
+ "learning_rate": 9.451851851851853e-05,
96
+ "loss": 0.0613,
97
+ "step": 225
98
+ },
99
+ {
100
+ "epoch": 2.5025125628140703,
101
+ "grad_norm": 0.05065636709332466,
102
+ "learning_rate": 9.266666666666666e-05,
103
+ "loss": 0.0783,
104
+ "step": 250
105
+ },
106
+ {
107
+ "epoch": 2.7537688442211055,
108
+ "grad_norm": 4.692741394042969,
109
+ "learning_rate": 9.081481481481482e-05,
110
+ "loss": 0.0838,
111
+ "step": 275
112
+ },
113
+ {
114
+ "epoch": 3.0,
115
+ "grad_norm": 0.001607125741429627,
116
+ "learning_rate": 8.896296296296297e-05,
117
+ "loss": 0.0577,
118
+ "step": 300
119
+ },
120
+ {
121
+ "epoch": 3.0,
122
+ "eval_accuracy": 0.9849246231155779,
123
+ "eval_f1_macro": 0.9840659199317944,
124
+ "eval_loss": 0.06375118345022202,
125
+ "eval_precision_macro": 0.9825340045928281,
126
+ "eval_recall_macro": 0.9860503753859658,
127
+ "eval_runtime": 12.6279,
128
+ "eval_samples_per_second": 31.517,
129
+ "eval_steps_per_second": 3.959,
130
+ "step": 300
131
+ },
132
+ {
133
+ "epoch": 3.251256281407035,
134
+ "grad_norm": 0.0066846804693341255,
135
+ "learning_rate": 8.711111111111112e-05,
136
+ "loss": 0.0161,
137
+ "step": 325
138
+ },
139
+ {
140
+ "epoch": 3.5025125628140703,
141
+ "grad_norm": 5.183607578277588,
142
+ "learning_rate": 8.525925925925926e-05,
143
+ "loss": 0.0852,
144
+ "step": 350
145
+ },
146
+ {
147
+ "epoch": 3.7537688442211055,
148
+ "grad_norm": 1.3307738304138184,
149
+ "learning_rate": 8.340740740740741e-05,
150
+ "loss": 0.0025,
151
+ "step": 375
152
+ },
153
+ {
154
+ "epoch": 4.0,
155
+ "grad_norm": 0.0007463983492925763,
156
+ "learning_rate": 8.155555555555557e-05,
157
+ "loss": 0.031,
158
+ "step": 400
159
+ },
160
+ {
161
+ "epoch": 4.0,
162
+ "eval_accuracy": 0.9949748743718593,
163
+ "eval_f1_macro": 0.9944572643771775,
164
+ "eval_loss": 0.023198017850518227,
165
+ "eval_precision_macro": 0.9944572643771775,
166
+ "eval_recall_macro": 0.9944572643771775,
167
+ "eval_runtime": 12.4966,
168
+ "eval_samples_per_second": 31.849,
169
+ "eval_steps_per_second": 4.001,
170
+ "step": 400
171
+ },
172
+ {
173
+ "epoch": 4.251256281407035,
174
+ "grad_norm": 0.0033074100501835346,
175
+ "learning_rate": 7.97037037037037e-05,
176
+ "loss": 0.0155,
177
+ "step": 425
178
+ },
179
+ {
180
+ "epoch": 4.50251256281407,
181
+ "grad_norm": 2.0624492168426514,
182
+ "learning_rate": 7.785185185185186e-05,
183
+ "loss": 0.0366,
184
+ "step": 450
185
+ },
186
+ {
187
+ "epoch": 4.7537688442211055,
188
+ "grad_norm": 0.00034747723839245737,
189
+ "learning_rate": 7.6e-05,
190
+ "loss": 0.0139,
191
+ "step": 475
192
+ },
193
+ {
194
+ "epoch": 5.0,
195
+ "grad_norm": 0.0007314584800042212,
196
+ "learning_rate": 7.414814814814815e-05,
197
+ "loss": 0.0123,
198
+ "step": 500
199
+ },
200
+ {
201
+ "epoch": 5.0,
202
+ "eval_accuracy": 0.9874371859296482,
203
+ "eval_f1_macro": 0.9862145023435346,
204
+ "eval_loss": 0.05530214682221413,
205
+ "eval_precision_macro": 0.984984984984985,
206
+ "eval_recall_macro": 0.9880095923261392,
207
+ "eval_runtime": 12.5007,
208
+ "eval_samples_per_second": 31.838,
209
+ "eval_steps_per_second": 4.0,
210
+ "step": 500
211
+ },
212
+ {
213
+ "epoch": 5.251256281407035,
214
+ "grad_norm": 0.0016032257117331028,
215
+ "learning_rate": 7.22962962962963e-05,
216
+ "loss": 0.0002,
217
+ "step": 525
218
+ },
219
+ {
220
+ "epoch": 5.50251256281407,
221
+ "grad_norm": 2.0849623680114746,
222
+ "learning_rate": 7.044444444444444e-05,
223
+ "loss": 0.0597,
224
+ "step": 550
225
+ },
226
+ {
227
+ "epoch": 5.7537688442211055,
228
+ "grad_norm": 0.006351375486701727,
229
+ "learning_rate": 6.85925925925926e-05,
230
+ "loss": 0.0191,
231
+ "step": 575
232
+ },
233
+ {
234
+ "epoch": 6.0,
235
+ "grad_norm": 0.942449152469635,
236
+ "learning_rate": 6.674074074074075e-05,
237
+ "loss": 0.0016,
238
+ "step": 600
239
+ },
240
+ {
241
+ "epoch": 6.0,
242
+ "eval_accuracy": 0.9974874371859297,
243
+ "eval_f1_macro": 0.9972316853386666,
244
+ "eval_loss": 0.02638443559408188,
245
+ "eval_precision_macro": 0.9968847352024922,
246
+ "eval_recall_macro": 0.9976019184652278,
247
+ "eval_runtime": 12.501,
248
+ "eval_samples_per_second": 31.838,
249
+ "eval_steps_per_second": 4.0,
250
+ "step": 600
251
+ },
252
+ {
253
+ "epoch": 6.251256281407035,
254
+ "grad_norm": 0.0003063753538299352,
255
+ "learning_rate": 6.488888888888889e-05,
256
+ "loss": 0.0037,
257
+ "step": 625
258
+ },
259
+ {
260
+ "epoch": 6.50251256281407,
261
+ "grad_norm": 0.0006155333830974996,
262
+ "learning_rate": 6.303703703703704e-05,
263
+ "loss": 0.0126,
264
+ "step": 650
265
+ },
266
+ {
267
+ "epoch": 6.7537688442211055,
268
+ "grad_norm": 0.005792179610580206,
269
+ "learning_rate": 6.118518518518518e-05,
270
+ "loss": 0.0223,
271
+ "step": 675
272
+ },
273
+ {
274
+ "epoch": 7.0,
275
+ "grad_norm": 0.00047105210251174867,
276
+ "learning_rate": 5.9333333333333343e-05,
277
+ "loss": 0.0297,
278
+ "step": 700
279
+ },
280
+ {
281
+ "epoch": 7.0,
282
+ "eval_accuracy": 0.9849246231155779,
283
+ "eval_f1_macro": 0.9843097171528544,
284
+ "eval_loss": 0.09602699428796768,
285
+ "eval_precision_macro": 0.9835777126099706,
286
+ "eval_recall_macro": 0.985611510791367,
287
+ "eval_runtime": 12.5994,
288
+ "eval_samples_per_second": 31.589,
289
+ "eval_steps_per_second": 3.968,
290
+ "step": 700
291
+ },
292
+ {
293
+ "epoch": 7.251256281407035,
294
+ "grad_norm": 0.5099765062332153,
295
+ "learning_rate": 5.748148148148148e-05,
296
+ "loss": 0.0104,
297
+ "step": 725
298
+ },
299
+ {
300
+ "epoch": 7.50251256281407,
301
+ "grad_norm": 4.217642784118652,
302
+ "learning_rate": 5.562962962962963e-05,
303
+ "loss": 0.0024,
304
+ "step": 750
305
+ },
306
+ {
307
+ "epoch": 7.7537688442211055,
308
+ "grad_norm": 0.00032669113716110587,
309
+ "learning_rate": 5.377777777777778e-05,
310
+ "loss": 0.0002,
311
+ "step": 775
312
+ },
313
+ {
314
+ "epoch": 8.0,
315
+ "grad_norm": 0.00013344452599994838,
316
+ "learning_rate": 5.1925925925925933e-05,
317
+ "loss": 0.0051,
318
+ "step": 800
319
+ },
320
+ {
321
+ "epoch": 8.0,
322
+ "eval_accuracy": 0.9949748743718593,
323
+ "eval_f1_macro": 0.9944572643771775,
324
+ "eval_loss": 0.04679703712463379,
325
+ "eval_precision_macro": 0.9944572643771775,
326
+ "eval_recall_macro": 0.9944572643771775,
327
+ "eval_runtime": 12.6339,
328
+ "eval_samples_per_second": 31.502,
329
+ "eval_steps_per_second": 3.958,
330
+ "step": 800
331
+ },
332
+ {
333
+ "epoch": 8.251256281407034,
334
+ "grad_norm": 0.0006574160070158541,
335
+ "learning_rate": 5.007407407407407e-05,
336
+ "loss": 0.0186,
337
+ "step": 825
338
+ },
339
+ {
340
+ "epoch": 8.50251256281407,
341
+ "grad_norm": 0.0004143440746702254,
342
+ "learning_rate": 4.8222222222222225e-05,
343
+ "loss": 0.0005,
344
+ "step": 850
345
+ },
346
+ {
347
+ "epoch": 8.753768844221106,
348
+ "grad_norm": 0.009332993067800999,
349
+ "learning_rate": 4.637037037037038e-05,
350
+ "loss": 0.0,
351
+ "step": 875
352
+ },
353
+ {
354
+ "epoch": 9.0,
355
+ "grad_norm": 0.2795320451259613,
356
+ "learning_rate": 4.4518518518518523e-05,
357
+ "loss": 0.0073,
358
+ "step": 900
359
+ },
360
+ {
361
+ "epoch": 9.0,
362
+ "eval_accuracy": 0.9849246231155779,
363
+ "eval_f1_macro": 0.9836277857935251,
364
+ "eval_loss": 0.10048004239797592,
365
+ "eval_precision_macro": 0.9821428571428571,
366
+ "eval_recall_macro": 0.9858309430886664,
367
+ "eval_runtime": 12.4384,
368
+ "eval_samples_per_second": 31.998,
369
+ "eval_steps_per_second": 4.02,
370
+ "step": 900
371
+ },
372
+ {
373
+ "epoch": 9.251256281407034,
374
+ "grad_norm": 0.000357466604327783,
375
+ "learning_rate": 4.266666666666667e-05,
376
+ "loss": 0.0002,
377
+ "step": 925
378
+ },
379
+ {
380
+ "epoch": 9.50251256281407,
381
+ "grad_norm": 0.004065455868840218,
382
+ "learning_rate": 4.0814814814814815e-05,
383
+ "loss": 0.0013,
384
+ "step": 950
385
+ },
386
+ {
387
+ "epoch": 9.753768844221106,
388
+ "grad_norm": 0.0004535421321634203,
389
+ "learning_rate": 3.896296296296296e-05,
390
+ "loss": 0.0153,
391
+ "step": 975
392
+ },
393
+ {
394
+ "epoch": 10.0,
395
+ "grad_norm": 0.00031142737134359777,
396
+ "learning_rate": 3.7111111111111113e-05,
397
+ "loss": 0.0,
398
+ "step": 1000
399
+ },
400
+ {
401
+ "epoch": 10.0,
402
+ "eval_accuracy": 0.9899497487437185,
403
+ "eval_f1_macro": 0.9889609804451654,
404
+ "eval_loss": 0.053815823048353195,
405
+ "eval_precision_macro": 0.9878787878787879,
406
+ "eval_recall_macro": 0.9904076738609113,
407
+ "eval_runtime": 12.5827,
408
+ "eval_samples_per_second": 31.631,
409
+ "eval_steps_per_second": 3.974,
410
+ "step": 1000
411
+ },
412
+ {
413
+ "epoch": 10.251256281407034,
414
+ "grad_norm": 0.0003844445454888046,
415
+ "learning_rate": 3.525925925925926e-05,
416
+ "loss": 0.0182,
417
+ "step": 1025
418
+ },
419
+ {
420
+ "epoch": 10.50251256281407,
421
+ "grad_norm": 0.0005026073777116835,
422
+ "learning_rate": 3.340740740740741e-05,
423
+ "loss": 0.0,
424
+ "step": 1050
425
+ },
426
+ {
427
+ "epoch": 10.753768844221106,
428
+ "grad_norm": 0.0004087302659172565,
429
+ "learning_rate": 3.155555555555556e-05,
430
+ "loss": 0.0007,
431
+ "step": 1075
432
+ },
433
+ {
434
+ "epoch": 11.0,
435
+ "grad_norm": 0.00014286497025750577,
436
+ "learning_rate": 2.9703703703703707e-05,
437
+ "loss": 0.0001,
438
+ "step": 1100
439
+ },
440
+ {
441
+ "epoch": 11.0,
442
+ "eval_accuracy": 0.9899497487437185,
443
+ "eval_f1_macro": 0.9889609804451654,
444
+ "eval_loss": 0.059518326073884964,
445
+ "eval_precision_macro": 0.9878787878787879,
446
+ "eval_recall_macro": 0.9904076738609113,
447
+ "eval_runtime": 12.9414,
448
+ "eval_samples_per_second": 30.754,
449
+ "eval_steps_per_second": 3.864,
450
+ "step": 1100
451
+ },
452
+ {
453
+ "epoch": 11.251256281407034,
454
+ "grad_norm": 0.006657073274254799,
455
+ "learning_rate": 2.7851851851851853e-05,
456
+ "loss": 0.0,
457
+ "step": 1125
458
+ },
459
+ {
460
+ "epoch": 11.50251256281407,
461
+ "grad_norm": 0.0008139883284457028,
462
+ "learning_rate": 2.6000000000000002e-05,
463
+ "loss": 0.003,
464
+ "step": 1150
465
+ },
466
+ {
467
+ "epoch": 11.753768844221106,
468
+ "grad_norm": 0.0003728137817233801,
469
+ "learning_rate": 2.414814814814815e-05,
470
+ "loss": 0.0,
471
+ "step": 1175
472
+ },
473
+ {
474
+ "epoch": 12.0,
475
+ "grad_norm": 0.00036724514211528003,
476
+ "learning_rate": 2.2296296296296297e-05,
477
+ "loss": 0.0,
478
+ "step": 1200
479
+ },
480
+ {
481
+ "epoch": 12.0,
482
+ "eval_accuracy": 0.9899497487437185,
483
+ "eval_f1_macro": 0.9889609804451654,
484
+ "eval_loss": 0.08012186735868454,
485
+ "eval_precision_macro": 0.9878787878787879,
486
+ "eval_recall_macro": 0.9904076738609113,
487
+ "eval_runtime": 12.5781,
488
+ "eval_samples_per_second": 31.642,
489
+ "eval_steps_per_second": 3.975,
490
+ "step": 1200
491
+ },
492
+ {
493
+ "epoch": 12.251256281407034,
494
+ "grad_norm": 0.0005332791479304433,
495
+ "learning_rate": 2.0444444444444446e-05,
496
+ "loss": 0.0,
497
+ "step": 1225
498
+ },
499
+ {
500
+ "epoch": 12.50251256281407,
501
+ "grad_norm": 0.0004045717651024461,
502
+ "learning_rate": 1.8592592592592595e-05,
503
+ "loss": 0.0,
504
+ "step": 1250
505
+ },
506
+ {
507
+ "epoch": 12.753768844221106,
508
+ "grad_norm": 0.0002463693090248853,
509
+ "learning_rate": 1.674074074074074e-05,
510
+ "loss": 0.0,
511
+ "step": 1275
512
+ },
513
+ {
514
+ "epoch": 13.0,
515
+ "grad_norm": 0.00010770269727800041,
516
+ "learning_rate": 1.4888888888888888e-05,
517
+ "loss": 0.0,
518
+ "step": 1300
519
+ },
520
+ {
521
+ "epoch": 13.0,
522
+ "eval_accuracy": 0.9899497487437185,
523
+ "eval_f1_macro": 0.9889609804451654,
524
+ "eval_loss": 0.07899193465709686,
525
+ "eval_precision_macro": 0.9878787878787879,
526
+ "eval_recall_macro": 0.9904076738609113,
527
+ "eval_runtime": 12.6619,
528
+ "eval_samples_per_second": 31.433,
529
+ "eval_steps_per_second": 3.949,
530
+ "step": 1300
531
+ },
532
+ {
533
+ "epoch": 13.251256281407034,
534
+ "grad_norm": 0.00043465051567181945,
535
+ "learning_rate": 1.3037037037037036e-05,
536
+ "loss": 0.0,
537
+ "step": 1325
538
+ },
539
+ {
540
+ "epoch": 13.50251256281407,
541
+ "grad_norm": 0.00033275250461883843,
542
+ "learning_rate": 1.1185185185185187e-05,
543
+ "loss": 0.0,
544
+ "step": 1350
545
+ },
546
+ {
547
+ "epoch": 13.753768844221106,
548
+ "grad_norm": 0.00047057392657734454,
549
+ "learning_rate": 9.333333333333334e-06,
550
+ "loss": 0.0,
551
+ "step": 1375
552
+ },
553
+ {
554
+ "epoch": 14.0,
555
+ "grad_norm": 0.0001207014502142556,
556
+ "learning_rate": 7.481481481481483e-06,
557
+ "loss": 0.0,
558
+ "step": 1400
559
+ },
560
+ {
561
+ "epoch": 14.0,
562
+ "eval_accuracy": 0.9899497487437185,
563
+ "eval_f1_macro": 0.9889609804451654,
564
+ "eval_loss": 0.07876739650964737,
565
+ "eval_precision_macro": 0.9878787878787879,
566
+ "eval_recall_macro": 0.9904076738609113,
567
+ "eval_runtime": 12.5567,
568
+ "eval_samples_per_second": 31.696,
569
+ "eval_steps_per_second": 3.982,
570
+ "step": 1400
571
+ }
572
+ ],
573
+ "logging_steps": 25,
574
+ "max_steps": 1500,
575
+ "num_input_tokens_seen": 0,
576
+ "num_train_epochs": 15,
577
+ "save_steps": 500,
578
+ "stateful_callbacks": {
579
+ "TrainerControl": {
580
+ "args": {
581
+ "should_epoch_stop": false,
582
+ "should_evaluate": false,
583
+ "should_log": false,
584
+ "should_save": true,
585
+ "should_training_stop": false
586
+ },
587
+ "attributes": {}
588
+ }
589
+ },
590
+ "total_flos": 1.5098084463618294e+18,
591
+ "train_batch_size": 8,
592
+ "trial_name": null,
593
+ "trial_params": null
594
+ }
checkpoint-1400/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e517b5194d3d7e0c0e46b924233f9f5528a2197171f7f496eac4bd52616415f0
3
+ size 5432
checkpoint-1500/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ASTForAudioClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "frequency_stride": 10,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.0,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "Sound_Guitar",
12
+ "1": "Sound_Drum",
13
+ "2": "Sound_Piano"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 3072,
17
+ "label2id": {
18
+ "Sound_Drum": 1,
19
+ "Sound_Guitar": 0,
20
+ "Sound_Piano": 2
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_length": 1024,
24
+ "model_type": "audio-spectrogram-transformer",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "num_mel_bins": 128,
28
+ "patch_size": 16,
29
+ "problem_type": "single_label_classification",
30
+ "qkv_bias": true,
31
+ "time_stride": 10,
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.53.1"
34
+ }
checkpoint-1500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:747ad7a0fcb3b73609d71591c090bb461f562e87546a307500133791ac8c1bea
3
+ size 344793116
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd69a7559409576556c75543f98ad0c79ef09ffa0481a4017b26e96aca99815
3
+ size 689698682
checkpoint-1500/preprocessor_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "ASTFeatureExtractor",
4
+ "feature_size": 1,
5
+ "max_length": 1024,
6
+ "mean": -4.2677393,
7
+ "num_mel_bins": 128,
8
+ "padding_side": "right",
9
+ "padding_value": 0.0,
10
+ "return_attention_mask": false,
11
+ "sampling_rate": 16000,
12
+ "std": 4.5689974
13
+ }
checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c94c375fe5ad2903d244ca6b5cc2a1a6cba4c0c26196f3b9cbd9ddd170bb0b8
3
+ size 14244
checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:740f92207430d59e3e915864ad915fc31827de287779f64d8e590410bcf177e5
3
+ size 1064
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,634 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 600,
3
+ "best_metric": 0.9974874371859297,
4
+ "best_model_checkpoint": "musical_instrument_detection_improved/checkpoint-600",
5
+ "epoch": 15.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.25125628140703515,
14
+ "grad_norm": 0.4818384647369385,
15
+ "learning_rate": 1.6000000000000003e-05,
16
+ "loss": 0.4941,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.5025125628140703,
21
+ "grad_norm": 4.753159046173096,
22
+ "learning_rate": 3.266666666666667e-05,
23
+ "loss": 0.0539,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 0.7537688442211056,
28
+ "grad_norm": 15.198661804199219,
29
+ "learning_rate": 4.933333333333334e-05,
30
+ "loss": 0.1284,
31
+ "step": 75
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "grad_norm": 0.40100908279418945,
36
+ "learning_rate": 6.6e-05,
37
+ "loss": 0.1306,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 1.0,
42
+ "eval_accuracy": 0.9748743718592965,
43
+ "eval_f1_macro": 0.9731342037379646,
44
+ "eval_loss": 0.07195427268743515,
45
+ "eval_precision_macro": 0.9712150747238466,
46
+ "eval_recall_macro": 0.9761503412881977,
47
+ "eval_runtime": 12.3998,
48
+ "eval_samples_per_second": 32.097,
49
+ "eval_steps_per_second": 4.032,
50
+ "step": 100
51
+ },
52
+ {
53
+ "epoch": 1.2512562814070352,
54
+ "grad_norm": 0.03644077107310295,
55
+ "learning_rate": 8.266666666666667e-05,
56
+ "loss": 0.0742,
57
+ "step": 125
58
+ },
59
+ {
60
+ "epoch": 1.5025125628140703,
61
+ "grad_norm": 0.0054424116387963295,
62
+ "learning_rate": 9.933333333333334e-05,
63
+ "loss": 0.0654,
64
+ "step": 150
65
+ },
66
+ {
67
+ "epoch": 1.7537688442211055,
68
+ "grad_norm": 0.05749930813908577,
69
+ "learning_rate": 9.822222222222223e-05,
70
+ "loss": 0.0411,
71
+ "step": 175
72
+ },
73
+ {
74
+ "epoch": 2.0,
75
+ "grad_norm": 0.034677326679229736,
76
+ "learning_rate": 9.637037037037037e-05,
77
+ "loss": 0.1626,
78
+ "step": 200
79
+ },
80
+ {
81
+ "epoch": 2.0,
82
+ "eval_accuracy": 0.9773869346733668,
83
+ "eval_f1_macro": 0.9760285913720755,
84
+ "eval_loss": 0.1097569689154625,
85
+ "eval_precision_macro": 0.9750499571795603,
86
+ "eval_recall_macro": 0.9784172661870504,
87
+ "eval_runtime": 12.8602,
88
+ "eval_samples_per_second": 30.948,
89
+ "eval_steps_per_second": 3.888,
90
+ "step": 200
91
+ },
92
+ {
93
+ "epoch": 2.251256281407035,
94
+ "grad_norm": 8.391584396362305,
95
+ "learning_rate": 9.451851851851853e-05,
96
+ "loss": 0.0613,
97
+ "step": 225
98
+ },
99
+ {
100
+ "epoch": 2.5025125628140703,
101
+ "grad_norm": 0.05065636709332466,
102
+ "learning_rate": 9.266666666666666e-05,
103
+ "loss": 0.0783,
104
+ "step": 250
105
+ },
106
+ {
107
+ "epoch": 2.7537688442211055,
108
+ "grad_norm": 4.692741394042969,
109
+ "learning_rate": 9.081481481481482e-05,
110
+ "loss": 0.0838,
111
+ "step": 275
112
+ },
113
+ {
114
+ "epoch": 3.0,
115
+ "grad_norm": 0.001607125741429627,
116
+ "learning_rate": 8.896296296296297e-05,
117
+ "loss": 0.0577,
118
+ "step": 300
119
+ },
120
+ {
121
+ "epoch": 3.0,
122
+ "eval_accuracy": 0.9849246231155779,
123
+ "eval_f1_macro": 0.9840659199317944,
124
+ "eval_loss": 0.06375118345022202,
125
+ "eval_precision_macro": 0.9825340045928281,
126
+ "eval_recall_macro": 0.9860503753859658,
127
+ "eval_runtime": 12.6279,
128
+ "eval_samples_per_second": 31.517,
129
+ "eval_steps_per_second": 3.959,
130
+ "step": 300
131
+ },
132
+ {
133
+ "epoch": 3.251256281407035,
134
+ "grad_norm": 0.0066846804693341255,
135
+ "learning_rate": 8.711111111111112e-05,
136
+ "loss": 0.0161,
137
+ "step": 325
138
+ },
139
+ {
140
+ "epoch": 3.5025125628140703,
141
+ "grad_norm": 5.183607578277588,
142
+ "learning_rate": 8.525925925925926e-05,
143
+ "loss": 0.0852,
144
+ "step": 350
145
+ },
146
+ {
147
+ "epoch": 3.7537688442211055,
148
+ "grad_norm": 1.3307738304138184,
149
+ "learning_rate": 8.340740740740741e-05,
150
+ "loss": 0.0025,
151
+ "step": 375
152
+ },
153
+ {
154
+ "epoch": 4.0,
155
+ "grad_norm": 0.0007463983492925763,
156
+ "learning_rate": 8.155555555555557e-05,
157
+ "loss": 0.031,
158
+ "step": 400
159
+ },
160
+ {
161
+ "epoch": 4.0,
162
+ "eval_accuracy": 0.9949748743718593,
163
+ "eval_f1_macro": 0.9944572643771775,
164
+ "eval_loss": 0.023198017850518227,
165
+ "eval_precision_macro": 0.9944572643771775,
166
+ "eval_recall_macro": 0.9944572643771775,
167
+ "eval_runtime": 12.4966,
168
+ "eval_samples_per_second": 31.849,
169
+ "eval_steps_per_second": 4.001,
170
+ "step": 400
171
+ },
172
+ {
173
+ "epoch": 4.251256281407035,
174
+ "grad_norm": 0.0033074100501835346,
175
+ "learning_rate": 7.97037037037037e-05,
176
+ "loss": 0.0155,
177
+ "step": 425
178
+ },
179
+ {
180
+ "epoch": 4.50251256281407,
181
+ "grad_norm": 2.0624492168426514,
182
+ "learning_rate": 7.785185185185186e-05,
183
+ "loss": 0.0366,
184
+ "step": 450
185
+ },
186
+ {
187
+ "epoch": 4.7537688442211055,
188
+ "grad_norm": 0.00034747723839245737,
189
+ "learning_rate": 7.6e-05,
190
+ "loss": 0.0139,
191
+ "step": 475
192
+ },
193
+ {
194
+ "epoch": 5.0,
195
+ "grad_norm": 0.0007314584800042212,
196
+ "learning_rate": 7.414814814814815e-05,
197
+ "loss": 0.0123,
198
+ "step": 500
199
+ },
200
+ {
201
+ "epoch": 5.0,
202
+ "eval_accuracy": 0.9874371859296482,
203
+ "eval_f1_macro": 0.9862145023435346,
204
+ "eval_loss": 0.05530214682221413,
205
+ "eval_precision_macro": 0.984984984984985,
206
+ "eval_recall_macro": 0.9880095923261392,
207
+ "eval_runtime": 12.5007,
208
+ "eval_samples_per_second": 31.838,
209
+ "eval_steps_per_second": 4.0,
210
+ "step": 500
211
+ },
212
+ {
213
+ "epoch": 5.251256281407035,
214
+ "grad_norm": 0.0016032257117331028,
215
+ "learning_rate": 7.22962962962963e-05,
216
+ "loss": 0.0002,
217
+ "step": 525
218
+ },
219
+ {
220
+ "epoch": 5.50251256281407,
221
+ "grad_norm": 2.0849623680114746,
222
+ "learning_rate": 7.044444444444444e-05,
223
+ "loss": 0.0597,
224
+ "step": 550
225
+ },
226
+ {
227
+ "epoch": 5.7537688442211055,
228
+ "grad_norm": 0.006351375486701727,
229
+ "learning_rate": 6.85925925925926e-05,
230
+ "loss": 0.0191,
231
+ "step": 575
232
+ },
233
+ {
234
+ "epoch": 6.0,
235
+ "grad_norm": 0.942449152469635,
236
+ "learning_rate": 6.674074074074075e-05,
237
+ "loss": 0.0016,
238
+ "step": 600
239
+ },
240
+ {
241
+ "epoch": 6.0,
242
+ "eval_accuracy": 0.9974874371859297,
243
+ "eval_f1_macro": 0.9972316853386666,
244
+ "eval_loss": 0.02638443559408188,
245
+ "eval_precision_macro": 0.9968847352024922,
246
+ "eval_recall_macro": 0.9976019184652278,
247
+ "eval_runtime": 12.501,
248
+ "eval_samples_per_second": 31.838,
249
+ "eval_steps_per_second": 4.0,
250
+ "step": 600
251
+ },
252
+ {
253
+ "epoch": 6.251256281407035,
254
+ "grad_norm": 0.0003063753538299352,
255
+ "learning_rate": 6.488888888888889e-05,
256
+ "loss": 0.0037,
257
+ "step": 625
258
+ },
259
+ {
260
+ "epoch": 6.50251256281407,
261
+ "grad_norm": 0.0006155333830974996,
262
+ "learning_rate": 6.303703703703704e-05,
263
+ "loss": 0.0126,
264
+ "step": 650
265
+ },
266
+ {
267
+ "epoch": 6.7537688442211055,
268
+ "grad_norm": 0.005792179610580206,
269
+ "learning_rate": 6.118518518518518e-05,
270
+ "loss": 0.0223,
271
+ "step": 675
272
+ },
273
+ {
274
+ "epoch": 7.0,
275
+ "grad_norm": 0.00047105210251174867,
276
+ "learning_rate": 5.9333333333333343e-05,
277
+ "loss": 0.0297,
278
+ "step": 700
279
+ },
280
+ {
281
+ "epoch": 7.0,
282
+ "eval_accuracy": 0.9849246231155779,
283
+ "eval_f1_macro": 0.9843097171528544,
284
+ "eval_loss": 0.09602699428796768,
285
+ "eval_precision_macro": 0.9835777126099706,
286
+ "eval_recall_macro": 0.985611510791367,
287
+ "eval_runtime": 12.5994,
288
+ "eval_samples_per_second": 31.589,
289
+ "eval_steps_per_second": 3.968,
290
+ "step": 700
291
+ },
292
+ {
293
+ "epoch": 7.251256281407035,
294
+ "grad_norm": 0.5099765062332153,
295
+ "learning_rate": 5.748148148148148e-05,
296
+ "loss": 0.0104,
297
+ "step": 725
298
+ },
299
+ {
300
+ "epoch": 7.50251256281407,
301
+ "grad_norm": 4.217642784118652,
302
+ "learning_rate": 5.562962962962963e-05,
303
+ "loss": 0.0024,
304
+ "step": 750
305
+ },
306
+ {
307
+ "epoch": 7.7537688442211055,
308
+ "grad_norm": 0.00032669113716110587,
309
+ "learning_rate": 5.377777777777778e-05,
310
+ "loss": 0.0002,
311
+ "step": 775
312
+ },
313
+ {
314
+ "epoch": 8.0,
315
+ "grad_norm": 0.00013344452599994838,
316
+ "learning_rate": 5.1925925925925933e-05,
317
+ "loss": 0.0051,
318
+ "step": 800
319
+ },
320
+ {
321
+ "epoch": 8.0,
322
+ "eval_accuracy": 0.9949748743718593,
323
+ "eval_f1_macro": 0.9944572643771775,
324
+ "eval_loss": 0.04679703712463379,
325
+ "eval_precision_macro": 0.9944572643771775,
326
+ "eval_recall_macro": 0.9944572643771775,
327
+ "eval_runtime": 12.6339,
328
+ "eval_samples_per_second": 31.502,
329
+ "eval_steps_per_second": 3.958,
330
+ "step": 800
331
+ },
332
+ {
333
+ "epoch": 8.251256281407034,
334
+ "grad_norm": 0.0006574160070158541,
335
+ "learning_rate": 5.007407407407407e-05,
336
+ "loss": 0.0186,
337
+ "step": 825
338
+ },
339
+ {
340
+ "epoch": 8.50251256281407,
341
+ "grad_norm": 0.0004143440746702254,
342
+ "learning_rate": 4.8222222222222225e-05,
343
+ "loss": 0.0005,
344
+ "step": 850
345
+ },
346
+ {
347
+ "epoch": 8.753768844221106,
348
+ "grad_norm": 0.009332993067800999,
349
+ "learning_rate": 4.637037037037038e-05,
350
+ "loss": 0.0,
351
+ "step": 875
352
+ },
353
+ {
354
+ "epoch": 9.0,
355
+ "grad_norm": 0.2795320451259613,
356
+ "learning_rate": 4.4518518518518523e-05,
357
+ "loss": 0.0073,
358
+ "step": 900
359
+ },
360
+ {
361
+ "epoch": 9.0,
362
+ "eval_accuracy": 0.9849246231155779,
363
+ "eval_f1_macro": 0.9836277857935251,
364
+ "eval_loss": 0.10048004239797592,
365
+ "eval_precision_macro": 0.9821428571428571,
366
+ "eval_recall_macro": 0.9858309430886664,
367
+ "eval_runtime": 12.4384,
368
+ "eval_samples_per_second": 31.998,
369
+ "eval_steps_per_second": 4.02,
370
+ "step": 900
371
+ },
372
+ {
373
+ "epoch": 9.251256281407034,
374
+ "grad_norm": 0.000357466604327783,
375
+ "learning_rate": 4.266666666666667e-05,
376
+ "loss": 0.0002,
377
+ "step": 925
378
+ },
379
+ {
380
+ "epoch": 9.50251256281407,
381
+ "grad_norm": 0.004065455868840218,
382
+ "learning_rate": 4.0814814814814815e-05,
383
+ "loss": 0.0013,
384
+ "step": 950
385
+ },
386
+ {
387
+ "epoch": 9.753768844221106,
388
+ "grad_norm": 0.0004535421321634203,
389
+ "learning_rate": 3.896296296296296e-05,
390
+ "loss": 0.0153,
391
+ "step": 975
392
+ },
393
+ {
394
+ "epoch": 10.0,
395
+ "grad_norm": 0.00031142737134359777,
396
+ "learning_rate": 3.7111111111111113e-05,
397
+ "loss": 0.0,
398
+ "step": 1000
399
+ },
400
+ {
401
+ "epoch": 10.0,
402
+ "eval_accuracy": 0.9899497487437185,
403
+ "eval_f1_macro": 0.9889609804451654,
404
+ "eval_loss": 0.053815823048353195,
405
+ "eval_precision_macro": 0.9878787878787879,
406
+ "eval_recall_macro": 0.9904076738609113,
407
+ "eval_runtime": 12.5827,
408
+ "eval_samples_per_second": 31.631,
409
+ "eval_steps_per_second": 3.974,
410
+ "step": 1000
411
+ },
412
+ {
413
+ "epoch": 10.251256281407034,
414
+ "grad_norm": 0.0003844445454888046,
415
+ "learning_rate": 3.525925925925926e-05,
416
+ "loss": 0.0182,
417
+ "step": 1025
418
+ },
419
+ {
420
+ "epoch": 10.50251256281407,
421
+ "grad_norm": 0.0005026073777116835,
422
+ "learning_rate": 3.340740740740741e-05,
423
+ "loss": 0.0,
424
+ "step": 1050
425
+ },
426
+ {
427
+ "epoch": 10.753768844221106,
428
+ "grad_norm": 0.0004087302659172565,
429
+ "learning_rate": 3.155555555555556e-05,
430
+ "loss": 0.0007,
431
+ "step": 1075
432
+ },
433
+ {
434
+ "epoch": 11.0,
435
+ "grad_norm": 0.00014286497025750577,
436
+ "learning_rate": 2.9703703703703707e-05,
437
+ "loss": 0.0001,
438
+ "step": 1100
439
+ },
440
+ {
441
+ "epoch": 11.0,
442
+ "eval_accuracy": 0.9899497487437185,
443
+ "eval_f1_macro": 0.9889609804451654,
444
+ "eval_loss": 0.059518326073884964,
445
+ "eval_precision_macro": 0.9878787878787879,
446
+ "eval_recall_macro": 0.9904076738609113,
447
+ "eval_runtime": 12.9414,
448
+ "eval_samples_per_second": 30.754,
449
+ "eval_steps_per_second": 3.864,
450
+ "step": 1100
451
+ },
452
+ {
453
+ "epoch": 11.251256281407034,
454
+ "grad_norm": 0.006657073274254799,
455
+ "learning_rate": 2.7851851851851853e-05,
456
+ "loss": 0.0,
457
+ "step": 1125
458
+ },
459
+ {
460
+ "epoch": 11.50251256281407,
461
+ "grad_norm": 0.0008139883284457028,
462
+ "learning_rate": 2.6000000000000002e-05,
463
+ "loss": 0.003,
464
+ "step": 1150
465
+ },
466
+ {
467
+ "epoch": 11.753768844221106,
468
+ "grad_norm": 0.0003728137817233801,
469
+ "learning_rate": 2.414814814814815e-05,
470
+ "loss": 0.0,
471
+ "step": 1175
472
+ },
473
+ {
474
+ "epoch": 12.0,
475
+ "grad_norm": 0.00036724514211528003,
476
+ "learning_rate": 2.2296296296296297e-05,
477
+ "loss": 0.0,
478
+ "step": 1200
479
+ },
480
+ {
481
+ "epoch": 12.0,
482
+ "eval_accuracy": 0.9899497487437185,
483
+ "eval_f1_macro": 0.9889609804451654,
484
+ "eval_loss": 0.08012186735868454,
485
+ "eval_precision_macro": 0.9878787878787879,
486
+ "eval_recall_macro": 0.9904076738609113,
487
+ "eval_runtime": 12.5781,
488
+ "eval_samples_per_second": 31.642,
489
+ "eval_steps_per_second": 3.975,
490
+ "step": 1200
491
+ },
492
+ {
493
+ "epoch": 12.251256281407034,
494
+ "grad_norm": 0.0005332791479304433,
495
+ "learning_rate": 2.0444444444444446e-05,
496
+ "loss": 0.0,
497
+ "step": 1225
498
+ },
499
+ {
500
+ "epoch": 12.50251256281407,
501
+ "grad_norm": 0.0004045717651024461,
502
+ "learning_rate": 1.8592592592592595e-05,
503
+ "loss": 0.0,
504
+ "step": 1250
505
+ },
506
+ {
507
+ "epoch": 12.753768844221106,
508
+ "grad_norm": 0.0002463693090248853,
509
+ "learning_rate": 1.674074074074074e-05,
510
+ "loss": 0.0,
511
+ "step": 1275
512
+ },
513
+ {
514
+ "epoch": 13.0,
515
+ "grad_norm": 0.00010770269727800041,
516
+ "learning_rate": 1.4888888888888888e-05,
517
+ "loss": 0.0,
518
+ "step": 1300
519
+ },
520
+ {
521
+ "epoch": 13.0,
522
+ "eval_accuracy": 0.9899497487437185,
523
+ "eval_f1_macro": 0.9889609804451654,
524
+ "eval_loss": 0.07899193465709686,
525
+ "eval_precision_macro": 0.9878787878787879,
526
+ "eval_recall_macro": 0.9904076738609113,
527
+ "eval_runtime": 12.6619,
528
+ "eval_samples_per_second": 31.433,
529
+ "eval_steps_per_second": 3.949,
530
+ "step": 1300
531
+ },
532
+ {
533
+ "epoch": 13.251256281407034,
534
+ "grad_norm": 0.00043465051567181945,
535
+ "learning_rate": 1.3037037037037036e-05,
536
+ "loss": 0.0,
537
+ "step": 1325
538
+ },
539
+ {
540
+ "epoch": 13.50251256281407,
541
+ "grad_norm": 0.00033275250461883843,
542
+ "learning_rate": 1.1185185185185187e-05,
543
+ "loss": 0.0,
544
+ "step": 1350
545
+ },
546
+ {
547
+ "epoch": 13.753768844221106,
548
+ "grad_norm": 0.00047057392657734454,
549
+ "learning_rate": 9.333333333333334e-06,
550
+ "loss": 0.0,
551
+ "step": 1375
552
+ },
553
+ {
554
+ "epoch": 14.0,
555
+ "grad_norm": 0.0001207014502142556,
556
+ "learning_rate": 7.481481481481483e-06,
557
+ "loss": 0.0,
558
+ "step": 1400
559
+ },
560
+ {
561
+ "epoch": 14.0,
562
+ "eval_accuracy": 0.9899497487437185,
563
+ "eval_f1_macro": 0.9889609804451654,
564
+ "eval_loss": 0.07876739650964737,
565
+ "eval_precision_macro": 0.9878787878787879,
566
+ "eval_recall_macro": 0.9904076738609113,
567
+ "eval_runtime": 12.5567,
568
+ "eval_samples_per_second": 31.696,
569
+ "eval_steps_per_second": 3.982,
570
+ "step": 1400
571
+ },
572
+ {
573
+ "epoch": 14.251256281407034,
574
+ "grad_norm": 0.0019831983372569084,
575
+ "learning_rate": 5.62962962962963e-06,
576
+ "loss": 0.0,
577
+ "step": 1425
578
+ },
579
+ {
580
+ "epoch": 14.50251256281407,
581
+ "grad_norm": 0.0002614876430016011,
582
+ "learning_rate": 3.777777777777778e-06,
583
+ "loss": 0.0,
584
+ "step": 1450
585
+ },
586
+ {
587
+ "epoch": 14.753768844221106,
588
+ "grad_norm": 0.00039343832759186625,
589
+ "learning_rate": 1.925925925925926e-06,
590
+ "loss": 0.0,
591
+ "step": 1475
592
+ },
593
+ {
594
+ "epoch": 15.0,
595
+ "grad_norm": 0.00017274008132517338,
596
+ "learning_rate": 7.407407407407407e-08,
597
+ "loss": 0.0,
598
+ "step": 1500
599
+ },
600
+ {
601
+ "epoch": 15.0,
602
+ "eval_accuracy": 0.9899497487437185,
603
+ "eval_f1_macro": 0.9889609804451654,
604
+ "eval_loss": 0.07867377996444702,
605
+ "eval_precision_macro": 0.9878787878787879,
606
+ "eval_recall_macro": 0.9904076738609113,
607
+ "eval_runtime": 12.6055,
608
+ "eval_samples_per_second": 31.573,
609
+ "eval_steps_per_second": 3.967,
610
+ "step": 1500
611
+ }
612
+ ],
613
+ "logging_steps": 25,
614
+ "max_steps": 1500,
615
+ "num_input_tokens_seen": 0,
616
+ "num_train_epochs": 15,
617
+ "save_steps": 500,
618
+ "stateful_callbacks": {
619
+ "TrainerControl": {
620
+ "args": {
621
+ "should_epoch_stop": false,
622
+ "should_evaluate": false,
623
+ "should_log": false,
624
+ "should_save": true,
625
+ "should_training_stop": true
626
+ },
627
+ "attributes": {}
628
+ }
629
+ },
630
+ "total_flos": 1.6176519068162458e+18,
631
+ "train_batch_size": 8,
632
+ "trial_name": null,
633
+ "trial_params": null
634
+ }
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e517b5194d3d7e0c0e46b924233f9f5528a2197171f7f496eac4bd52616415f0
3
+ size 5432
checkpoint-600/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ASTForAudioClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "frequency_stride": 10,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.0,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "Sound_Guitar",
12
+ "1": "Sound_Drum",
13
+ "2": "Sound_Piano"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 3072,
17
+ "label2id": {
18
+ "Sound_Drum": 1,
19
+ "Sound_Guitar": 0,
20
+ "Sound_Piano": 2
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_length": 1024,
24
+ "model_type": "audio-spectrogram-transformer",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "num_mel_bins": 128,
28
+ "patch_size": 16,
29
+ "problem_type": "single_label_classification",
30
+ "qkv_bias": true,
31
+ "time_stride": 10,
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.53.1"
34
+ }
checkpoint-600/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ae170acdc8ff2e5cd84e430d88ca1b2a959897f540931ff4625bc47e34cb603
3
+ size 344793116
checkpoint-600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:330c98e71441d59f4e24e75f5dd5e9d3d795fcab3a4ef659c8b6c517e4fbc94c
3
+ size 689698682
checkpoint-600/preprocessor_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "ASTFeatureExtractor",
4
+ "feature_size": 1,
5
+ "max_length": 1024,
6
+ "mean": -4.2677393,
7
+ "num_mel_bins": 128,
8
+ "padding_side": "right",
9
+ "padding_value": 0.0,
10
+ "return_attention_mask": false,
11
+ "sampling_rate": 16000,
12
+ "std": 4.5689974
13
+ }
checkpoint-600/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3e5d946241df2516b06d7074d8779088eae7607173ad780df56583910a9589b
3
+ size 14244
checkpoint-600/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:747a3535c0bde2e788cd3d8e682b1bd8db22385df76d10e4c41b5d5f68659f18
3
+ size 1064
checkpoint-600/trainer_state.json ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 600,
3
+ "best_metric": 0.9974874371859297,
4
+ "best_model_checkpoint": "musical_instrument_detection_improved/checkpoint-600",
5
+ "epoch": 6.0,
6
+ "eval_steps": 500,
7
+ "global_step": 600,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.25125628140703515,
14
+ "grad_norm": 0.4818384647369385,
15
+ "learning_rate": 1.6000000000000003e-05,
16
+ "loss": 0.4941,
17
+ "step": 25
18
+ },
19
+ {
20
+ "epoch": 0.5025125628140703,
21
+ "grad_norm": 4.753159046173096,
22
+ "learning_rate": 3.266666666666667e-05,
23
+ "loss": 0.0539,
24
+ "step": 50
25
+ },
26
+ {
27
+ "epoch": 0.7537688442211056,
28
+ "grad_norm": 15.198661804199219,
29
+ "learning_rate": 4.933333333333334e-05,
30
+ "loss": 0.1284,
31
+ "step": 75
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "grad_norm": 0.40100908279418945,
36
+ "learning_rate": 6.6e-05,
37
+ "loss": 0.1306,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 1.0,
42
+ "eval_accuracy": 0.9748743718592965,
43
+ "eval_f1_macro": 0.9731342037379646,
44
+ "eval_loss": 0.07195427268743515,
45
+ "eval_precision_macro": 0.9712150747238466,
46
+ "eval_recall_macro": 0.9761503412881977,
47
+ "eval_runtime": 12.3998,
48
+ "eval_samples_per_second": 32.097,
49
+ "eval_steps_per_second": 4.032,
50
+ "step": 100
51
+ },
52
+ {
53
+ "epoch": 1.2512562814070352,
54
+ "grad_norm": 0.03644077107310295,
55
+ "learning_rate": 8.266666666666667e-05,
56
+ "loss": 0.0742,
57
+ "step": 125
58
+ },
59
+ {
60
+ "epoch": 1.5025125628140703,
61
+ "grad_norm": 0.0054424116387963295,
62
+ "learning_rate": 9.933333333333334e-05,
63
+ "loss": 0.0654,
64
+ "step": 150
65
+ },
66
+ {
67
+ "epoch": 1.7537688442211055,
68
+ "grad_norm": 0.05749930813908577,
69
+ "learning_rate": 9.822222222222223e-05,
70
+ "loss": 0.0411,
71
+ "step": 175
72
+ },
73
+ {
74
+ "epoch": 2.0,
75
+ "grad_norm": 0.034677326679229736,
76
+ "learning_rate": 9.637037037037037e-05,
77
+ "loss": 0.1626,
78
+ "step": 200
79
+ },
80
+ {
81
+ "epoch": 2.0,
82
+ "eval_accuracy": 0.9773869346733668,
83
+ "eval_f1_macro": 0.9760285913720755,
84
+ "eval_loss": 0.1097569689154625,
85
+ "eval_precision_macro": 0.9750499571795603,
86
+ "eval_recall_macro": 0.9784172661870504,
87
+ "eval_runtime": 12.8602,
88
+ "eval_samples_per_second": 30.948,
89
+ "eval_steps_per_second": 3.888,
90
+ "step": 200
91
+ },
92
+ {
93
+ "epoch": 2.251256281407035,
94
+ "grad_norm": 8.391584396362305,
95
+ "learning_rate": 9.451851851851853e-05,
96
+ "loss": 0.0613,
97
+ "step": 225
98
+ },
99
+ {
100
+ "epoch": 2.5025125628140703,
101
+ "grad_norm": 0.05065636709332466,
102
+ "learning_rate": 9.266666666666666e-05,
103
+ "loss": 0.0783,
104
+ "step": 250
105
+ },
106
+ {
107
+ "epoch": 2.7537688442211055,
108
+ "grad_norm": 4.692741394042969,
109
+ "learning_rate": 9.081481481481482e-05,
110
+ "loss": 0.0838,
111
+ "step": 275
112
+ },
113
+ {
114
+ "epoch": 3.0,
115
+ "grad_norm": 0.001607125741429627,
116
+ "learning_rate": 8.896296296296297e-05,
117
+ "loss": 0.0577,
118
+ "step": 300
119
+ },
120
+ {
121
+ "epoch": 3.0,
122
+ "eval_accuracy": 0.9849246231155779,
123
+ "eval_f1_macro": 0.9840659199317944,
124
+ "eval_loss": 0.06375118345022202,
125
+ "eval_precision_macro": 0.9825340045928281,
126
+ "eval_recall_macro": 0.9860503753859658,
127
+ "eval_runtime": 12.6279,
128
+ "eval_samples_per_second": 31.517,
129
+ "eval_steps_per_second": 3.959,
130
+ "step": 300
131
+ },
132
+ {
133
+ "epoch": 3.251256281407035,
134
+ "grad_norm": 0.0066846804693341255,
135
+ "learning_rate": 8.711111111111112e-05,
136
+ "loss": 0.0161,
137
+ "step": 325
138
+ },
139
+ {
140
+ "epoch": 3.5025125628140703,
141
+ "grad_norm": 5.183607578277588,
142
+ "learning_rate": 8.525925925925926e-05,
143
+ "loss": 0.0852,
144
+ "step": 350
145
+ },
146
+ {
147
+ "epoch": 3.7537688442211055,
148
+ "grad_norm": 1.3307738304138184,
149
+ "learning_rate": 8.340740740740741e-05,
150
+ "loss": 0.0025,
151
+ "step": 375
152
+ },
153
+ {
154
+ "epoch": 4.0,
155
+ "grad_norm": 0.0007463983492925763,
156
+ "learning_rate": 8.155555555555557e-05,
157
+ "loss": 0.031,
158
+ "step": 400
159
+ },
160
+ {
161
+ "epoch": 4.0,
162
+ "eval_accuracy": 0.9949748743718593,
163
+ "eval_f1_macro": 0.9944572643771775,
164
+ "eval_loss": 0.023198017850518227,
165
+ "eval_precision_macro": 0.9944572643771775,
166
+ "eval_recall_macro": 0.9944572643771775,
167
+ "eval_runtime": 12.4966,
168
+ "eval_samples_per_second": 31.849,
169
+ "eval_steps_per_second": 4.001,
170
+ "step": 400
171
+ },
172
+ {
173
+ "epoch": 4.251256281407035,
174
+ "grad_norm": 0.0033074100501835346,
175
+ "learning_rate": 7.97037037037037e-05,
176
+ "loss": 0.0155,
177
+ "step": 425
178
+ },
179
+ {
180
+ "epoch": 4.50251256281407,
181
+ "grad_norm": 2.0624492168426514,
182
+ "learning_rate": 7.785185185185186e-05,
183
+ "loss": 0.0366,
184
+ "step": 450
185
+ },
186
+ {
187
+ "epoch": 4.7537688442211055,
188
+ "grad_norm": 0.00034747723839245737,
189
+ "learning_rate": 7.6e-05,
190
+ "loss": 0.0139,
191
+ "step": 475
192
+ },
193
+ {
194
+ "epoch": 5.0,
195
+ "grad_norm": 0.0007314584800042212,
196
+ "learning_rate": 7.414814814814815e-05,
197
+ "loss": 0.0123,
198
+ "step": 500
199
+ },
200
+ {
201
+ "epoch": 5.0,
202
+ "eval_accuracy": 0.9874371859296482,
203
+ "eval_f1_macro": 0.9862145023435346,
204
+ "eval_loss": 0.05530214682221413,
205
+ "eval_precision_macro": 0.984984984984985,
206
+ "eval_recall_macro": 0.9880095923261392,
207
+ "eval_runtime": 12.5007,
208
+ "eval_samples_per_second": 31.838,
209
+ "eval_steps_per_second": 4.0,
210
+ "step": 500
211
+ },
212
+ {
213
+ "epoch": 5.251256281407035,
214
+ "grad_norm": 0.0016032257117331028,
215
+ "learning_rate": 7.22962962962963e-05,
216
+ "loss": 0.0002,
217
+ "step": 525
218
+ },
219
+ {
220
+ "epoch": 5.50251256281407,
221
+ "grad_norm": 2.0849623680114746,
222
+ "learning_rate": 7.044444444444444e-05,
223
+ "loss": 0.0597,
224
+ "step": 550
225
+ },
226
+ {
227
+ "epoch": 5.7537688442211055,
228
+ "grad_norm": 0.006351375486701727,
229
+ "learning_rate": 6.85925925925926e-05,
230
+ "loss": 0.0191,
231
+ "step": 575
232
+ },
233
+ {
234
+ "epoch": 6.0,
235
+ "grad_norm": 0.942449152469635,
236
+ "learning_rate": 6.674074074074075e-05,
237
+ "loss": 0.0016,
238
+ "step": 600
239
+ },
240
+ {
241
+ "epoch": 6.0,
242
+ "eval_accuracy": 0.9974874371859297,
243
+ "eval_f1_macro": 0.9972316853386666,
244
+ "eval_loss": 0.02638443559408188,
245
+ "eval_precision_macro": 0.9968847352024922,
246
+ "eval_recall_macro": 0.9976019184652278,
247
+ "eval_runtime": 12.501,
248
+ "eval_samples_per_second": 31.838,
249
+ "eval_steps_per_second": 4.0,
250
+ "step": 600
251
+ }
252
+ ],
253
+ "logging_steps": 25,
254
+ "max_steps": 1500,
255
+ "num_input_tokens_seen": 0,
256
+ "num_train_epochs": 15,
257
+ "save_steps": 500,
258
+ "stateful_callbacks": {
259
+ "TrainerControl": {
260
+ "args": {
261
+ "should_epoch_stop": false,
262
+ "should_evaluate": false,
263
+ "should_log": false,
264
+ "should_save": true,
265
+ "should_training_stop": false
266
+ },
267
+ "attributes": {}
268
+ }
269
+ },
270
+ "total_flos": 6.470607627264983e+17,
271
+ "train_batch_size": 8,
272
+ "trial_name": null,
273
+ "trial_params": null
274
+ }
checkpoint-600/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e517b5194d3d7e0c0e46b924233f9f5528a2197171f7f496eac4bd52616415f0
3
+ size 5432
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ASTForAudioClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "frequency_stride": 10,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.0,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "Sound_Guitar",
12
+ "1": "Sound_Drum",
13
+ "2": "Sound_Piano"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 3072,
17
+ "label2id": {
18
+ "Sound_Drum": 1,
19
+ "Sound_Guitar": 0,
20
+ "Sound_Piano": 2
21
+ },
22
+ "layer_norm_eps": 1e-12,
23
+ "max_length": 1024,
24
+ "model_type": "audio-spectrogram-transformer",
25
+ "num_attention_heads": 12,
26
+ "num_hidden_layers": 12,
27
+ "num_mel_bins": 128,
28
+ "patch_size": 16,
29
+ "problem_type": "single_label_classification",
30
+ "qkv_bias": true,
31
+ "time_stride": 10,
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.53.1"
34
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ae170acdc8ff2e5cd84e430d88ca1b2a959897f540931ff4625bc47e34cb603
3
+ size 344793116
preprocessor_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "ASTFeatureExtractor",
4
+ "feature_size": 1,
5
+ "max_length": 1024,
6
+ "mean": -4.2677393,
7
+ "num_mel_bins": 128,
8
+ "padding_side": "right",
9
+ "padding_value": 0.0,
10
+ "return_attention_mask": false,
11
+ "sampling_rate": 16000,
12
+ "std": 4.5689974
13
+ }
runs/Jul13_07-53-15_ee9e3dbd0066/events.out.tfevents.1752393198.ee9e3dbd0066.19259.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4cb0095c797094eeeea30565890537f4b4d278c3a17299efa1137b5e934e77b
3
+ size 28156
runs/Jul13_08-28-47_ee9e3dbd0066/events.out.tfevents.1752395338.ee9e3dbd0066.19259.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b075e616edb5deec0f08e79c52f8da3de4733e555b95fd93381a7fcbaac66b18
3
+ size 25495
runs/Jul13_08-28-47_ee9e3dbd0066/events.out.tfevents.1752397078.ee9e3dbd0066.19259.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57ee05b04ffd48b5fcc95864cf3916b82789eafdbd75ab510c82039e1cc9e40e
3
+ size 578
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e517b5194d3d7e0c0e46b924233f9f5528a2197171f7f496eac4bd52616415f0
3
+ size 5432