NicoNico commited on
Commit
016f50a
·
verified ·
1 Parent(s): 38509a7

Add files using upload-large-folder tool

Browse files
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04dc3aeb2e7fa02bc48f9f36a732d24e5fa0894708b2a977693595e9e9fa550b
3
- size 5349476525
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5fdf8bce59e04fbbd9ee87f14cca71408b7ce981d270f3ee0f2f549adc9f578
3
+ size 5364545324
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c352e1c23949a8e3f0b640f8dd9c5dd38a6488fd3e6f42ab5a94e63450cc703e
3
- size 4793668184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbc1df0432c8f120d7dcd89c55da5274ee93c3e8a30fbb18c2d5da06d706673e
3
+ size 5350492553
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:896f4f21ce3e1e165a8636fc21bf67eb5c4ccbc0b1d07ede63d5246dd459ded2
3
- size 3111649406
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e752f5dcf0582c9f0b0b78939cf0e26fa979f25e0c56ac257b563cbe521455cb
3
+ size 3159071730
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 13254615040
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -162,34 +162,34 @@
162
  "model.layers.11.self_attn.v_proj.scales": "model-00001-of-00003.safetensors",
163
  "model.layers.11.self_attn.v_proj.zeros": "model-00001-of-00003.safetensors",
164
  "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
165
- "model.layers.12.mlp.down_proj.channel_scale": "model-00001-of-00003.safetensors",
166
- "model.layers.12.mlp.down_proj.q_perm": "model-00001-of-00003.safetensors",
167
- "model.layers.12.mlp.down_proj.qweight": "model-00001-of-00003.safetensors",
168
- "model.layers.12.mlp.down_proj.scales": "model-00001-of-00003.safetensors",
169
- "model.layers.12.mlp.down_proj.zeros": "model-00001-of-00003.safetensors",
170
- "model.layers.12.mlp.gate_proj.channel_scale": "model-00001-of-00003.safetensors",
171
- "model.layers.12.mlp.gate_proj.q_perm": "model-00001-of-00003.safetensors",
172
- "model.layers.12.mlp.gate_proj.qweight": "model-00001-of-00003.safetensors",
173
- "model.layers.12.mlp.gate_proj.scales": "model-00001-of-00003.safetensors",
174
- "model.layers.12.mlp.gate_proj.zeros": "model-00001-of-00003.safetensors",
175
  "model.layers.12.mlp.up_proj.channel_scale": "model-00002-of-00003.safetensors",
176
  "model.layers.12.mlp.up_proj.q_perm": "model-00002-of-00003.safetensors",
177
  "model.layers.12.mlp.up_proj.qweight": "model-00002-of-00003.safetensors",
178
  "model.layers.12.mlp.up_proj.scales": "model-00002-of-00003.safetensors",
179
  "model.layers.12.mlp.up_proj.zeros": "model-00002-of-00003.safetensors",
180
  "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
181
- "model.layers.12.self_attn.k_norm.weight": "model-00001-of-00003.safetensors",
182
  "model.layers.12.self_attn.k_proj.channel_scale": "model-00001-of-00003.safetensors",
183
  "model.layers.12.self_attn.k_proj.q_perm": "model-00001-of-00003.safetensors",
184
  "model.layers.12.self_attn.k_proj.qweight": "model-00001-of-00003.safetensors",
185
  "model.layers.12.self_attn.k_proj.scales": "model-00001-of-00003.safetensors",
186
  "model.layers.12.self_attn.k_proj.zeros": "model-00001-of-00003.safetensors",
187
- "model.layers.12.self_attn.o_proj.channel_scale": "model-00001-of-00003.safetensors",
188
- "model.layers.12.self_attn.o_proj.q_perm": "model-00001-of-00003.safetensors",
189
- "model.layers.12.self_attn.o_proj.qweight": "model-00001-of-00003.safetensors",
190
- "model.layers.12.self_attn.o_proj.scales": "model-00001-of-00003.safetensors",
191
- "model.layers.12.self_attn.o_proj.zeros": "model-00001-of-00003.safetensors",
192
- "model.layers.12.self_attn.q_norm.weight": "model-00001-of-00003.safetensors",
193
  "model.layers.12.self_attn.q_proj.channel_scale": "model-00001-of-00003.safetensors",
194
  "model.layers.12.self_attn.q_proj.q_perm": "model-00001-of-00003.safetensors",
195
  "model.layers.12.self_attn.q_proj.qweight": "model-00001-of-00003.safetensors",
@@ -1292,7 +1292,7 @@
1292
  "model.layers.38.self_attn.v_proj.qweight": "model-00002-of-00003.safetensors",
1293
  "model.layers.38.self_attn.v_proj.scales": "model-00002-of-00003.safetensors",
1294
  "model.layers.38.self_attn.v_proj.zeros": "model-00002-of-00003.safetensors",
1295
- "model.layers.39.input_layernorm.weight": "model-00002-of-00003.safetensors",
1296
  "model.layers.39.mlp.down_proj.channel_scale": "model-00002-of-00003.safetensors",
1297
  "model.layers.39.mlp.down_proj.q_perm": "model-00002-of-00003.safetensors",
1298
  "model.layers.39.mlp.down_proj.qweight": "model-00002-of-00003.safetensors",
@@ -1303,12 +1303,12 @@
1303
  "model.layers.39.mlp.gate_proj.qweight": "model-00002-of-00003.safetensors",
1304
  "model.layers.39.mlp.gate_proj.scales": "model-00002-of-00003.safetensors",
1305
  "model.layers.39.mlp.gate_proj.zeros": "model-00002-of-00003.safetensors",
1306
- "model.layers.39.mlp.up_proj.channel_scale": "model-00002-of-00003.safetensors",
1307
- "model.layers.39.mlp.up_proj.q_perm": "model-00002-of-00003.safetensors",
1308
- "model.layers.39.mlp.up_proj.qweight": "model-00002-of-00003.safetensors",
1309
- "model.layers.39.mlp.up_proj.scales": "model-00002-of-00003.safetensors",
1310
- "model.layers.39.mlp.up_proj.zeros": "model-00002-of-00003.safetensors",
1311
- "model.layers.39.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
1312
  "model.layers.39.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
1313
  "model.layers.39.self_attn.k_proj.channel_scale": "model-00002-of-00003.safetensors",
1314
  "model.layers.39.self_attn.k_proj.q_perm": "model-00002-of-00003.safetensors",
@@ -1565,6 +1565,6 @@
1565
  "model.layers.9.self_attn.v_proj.qweight": "model-00001-of-00003.safetensors",
1566
  "model.layers.9.self_attn.v_proj.scales": "model-00001-of-00003.safetensors",
1567
  "model.layers.9.self_attn.v_proj.zeros": "model-00001-of-00003.safetensors",
1568
- "model.norm.weight": "model-00002-of-00003.safetensors"
1569
  }
1570
  }
 
1
  {
2
  "metadata": {
3
+ "total_size": 13873930240
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00003-of-00003.safetensors",
 
162
  "model.layers.11.self_attn.v_proj.scales": "model-00001-of-00003.safetensors",
163
  "model.layers.11.self_attn.v_proj.zeros": "model-00001-of-00003.safetensors",
164
  "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
165
+ "model.layers.12.mlp.down_proj.channel_scale": "model-00002-of-00003.safetensors",
166
+ "model.layers.12.mlp.down_proj.q_perm": "model-00002-of-00003.safetensors",
167
+ "model.layers.12.mlp.down_proj.qweight": "model-00002-of-00003.safetensors",
168
+ "model.layers.12.mlp.down_proj.scales": "model-00002-of-00003.safetensors",
169
+ "model.layers.12.mlp.down_proj.zeros": "model-00002-of-00003.safetensors",
170
+ "model.layers.12.mlp.gate_proj.channel_scale": "model-00002-of-00003.safetensors",
171
+ "model.layers.12.mlp.gate_proj.q_perm": "model-00002-of-00003.safetensors",
172
+ "model.layers.12.mlp.gate_proj.qweight": "model-00002-of-00003.safetensors",
173
+ "model.layers.12.mlp.gate_proj.scales": "model-00002-of-00003.safetensors",
174
+ "model.layers.12.mlp.gate_proj.zeros": "model-00002-of-00003.safetensors",
175
  "model.layers.12.mlp.up_proj.channel_scale": "model-00002-of-00003.safetensors",
176
  "model.layers.12.mlp.up_proj.q_perm": "model-00002-of-00003.safetensors",
177
  "model.layers.12.mlp.up_proj.qweight": "model-00002-of-00003.safetensors",
178
  "model.layers.12.mlp.up_proj.scales": "model-00002-of-00003.safetensors",
179
  "model.layers.12.mlp.up_proj.zeros": "model-00002-of-00003.safetensors",
180
  "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
181
+ "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
182
  "model.layers.12.self_attn.k_proj.channel_scale": "model-00001-of-00003.safetensors",
183
  "model.layers.12.self_attn.k_proj.q_perm": "model-00001-of-00003.safetensors",
184
  "model.layers.12.self_attn.k_proj.qweight": "model-00001-of-00003.safetensors",
185
  "model.layers.12.self_attn.k_proj.scales": "model-00001-of-00003.safetensors",
186
  "model.layers.12.self_attn.k_proj.zeros": "model-00001-of-00003.safetensors",
187
+ "model.layers.12.self_attn.o_proj.channel_scale": "model-00002-of-00003.safetensors",
188
+ "model.layers.12.self_attn.o_proj.q_perm": "model-00002-of-00003.safetensors",
189
+ "model.layers.12.self_attn.o_proj.qweight": "model-00002-of-00003.safetensors",
190
+ "model.layers.12.self_attn.o_proj.scales": "model-00002-of-00003.safetensors",
191
+ "model.layers.12.self_attn.o_proj.zeros": "model-00002-of-00003.safetensors",
192
+ "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00003.safetensors",
193
  "model.layers.12.self_attn.q_proj.channel_scale": "model-00001-of-00003.safetensors",
194
  "model.layers.12.self_attn.q_proj.q_perm": "model-00001-of-00003.safetensors",
195
  "model.layers.12.self_attn.q_proj.qweight": "model-00001-of-00003.safetensors",
 
1292
  "model.layers.38.self_attn.v_proj.qweight": "model-00002-of-00003.safetensors",
1293
  "model.layers.38.self_attn.v_proj.scales": "model-00002-of-00003.safetensors",
1294
  "model.layers.38.self_attn.v_proj.zeros": "model-00002-of-00003.safetensors",
1295
+ "model.layers.39.input_layernorm.weight": "model-00003-of-00003.safetensors",
1296
  "model.layers.39.mlp.down_proj.channel_scale": "model-00002-of-00003.safetensors",
1297
  "model.layers.39.mlp.down_proj.q_perm": "model-00002-of-00003.safetensors",
1298
  "model.layers.39.mlp.down_proj.qweight": "model-00002-of-00003.safetensors",
 
1303
  "model.layers.39.mlp.gate_proj.qweight": "model-00002-of-00003.safetensors",
1304
  "model.layers.39.mlp.gate_proj.scales": "model-00002-of-00003.safetensors",
1305
  "model.layers.39.mlp.gate_proj.zeros": "model-00002-of-00003.safetensors",
1306
+ "model.layers.39.mlp.up_proj.channel_scale": "model-00003-of-00003.safetensors",
1307
+ "model.layers.39.mlp.up_proj.q_perm": "model-00003-of-00003.safetensors",
1308
+ "model.layers.39.mlp.up_proj.qweight": "model-00003-of-00003.safetensors",
1309
+ "model.layers.39.mlp.up_proj.scales": "model-00003-of-00003.safetensors",
1310
+ "model.layers.39.mlp.up_proj.zeros": "model-00003-of-00003.safetensors",
1311
+ "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
1312
  "model.layers.39.self_attn.k_norm.weight": "model-00002-of-00003.safetensors",
1313
  "model.layers.39.self_attn.k_proj.channel_scale": "model-00002-of-00003.safetensors",
1314
  "model.layers.39.self_attn.k_proj.q_perm": "model-00002-of-00003.safetensors",
 
1565
  "model.layers.9.self_attn.v_proj.qweight": "model-00001-of-00003.safetensors",
1566
  "model.layers.9.self_attn.v_proj.scales": "model-00001-of-00003.safetensors",
1567
  "model.layers.9.self_attn.v_proj.zeros": "model-00001-of-00003.safetensors",
1568
+ "model.norm.weight": "model-00003-of-00003.safetensors"
1569
  }
1570
  }
quant_strategy.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "measurement": {
3
  "model.layers.0": {
4
- "accuracy": 0.9783150250732433,
5
- "total_bits": 1424424960.0,
6
  "o_proj": {
7
  "group_size": {
8
- "4": 32
9
  },
10
  "bits": [
11
  4
@@ -18,7 +18,7 @@
18
  },
19
  "down_proj": {
20
  "group_size": {
21
- "4": 32
22
  },
23
  "bits": [
24
  4
@@ -31,7 +31,7 @@
31
  },
32
  "q_proj": {
33
  "group_size": {
34
- "4": 32
35
  },
36
  "bits": [
37
  4
@@ -44,7 +44,7 @@
44
  },
45
  "k_proj": {
46
  "group_size": {
47
- "4": 32
48
  },
49
  "bits": [
50
  4
@@ -57,7 +57,7 @@
57
  },
58
  "v_proj": {
59
  "group_size": {
60
- "4": 32
61
  },
62
  "bits": [
63
  4
@@ -70,7 +70,7 @@
70
  },
71
  "gate_proj": {
72
  "group_size": {
73
- "4": 32
74
  },
75
  "bits": [
76
  4
@@ -83,7 +83,7 @@
83
  },
84
  "up_proj": {
85
  "group_size": {
86
- "4": 32
87
  },
88
  "bits": [
89
  4
@@ -96,11 +96,11 @@
96
  }
97
  },
98
  "model.layers.1": {
99
- "accuracy": 0.9810890091612237,
100
- "total_bits": 1424424960.0,
101
  "o_proj": {
102
  "group_size": {
103
- "4": 32
104
  },
105
  "bits": [
106
  4
@@ -113,7 +113,7 @@
113
  },
114
  "down_proj": {
115
  "group_size": {
116
- "4": 32
117
  },
118
  "bits": [
119
  4
@@ -126,7 +126,7 @@
126
  },
127
  "q_proj": {
128
  "group_size": {
129
- "4": 32
130
  },
131
  "bits": [
132
  4
@@ -139,7 +139,7 @@
139
  },
140
  "k_proj": {
141
  "group_size": {
142
- "4": 32
143
  },
144
  "bits": [
145
  4
@@ -152,7 +152,7 @@
152
  },
153
  "v_proj": {
154
  "group_size": {
155
- "4": 32
156
  },
157
  "bits": [
158
  4
@@ -165,7 +165,7 @@
165
  },
166
  "gate_proj": {
167
  "group_size": {
168
- "4": 32
169
  },
170
  "bits": [
171
  4
@@ -178,7 +178,7 @@
178
  },
179
  "up_proj": {
180
  "group_size": {
181
- "4": 32
182
  },
183
  "bits": [
184
  4
@@ -191,11 +191,11 @@
191
  }
192
  },
193
  "model.layers.2": {
194
- "accuracy": 0.9835030369504238,
195
- "total_bits": 1424424960.0,
196
  "o_proj": {
197
  "group_size": {
198
- "4": 32
199
  },
200
  "bits": [
201
  4
@@ -208,7 +208,7 @@
208
  },
209
  "down_proj": {
210
  "group_size": {
211
- "4": 32
212
  },
213
  "bits": [
214
  4
@@ -221,7 +221,7 @@
221
  },
222
  "q_proj": {
223
  "group_size": {
224
- "4": 32
225
  },
226
  "bits": [
227
  4
@@ -234,7 +234,7 @@
234
  },
235
  "k_proj": {
236
  "group_size": {
237
- "4": 32
238
  },
239
  "bits": [
240
  4
@@ -247,7 +247,7 @@
247
  },
248
  "v_proj": {
249
  "group_size": {
250
- "4": 32
251
  },
252
  "bits": [
253
  4
@@ -260,7 +260,7 @@
260
  },
261
  "gate_proj": {
262
  "group_size": {
263
- "4": 32
264
  },
265
  "bits": [
266
  4
@@ -273,7 +273,7 @@
273
  },
274
  "up_proj": {
275
  "group_size": {
276
- "4": 32
277
  },
278
  "bits": [
279
  4
@@ -286,11 +286,11 @@
286
  }
287
  },
288
  "model.layers.3": {
289
- "accuracy": 0.9778428763966076,
290
- "total_bits": 1424424960.0,
291
  "o_proj": {
292
  "group_size": {
293
- "4": 32
294
  },
295
  "bits": [
296
  4
@@ -303,7 +303,7 @@
303
  },
304
  "down_proj": {
305
  "group_size": {
306
- "4": 32
307
  },
308
  "bits": [
309
  4
@@ -316,7 +316,7 @@
316
  },
317
  "q_proj": {
318
  "group_size": {
319
- "4": 32
320
  },
321
  "bits": [
322
  4
@@ -329,7 +329,7 @@
329
  },
330
  "k_proj": {
331
  "group_size": {
332
- "4": 32
333
  },
334
  "bits": [
335
  4
@@ -342,7 +342,7 @@
342
  },
343
  "v_proj": {
344
  "group_size": {
345
- "4": 32
346
  },
347
  "bits": [
348
  4
@@ -355,7 +355,7 @@
355
  },
356
  "gate_proj": {
357
  "group_size": {
358
- "4": 32
359
  },
360
  "bits": [
361
  4
@@ -368,7 +368,7 @@
368
  },
369
  "up_proj": {
370
  "group_size": {
371
- "4": 32
372
  },
373
  "bits": [
374
  4
@@ -381,11 +381,11 @@
381
  }
382
  },
383
  "model.layers.4": {
384
- "accuracy": 0.9737890610995237,
385
- "total_bits": 1424424960.0,
386
  "o_proj": {
387
  "group_size": {
388
- "4": 32
389
  },
390
  "bits": [
391
  4
@@ -398,7 +398,7 @@
398
  },
399
  "down_proj": {
400
  "group_size": {
401
- "4": 32
402
  },
403
  "bits": [
404
  4
@@ -411,7 +411,7 @@
411
  },
412
  "q_proj": {
413
  "group_size": {
414
- "4": 32
415
  },
416
  "bits": [
417
  4
@@ -424,7 +424,7 @@
424
  },
425
  "k_proj": {
426
  "group_size": {
427
- "4": 32
428
  },
429
  "bits": [
430
  4
@@ -437,7 +437,7 @@
437
  },
438
  "v_proj": {
439
  "group_size": {
440
- "4": 32
441
  },
442
  "bits": [
443
  4
@@ -450,7 +450,7 @@
450
  },
451
  "gate_proj": {
452
  "group_size": {
453
- "4": 32
454
  },
455
  "bits": [
456
  4
@@ -463,7 +463,7 @@
463
  },
464
  "up_proj": {
465
  "group_size": {
466
- "4": 32
467
  },
468
  "bits": [
469
  4
@@ -571,11 +571,11 @@
571
  }
572
  },
573
  "model.layers.6": {
574
- "accuracy": 0.974737796568661,
575
- "total_bits": 1424424960.0,
576
  "o_proj": {
577
  "group_size": {
578
- "4": 32
579
  },
580
  "bits": [
581
  4
@@ -588,7 +588,7 @@
588
  },
589
  "down_proj": {
590
  "group_size": {
591
- "4": 32
592
  },
593
  "bits": [
594
  4
@@ -601,7 +601,7 @@
601
  },
602
  "q_proj": {
603
  "group_size": {
604
- "4": 32
605
  },
606
  "bits": [
607
  4
@@ -614,7 +614,7 @@
614
  },
615
  "k_proj": {
616
  "group_size": {
617
- "4": 32
618
  },
619
  "bits": [
620
  4
@@ -627,7 +627,7 @@
627
  },
628
  "v_proj": {
629
  "group_size": {
630
- "4": 32
631
  },
632
  "bits": [
633
  4
@@ -640,7 +640,7 @@
640
  },
641
  "gate_proj": {
642
  "group_size": {
643
- "4": 32
644
  },
645
  "bits": [
646
  4
@@ -653,7 +653,7 @@
653
  },
654
  "up_proj": {
655
  "group_size": {
656
- "4": 32
657
  },
658
  "bits": [
659
  4
@@ -856,11 +856,11 @@
856
  }
857
  },
858
  "model.layers.9": {
859
- "accuracy": 0.9765517065825406,
860
- "total_bits": 1424424960.0,
861
  "o_proj": {
862
  "group_size": {
863
- "4": 32
864
  },
865
  "bits": [
866
  4
@@ -873,7 +873,7 @@
873
  },
874
  "down_proj": {
875
  "group_size": {
876
- "4": 32
877
  },
878
  "bits": [
879
  4
@@ -886,7 +886,7 @@
886
  },
887
  "q_proj": {
888
  "group_size": {
889
- "4": 32
890
  },
891
  "bits": [
892
  4
@@ -899,7 +899,7 @@
899
  },
900
  "k_proj": {
901
  "group_size": {
902
- "4": 32
903
  },
904
  "bits": [
905
  4
@@ -912,7 +912,7 @@
912
  },
913
  "v_proj": {
914
  "group_size": {
915
- "4": 32
916
  },
917
  "bits": [
918
  4
@@ -925,7 +925,7 @@
925
  },
926
  "gate_proj": {
927
  "group_size": {
928
- "4": 32
929
  },
930
  "bits": [
931
  4
@@ -938,7 +938,7 @@
938
  },
939
  "up_proj": {
940
  "group_size": {
941
- "4": 32
942
  },
943
  "bits": [
944
  4
@@ -1046,11 +1046,11 @@
1046
  }
1047
  },
1048
  "model.layers.11": {
1049
- "accuracy": 0.9722881840571063,
1050
- "total_bits": 1424424960.0,
1051
  "o_proj": {
1052
  "group_size": {
1053
- "4": 32
1054
  },
1055
  "bits": [
1056
  4
@@ -1063,7 +1063,7 @@
1063
  },
1064
  "down_proj": {
1065
  "group_size": {
1066
- "4": 32
1067
  },
1068
  "bits": [
1069
  4
@@ -1076,7 +1076,7 @@
1076
  },
1077
  "q_proj": {
1078
  "group_size": {
1079
- "4": 32
1080
  },
1081
  "bits": [
1082
  4
@@ -1089,7 +1089,7 @@
1089
  },
1090
  "k_proj": {
1091
  "group_size": {
1092
- "4": 32
1093
  },
1094
  "bits": [
1095
  4
@@ -1102,7 +1102,7 @@
1102
  },
1103
  "v_proj": {
1104
  "group_size": {
1105
- "4": 32
1106
  },
1107
  "bits": [
1108
  4
@@ -1115,7 +1115,7 @@
1115
  },
1116
  "gate_proj": {
1117
  "group_size": {
1118
- "4": 32
1119
  },
1120
  "bits": [
1121
  4
@@ -1128,7 +1128,7 @@
1128
  },
1129
  "up_proj": {
1130
  "group_size": {
1131
- "4": 32
1132
  },
1133
  "bits": [
1134
  4
@@ -1141,11 +1141,11 @@
1141
  }
1142
  },
1143
  "model.layers.12": {
1144
- "accuracy": 0.9722967038105708,
1145
- "total_bits": 1424424960.0,
1146
  "o_proj": {
1147
  "group_size": {
1148
- "4": 32
1149
  },
1150
  "bits": [
1151
  4
@@ -1158,7 +1158,7 @@
1158
  },
1159
  "down_proj": {
1160
  "group_size": {
1161
- "4": 32
1162
  },
1163
  "bits": [
1164
  4
@@ -1171,7 +1171,7 @@
1171
  },
1172
  "q_proj": {
1173
  "group_size": {
1174
- "4": 32
1175
  },
1176
  "bits": [
1177
  4
@@ -1184,7 +1184,7 @@
1184
  },
1185
  "k_proj": {
1186
  "group_size": {
1187
- "4": 32
1188
  },
1189
  "bits": [
1190
  4
@@ -1197,7 +1197,7 @@
1197
  },
1198
  "v_proj": {
1199
  "group_size": {
1200
- "4": 32
1201
  },
1202
  "bits": [
1203
  4
@@ -1210,7 +1210,7 @@
1210
  },
1211
  "gate_proj": {
1212
  "group_size": {
1213
- "4": 32
1214
  },
1215
  "bits": [
1216
  4
@@ -1223,7 +1223,7 @@
1223
  },
1224
  "up_proj": {
1225
  "group_size": {
1226
- "4": 32
1227
  },
1228
  "bits": [
1229
  4
@@ -1996,11 +1996,11 @@
1996
  }
1997
  },
1998
  "model.layers.21": {
1999
- "accuracy": 0.9682318295162986,
2000
- "total_bits": 1424424960.0,
2001
  "o_proj": {
2002
  "group_size": {
2003
- "4": 32
2004
  },
2005
  "bits": [
2006
  4
@@ -2013,7 +2013,7 @@
2013
  },
2014
  "down_proj": {
2015
  "group_size": {
2016
- "4": 32
2017
  },
2018
  "bits": [
2019
  4
@@ -2026,7 +2026,7 @@
2026
  },
2027
  "q_proj": {
2028
  "group_size": {
2029
- "4": 32
2030
  },
2031
  "bits": [
2032
  4
@@ -2039,7 +2039,7 @@
2039
  },
2040
  "k_proj": {
2041
  "group_size": {
2042
- "4": 32
2043
  },
2044
  "bits": [
2045
  4
@@ -2052,7 +2052,7 @@
2052
  },
2053
  "v_proj": {
2054
  "group_size": {
2055
- "4": 32
2056
  },
2057
  "bits": [
2058
  4
@@ -2065,7 +2065,7 @@
2065
  },
2066
  "gate_proj": {
2067
  "group_size": {
2068
- "4": 32
2069
  },
2070
  "bits": [
2071
  4
@@ -2078,7 +2078,7 @@
2078
  },
2079
  "up_proj": {
2080
  "group_size": {
2081
- "4": 32
2082
  },
2083
  "bits": [
2084
  4
@@ -2376,11 +2376,11 @@
2376
  }
2377
  },
2378
  "model.layers.25": {
2379
- "accuracy": 0.9732438526698388,
2380
- "total_bits": 1424424960.0,
2381
  "o_proj": {
2382
  "group_size": {
2383
- "4": 32
2384
  },
2385
  "bits": [
2386
  4
@@ -2393,7 +2393,7 @@
2393
  },
2394
  "down_proj": {
2395
  "group_size": {
2396
- "4": 32
2397
  },
2398
  "bits": [
2399
  4
@@ -2406,7 +2406,7 @@
2406
  },
2407
  "q_proj": {
2408
  "group_size": {
2409
- "4": 32
2410
  },
2411
  "bits": [
2412
  4
@@ -2419,7 +2419,7 @@
2419
  },
2420
  "k_proj": {
2421
  "group_size": {
2422
- "4": 32
2423
  },
2424
  "bits": [
2425
  4
@@ -2432,7 +2432,7 @@
2432
  },
2433
  "v_proj": {
2434
  "group_size": {
2435
- "4": 32
2436
  },
2437
  "bits": [
2438
  4
@@ -2445,7 +2445,7 @@
2445
  },
2446
  "gate_proj": {
2447
  "group_size": {
2448
- "4": 32
2449
  },
2450
  "bits": [
2451
  4
@@ -2458,7 +2458,7 @@
2458
  },
2459
  "up_proj": {
2460
  "group_size": {
2461
- "4": 32
2462
  },
2463
  "bits": [
2464
  4
@@ -2471,11 +2471,11 @@
2471
  }
2472
  },
2473
  "model.layers.26": {
2474
- "accuracy": 0.9732138870458584,
2475
- "total_bits": 1424424960.0,
2476
  "o_proj": {
2477
  "group_size": {
2478
- "4": 32
2479
  },
2480
  "bits": [
2481
  4
@@ -2488,7 +2488,7 @@
2488
  },
2489
  "down_proj": {
2490
  "group_size": {
2491
- "4": 32
2492
  },
2493
  "bits": [
2494
  4
@@ -2501,7 +2501,7 @@
2501
  },
2502
  "q_proj": {
2503
  "group_size": {
2504
- "4": 32
2505
  },
2506
  "bits": [
2507
  4
@@ -2514,7 +2514,7 @@
2514
  },
2515
  "k_proj": {
2516
  "group_size": {
2517
- "4": 32
2518
  },
2519
  "bits": [
2520
  4
@@ -2527,7 +2527,7 @@
2527
  },
2528
  "v_proj": {
2529
  "group_size": {
2530
- "4": 32
2531
  },
2532
  "bits": [
2533
  4
@@ -2540,7 +2540,7 @@
2540
  },
2541
  "gate_proj": {
2542
  "group_size": {
2543
- "4": 32
2544
  },
2545
  "bits": [
2546
  4
@@ -2553,7 +2553,7 @@
2553
  },
2554
  "up_proj": {
2555
  "group_size": {
2556
- "4": 32
2557
  },
2558
  "bits": [
2559
  4
@@ -2946,11 +2946,11 @@
2946
  }
2947
  },
2948
  "model.layers.31": {
2949
- "accuracy": 0.9733400281984359,
2950
- "total_bits": 1424424960.0,
2951
  "o_proj": {
2952
  "group_size": {
2953
- "4": 32
2954
  },
2955
  "bits": [
2956
  4
@@ -2963,7 +2963,7 @@
2963
  },
2964
  "down_proj": {
2965
  "group_size": {
2966
- "4": 32
2967
  },
2968
  "bits": [
2969
  4
@@ -2976,7 +2976,7 @@
2976
  },
2977
  "q_proj": {
2978
  "group_size": {
2979
- "4": 32
2980
  },
2981
  "bits": [
2982
  4
@@ -2989,7 +2989,7 @@
2989
  },
2990
  "k_proj": {
2991
  "group_size": {
2992
- "4": 32
2993
  },
2994
  "bits": [
2995
  4
@@ -3002,7 +3002,7 @@
3002
  },
3003
  "v_proj": {
3004
  "group_size": {
3005
- "4": 32
3006
  },
3007
  "bits": [
3008
  4
@@ -3015,7 +3015,7 @@
3015
  },
3016
  "gate_proj": {
3017
  "group_size": {
3018
- "4": 32
3019
  },
3020
  "bits": [
3021
  4
@@ -3028,7 +3028,7 @@
3028
  },
3029
  "up_proj": {
3030
  "group_size": {
3031
- "4": 32
3032
  },
3033
  "bits": [
3034
  4
@@ -3041,11 +3041,11 @@
3041
  }
3042
  },
3043
  "model.layers.32": {
3044
- "accuracy": 0.972005337716837,
3045
- "total_bits": 1424424960.0,
3046
  "o_proj": {
3047
  "group_size": {
3048
- "4": 32
3049
  },
3050
  "bits": [
3051
  4
@@ -3058,7 +3058,7 @@
3058
  },
3059
  "down_proj": {
3060
  "group_size": {
3061
- "4": 32
3062
  },
3063
  "bits": [
3064
  4
@@ -3071,7 +3071,7 @@
3071
  },
3072
  "q_proj": {
3073
  "group_size": {
3074
- "4": 32
3075
  },
3076
  "bits": [
3077
  4
@@ -3084,7 +3084,7 @@
3084
  },
3085
  "k_proj": {
3086
  "group_size": {
3087
- "4": 32
3088
  },
3089
  "bits": [
3090
  4
@@ -3097,7 +3097,7 @@
3097
  },
3098
  "v_proj": {
3099
  "group_size": {
3100
- "4": 32
3101
  },
3102
  "bits": [
3103
  4
@@ -3110,7 +3110,7 @@
3110
  },
3111
  "gate_proj": {
3112
  "group_size": {
3113
- "4": 32
3114
  },
3115
  "bits": [
3116
  4
@@ -3123,7 +3123,7 @@
3123
  },
3124
  "up_proj": {
3125
  "group_size": {
3126
- "4": 32
3127
  },
3128
  "bits": [
3129
  4
@@ -3231,11 +3231,11 @@
3231
  }
3232
  },
3233
  "model.layers.34": {
3234
- "accuracy": 0.9886766500858357,
3235
- "total_bits": 1424424960.0,
3236
  "o_proj": {
3237
  "group_size": {
3238
- "4": 32
3239
  },
3240
  "bits": [
3241
  4
@@ -3248,7 +3248,7 @@
3248
  },
3249
  "down_proj": {
3250
  "group_size": {
3251
- "4": 32
3252
  },
3253
  "bits": [
3254
  4
@@ -3261,7 +3261,7 @@
3261
  },
3262
  "q_proj": {
3263
  "group_size": {
3264
- "4": 32
3265
  },
3266
  "bits": [
3267
  4
@@ -3274,7 +3274,7 @@
3274
  },
3275
  "k_proj": {
3276
  "group_size": {
3277
- "4": 32
3278
  },
3279
  "bits": [
3280
  4
@@ -3287,7 +3287,7 @@
3287
  },
3288
  "v_proj": {
3289
  "group_size": {
3290
- "4": 32
3291
  },
3292
  "bits": [
3293
  4
@@ -3300,7 +3300,7 @@
3300
  },
3301
  "gate_proj": {
3302
  "group_size": {
3303
- "4": 32
3304
  },
3305
  "bits": [
3306
  4
@@ -3313,7 +3313,7 @@
3313
  },
3314
  "up_proj": {
3315
  "group_size": {
3316
- "4": 32
3317
  },
3318
  "bits": [
3319
  4
@@ -3326,11 +3326,11 @@
3326
  }
3327
  },
3328
  "model.layers.35": {
3329
- "accuracy": 0.9980904093172285,
3330
- "total_bits": 1424424960.0,
3331
  "o_proj": {
3332
  "group_size": {
3333
- "4": 32
3334
  },
3335
  "bits": [
3336
  4
@@ -3343,7 +3343,7 @@
3343
  },
3344
  "down_proj": {
3345
  "group_size": {
3346
- "4": 32
3347
  },
3348
  "bits": [
3349
  4
@@ -3356,7 +3356,7 @@
3356
  },
3357
  "q_proj": {
3358
  "group_size": {
3359
- "4": 32
3360
  },
3361
  "bits": [
3362
  4
@@ -3369,7 +3369,7 @@
3369
  },
3370
  "k_proj": {
3371
  "group_size": {
3372
- "4": 32
3373
  },
3374
  "bits": [
3375
  4
@@ -3382,7 +3382,7 @@
3382
  },
3383
  "v_proj": {
3384
  "group_size": {
3385
- "4": 32
3386
  },
3387
  "bits": [
3388
  4
@@ -3395,7 +3395,7 @@
3395
  },
3396
  "gate_proj": {
3397
  "group_size": {
3398
- "4": 32
3399
  },
3400
  "bits": [
3401
  4
@@ -3408,7 +3408,7 @@
3408
  },
3409
  "up_proj": {
3410
  "group_size": {
3411
- "4": 32
3412
  },
3413
  "bits": [
3414
  4
@@ -3421,8 +3421,8 @@
3421
  }
3422
  },
3423
  "model.layers.36": {
3424
- "accuracy": 0.9971199873834848,
3425
- "total_bits": 772669440.0,
3426
  "o_proj": {
3427
  "group_size": {
3428
  "4": 128
@@ -3438,10 +3438,10 @@
3438
  },
3439
  "down_proj": {
3440
  "group_size": {
3441
- "2": 64
3442
  },
3443
  "bits": [
3444
- 2
3445
  ],
3446
  "bits_prop": [
3447
  1
@@ -3451,10 +3451,10 @@
3451
  },
3452
  "q_proj": {
3453
  "group_size": {
3454
- "2": 64
3455
  },
3456
  "bits": [
3457
- 2
3458
  ],
3459
  "bits_prop": [
3460
  1
@@ -3464,10 +3464,10 @@
3464
  },
3465
  "k_proj": {
3466
  "group_size": {
3467
- "2": 64
3468
  },
3469
  "bits": [
3470
- 2
3471
  ],
3472
  "bits_prop": [
3473
  1
@@ -3490,10 +3490,10 @@
3490
  },
3491
  "gate_proj": {
3492
  "group_size": {
3493
- "2": 64
3494
  },
3495
  "bits": [
3496
- 2
3497
  ],
3498
  "bits_prop": [
3499
  1
@@ -3503,10 +3503,10 @@
3503
  },
3504
  "up_proj": {
3505
  "group_size": {
3506
- "2": 64
3507
  },
3508
  "bits": [
3509
- 2
3510
  ],
3511
  "bits_prop": [
3512
  1
@@ -3516,8 +3516,8 @@
3516
  }
3517
  },
3518
  "model.layers.37": {
3519
- "accuracy": 0.9971822665538639,
3520
- "total_bits": 1115258880.0,
3521
  "o_proj": {
3522
  "group_size": {
3523
  "4": 128
@@ -3546,10 +3546,10 @@
3546
  },
3547
  "q_proj": {
3548
  "group_size": {
3549
- "2": 64
3550
  },
3551
  "bits": [
3552
- 2
3553
  ],
3554
  "bits_prop": [
3555
  1
@@ -3559,10 +3559,10 @@
3559
  },
3560
  "k_proj": {
3561
  "group_size": {
3562
- "2": 64
3563
  },
3564
  "bits": [
3565
- 2
3566
  ],
3567
  "bits_prop": [
3568
  1
@@ -3598,10 +3598,10 @@
3598
  },
3599
  "up_proj": {
3600
  "group_size": {
3601
- "2": 64
3602
  },
3603
  "bits": [
3604
- 2
3605
  ],
3606
  "bits_prop": [
3607
  1
@@ -3611,8 +3611,8 @@
3611
  }
3612
  },
3613
  "model.layers.38": {
3614
- "accuracy": 0.9971685816999525,
3615
- "total_bits": 1115258880.0,
3616
  "o_proj": {
3617
  "group_size": {
3618
  "4": 128
@@ -3641,10 +3641,10 @@
3641
  },
3642
  "q_proj": {
3643
  "group_size": {
3644
- "2": 64
3645
  },
3646
  "bits": [
3647
- 2
3648
  ],
3649
  "bits_prop": [
3650
  1
@@ -3654,10 +3654,10 @@
3654
  },
3655
  "k_proj": {
3656
  "group_size": {
3657
- "2": 64
3658
  },
3659
  "bits": [
3660
- 2
3661
  ],
3662
  "bits_prop": [
3663
  1
@@ -3693,10 +3693,10 @@
3693
  },
3694
  "up_proj": {
3695
  "group_size": {
3696
- "2": 64
3697
  },
3698
  "bits": [
3699
- 2
3700
  ],
3701
  "bits_prop": [
3702
  1
@@ -3706,8 +3706,8 @@
3706
  }
3707
  },
3708
  "model.layers.39": {
3709
- "accuracy": 0.9982514649163932,
3710
- "total_bits": 1115258880.0,
3711
  "o_proj": {
3712
  "group_size": {
3713
  "4": 128
@@ -3736,10 +3736,10 @@
3736
  },
3737
  "q_proj": {
3738
  "group_size": {
3739
- "2": 64
3740
  },
3741
  "bits": [
3742
- 2
3743
  ],
3744
  "bits_prop": [
3745
  1
@@ -3749,10 +3749,10 @@
3749
  },
3750
  "k_proj": {
3751
  "group_size": {
3752
- "2": 64
3753
  },
3754
  "bits": [
3755
- 2
3756
  ],
3757
  "bits_prop": [
3758
  1
@@ -3788,10 +3788,10 @@
3788
  },
3789
  "up_proj": {
3790
  "group_size": {
3791
- "2": 64
3792
  },
3793
  "bits": [
3794
- 2
3795
  ],
3796
  "bits_prop": [
3797
  1
@@ -3801,4 +3801,4 @@
3801
  }
3802
  }
3803
  }
3804
- }
 
1
  {
2
  "measurement": {
3
  "model.layers.0": {
4
+ "accuracy": 0.9718493807595223,
5
+ "total_bits": 1347010560.0,
6
  "o_proj": {
7
  "group_size": {
8
+ "4": 128
9
  },
10
  "bits": [
11
  4
 
18
  },
19
  "down_proj": {
20
  "group_size": {
21
+ "4": 128
22
  },
23
  "bits": [
24
  4
 
31
  },
32
  "q_proj": {
33
  "group_size": {
34
+ "4": 128
35
  },
36
  "bits": [
37
  4
 
44
  },
45
  "k_proj": {
46
  "group_size": {
47
+ "4": 128
48
  },
49
  "bits": [
50
  4
 
57
  },
58
  "v_proj": {
59
  "group_size": {
60
+ "4": 128
61
  },
62
  "bits": [
63
  4
 
70
  },
71
  "gate_proj": {
72
  "group_size": {
73
+ "4": 128
74
  },
75
  "bits": [
76
  4
 
83
  },
84
  "up_proj": {
85
  "group_size": {
86
+ "4": 128
87
  },
88
  "bits": [
89
  4
 
96
  }
97
  },
98
  "model.layers.1": {
99
+ "accuracy": 0.9743924007634632,
100
+ "total_bits": 1347010560.0,
101
  "o_proj": {
102
  "group_size": {
103
+ "4": 128
104
  },
105
  "bits": [
106
  4
 
113
  },
114
  "down_proj": {
115
  "group_size": {
116
+ "4": 128
117
  },
118
  "bits": [
119
  4
 
126
  },
127
  "q_proj": {
128
  "group_size": {
129
+ "4": 128
130
  },
131
  "bits": [
132
  4
 
139
  },
140
  "k_proj": {
141
  "group_size": {
142
+ "4": 128
143
  },
144
  "bits": [
145
  4
 
152
  },
153
  "v_proj": {
154
  "group_size": {
155
+ "4": 128
156
  },
157
  "bits": [
158
  4
 
165
  },
166
  "gate_proj": {
167
  "group_size": {
168
+ "4": 128
169
  },
170
  "bits": [
171
  4
 
178
  },
179
  "up_proj": {
180
  "group_size": {
181
+ "4": 128
182
  },
183
  "bits": [
184
  4
 
191
  }
192
  },
193
  "model.layers.2": {
194
+ "accuracy": 0.9789639118534978,
195
+ "total_bits": 1347010560.0,
196
  "o_proj": {
197
  "group_size": {
198
+ "4": 128
199
  },
200
  "bits": [
201
  4
 
208
  },
209
  "down_proj": {
210
  "group_size": {
211
+ "4": 128
212
  },
213
  "bits": [
214
  4
 
221
  },
222
  "q_proj": {
223
  "group_size": {
224
+ "4": 128
225
  },
226
  "bits": [
227
  4
 
234
  },
235
  "k_proj": {
236
  "group_size": {
237
+ "4": 128
238
  },
239
  "bits": [
240
  4
 
247
  },
248
  "v_proj": {
249
  "group_size": {
250
+ "4": 128
251
  },
252
  "bits": [
253
  4
 
260
  },
261
  "gate_proj": {
262
  "group_size": {
263
+ "4": 128
264
  },
265
  "bits": [
266
  4
 
273
  },
274
  "up_proj": {
275
  "group_size": {
276
+ "4": 128
277
  },
278
  "bits": [
279
  4
 
286
  }
287
  },
288
  "model.layers.3": {
289
+ "accuracy": 0.9701395294832764,
290
+ "total_bits": 1347010560.0,
291
  "o_proj": {
292
  "group_size": {
293
+ "4": 128
294
  },
295
  "bits": [
296
  4
 
303
  },
304
  "down_proj": {
305
  "group_size": {
306
+ "4": 128
307
  },
308
  "bits": [
309
  4
 
316
  },
317
  "q_proj": {
318
  "group_size": {
319
+ "4": 128
320
  },
321
  "bits": [
322
  4
 
329
  },
330
  "k_proj": {
331
  "group_size": {
332
+ "4": 128
333
  },
334
  "bits": [
335
  4
 
342
  },
343
  "v_proj": {
344
  "group_size": {
345
+ "4": 128
346
  },
347
  "bits": [
348
  4
 
355
  },
356
  "gate_proj": {
357
  "group_size": {
358
+ "4": 128
359
  },
360
  "bits": [
361
  4
 
368
  },
369
  "up_proj": {
370
  "group_size": {
371
+ "4": 128
372
  },
373
  "bits": [
374
  4
 
381
  }
382
  },
383
  "model.layers.4": {
384
+ "accuracy": 0.9654985481756739,
385
+ "total_bits": 1347010560.0,
386
  "o_proj": {
387
  "group_size": {
388
+ "4": 128
389
  },
390
  "bits": [
391
  4
 
398
  },
399
  "down_proj": {
400
  "group_size": {
401
+ "4": 128
402
  },
403
  "bits": [
404
  4
 
411
  },
412
  "q_proj": {
413
  "group_size": {
414
+ "4": 128
415
  },
416
  "bits": [
417
  4
 
424
  },
425
  "k_proj": {
426
  "group_size": {
427
+ "4": 128
428
  },
429
  "bits": [
430
  4
 
437
  },
438
  "v_proj": {
439
  "group_size": {
440
+ "4": 128
441
  },
442
  "bits": [
443
  4
 
450
  },
451
  "gate_proj": {
452
  "group_size": {
453
+ "4": 128
454
  },
455
  "bits": [
456
  4
 
463
  },
464
  "up_proj": {
465
  "group_size": {
466
+ "4": 128
467
  },
468
  "bits": [
469
  4
 
571
  }
572
  },
573
  "model.layers.6": {
574
+ "accuracy": 0.9665761806536466,
575
+ "total_bits": 1347010560.0,
576
  "o_proj": {
577
  "group_size": {
578
+ "4": 128
579
  },
580
  "bits": [
581
  4
 
588
  },
589
  "down_proj": {
590
  "group_size": {
591
+ "4": 128
592
  },
593
  "bits": [
594
  4
 
601
  },
602
  "q_proj": {
603
  "group_size": {
604
+ "4": 128
605
  },
606
  "bits": [
607
  4
 
614
  },
615
  "k_proj": {
616
  "group_size": {
617
+ "4": 128
618
  },
619
  "bits": [
620
  4
 
627
  },
628
  "v_proj": {
629
  "group_size": {
630
+ "4": 128
631
  },
632
  "bits": [
633
  4
 
640
  },
641
  "gate_proj": {
642
  "group_size": {
643
+ "4": 128
644
  },
645
  "bits": [
646
  4
 
653
  },
654
  "up_proj": {
655
  "group_size": {
656
+ "4": 128
657
  },
658
  "bits": [
659
  4
 
856
  }
857
  },
858
  "model.layers.9": {
859
+ "accuracy": 0.9690405392611865,
860
+ "total_bits": 1347010560.0,
861
  "o_proj": {
862
  "group_size": {
863
+ "4": 128
864
  },
865
  "bits": [
866
  4
 
873
  },
874
  "down_proj": {
875
  "group_size": {
876
+ "4": 128
877
  },
878
  "bits": [
879
  4
 
886
  },
887
  "q_proj": {
888
  "group_size": {
889
+ "4": 128
890
  },
891
  "bits": [
892
  4
 
899
  },
900
  "k_proj": {
901
  "group_size": {
902
+ "4": 128
903
  },
904
  "bits": [
905
  4
 
912
  },
913
  "v_proj": {
914
  "group_size": {
915
+ "4": 128
916
  },
917
  "bits": [
918
  4
 
925
  },
926
  "gate_proj": {
927
  "group_size": {
928
+ "4": 128
929
  },
930
  "bits": [
931
  4
 
938
  },
939
  "up_proj": {
940
  "group_size": {
941
+ "4": 128
942
  },
943
  "bits": [
944
  4
 
1046
  }
1047
  },
1048
  "model.layers.11": {
1049
+ "accuracy": 0.9653666304366197,
1050
+ "total_bits": 1347010560.0,
1051
  "o_proj": {
1052
  "group_size": {
1053
+ "4": 128
1054
  },
1055
  "bits": [
1056
  4
 
1063
  },
1064
  "down_proj": {
1065
  "group_size": {
1066
+ "4": 128
1067
  },
1068
  "bits": [
1069
  4
 
1076
  },
1077
  "q_proj": {
1078
  "group_size": {
1079
+ "4": 128
1080
  },
1081
  "bits": [
1082
  4
 
1089
  },
1090
  "k_proj": {
1091
  "group_size": {
1092
+ "4": 128
1093
  },
1094
  "bits": [
1095
  4
 
1102
  },
1103
  "v_proj": {
1104
  "group_size": {
1105
+ "4": 128
1106
  },
1107
  "bits": [
1108
  4
 
1115
  },
1116
  "gate_proj": {
1117
  "group_size": {
1118
+ "4": 128
1119
  },
1120
  "bits": [
1121
  4
 
1128
  },
1129
  "up_proj": {
1130
  "group_size": {
1131
+ "4": 128
1132
  },
1133
  "bits": [
1134
  4
 
1141
  }
1142
  },
1143
  "model.layers.12": {
1144
+ "accuracy": 0.9655183832655894,
1145
+ "total_bits": 1347010560.0,
1146
  "o_proj": {
1147
  "group_size": {
1148
+ "4": 128
1149
  },
1150
  "bits": [
1151
  4
 
1158
  },
1159
  "down_proj": {
1160
  "group_size": {
1161
+ "4": 128
1162
  },
1163
  "bits": [
1164
  4
 
1171
  },
1172
  "q_proj": {
1173
  "group_size": {
1174
+ "4": 128
1175
  },
1176
  "bits": [
1177
  4
 
1184
  },
1185
  "k_proj": {
1186
  "group_size": {
1187
+ "4": 128
1188
  },
1189
  "bits": [
1190
  4
 
1197
  },
1198
  "v_proj": {
1199
  "group_size": {
1200
+ "4": 128
1201
  },
1202
  "bits": [
1203
  4
 
1210
  },
1211
  "gate_proj": {
1212
  "group_size": {
1213
+ "4": 128
1214
  },
1215
  "bits": [
1216
  4
 
1223
  },
1224
  "up_proj": {
1225
  "group_size": {
1226
+ "4": 128
1227
  },
1228
  "bits": [
1229
  4
 
1996
  }
1997
  },
1998
  "model.layers.21": {
1999
+ "accuracy": 0.9602713624117314,
2000
+ "total_bits": 1347010560.0,
2001
  "o_proj": {
2002
  "group_size": {
2003
+ "4": 128
2004
  },
2005
  "bits": [
2006
  4
 
2013
  },
2014
  "down_proj": {
2015
  "group_size": {
2016
+ "4": 128
2017
  },
2018
  "bits": [
2019
  4
 
2026
  },
2027
  "q_proj": {
2028
  "group_size": {
2029
+ "4": 128
2030
  },
2031
  "bits": [
2032
  4
 
2039
  },
2040
  "k_proj": {
2041
  "group_size": {
2042
+ "4": 128
2043
  },
2044
  "bits": [
2045
  4
 
2052
  },
2053
  "v_proj": {
2054
  "group_size": {
2055
+ "4": 128
2056
  },
2057
  "bits": [
2058
  4
 
2065
  },
2066
  "gate_proj": {
2067
  "group_size": {
2068
+ "4": 128
2069
  },
2070
  "bits": [
2071
  4
 
2078
  },
2079
  "up_proj": {
2080
  "group_size": {
2081
+ "4": 128
2082
  },
2083
  "bits": [
2084
  4
 
2376
  }
2377
  },
2378
  "model.layers.25": {
2379
+ "accuracy": 0.9665456048605847,
2380
+ "total_bits": 1347010560.0,
2381
  "o_proj": {
2382
  "group_size": {
2383
+ "4": 128
2384
  },
2385
  "bits": [
2386
  4
 
2393
  },
2394
  "down_proj": {
2395
  "group_size": {
2396
+ "4": 128
2397
  },
2398
  "bits": [
2399
  4
 
2406
  },
2407
  "q_proj": {
2408
  "group_size": {
2409
+ "4": 128
2410
  },
2411
  "bits": [
2412
  4
 
2419
  },
2420
  "k_proj": {
2421
  "group_size": {
2422
+ "4": 128
2423
  },
2424
  "bits": [
2425
  4
 
2432
  },
2433
  "v_proj": {
2434
  "group_size": {
2435
+ "4": 128
2436
  },
2437
  "bits": [
2438
  4
 
2445
  },
2446
  "gate_proj": {
2447
  "group_size": {
2448
+ "4": 128
2449
  },
2450
  "bits": [
2451
  4
 
2458
  },
2459
  "up_proj": {
2460
  "group_size": {
2461
+ "4": 128
2462
  },
2463
  "bits": [
2464
  4
 
2471
  }
2472
  },
2473
  "model.layers.26": {
2474
+ "accuracy": 0.9651254423079081,
2475
+ "total_bits": 1347010560.0,
2476
  "o_proj": {
2477
  "group_size": {
2478
+ "4": 128
2479
  },
2480
  "bits": [
2481
  4
 
2488
  },
2489
  "down_proj": {
2490
  "group_size": {
2491
+ "4": 128
2492
  },
2493
  "bits": [
2494
  4
 
2501
  },
2502
  "q_proj": {
2503
  "group_size": {
2504
+ "4": 128
2505
  },
2506
  "bits": [
2507
  4
 
2514
  },
2515
  "k_proj": {
2516
  "group_size": {
2517
+ "4": 128
2518
  },
2519
  "bits": [
2520
  4
 
2527
  },
2528
  "v_proj": {
2529
  "group_size": {
2530
+ "4": 128
2531
  },
2532
  "bits": [
2533
  4
 
2540
  },
2541
  "gate_proj": {
2542
  "group_size": {
2543
+ "4": 128
2544
  },
2545
  "bits": [
2546
  4
 
2553
  },
2554
  "up_proj": {
2555
  "group_size": {
2556
+ "4": 128
2557
  },
2558
  "bits": [
2559
  4
 
2946
  }
2947
  },
2948
  "model.layers.31": {
2949
+ "accuracy": 0.9652374170400435,
2950
+ "total_bits": 1347010560.0,
2951
  "o_proj": {
2952
  "group_size": {
2953
+ "4": 128
2954
  },
2955
  "bits": [
2956
  4
 
2963
  },
2964
  "down_proj": {
2965
  "group_size": {
2966
+ "4": 128
2967
  },
2968
  "bits": [
2969
  4
 
2976
  },
2977
  "q_proj": {
2978
  "group_size": {
2979
+ "4": 128
2980
  },
2981
  "bits": [
2982
  4
 
2989
  },
2990
  "k_proj": {
2991
  "group_size": {
2992
+ "4": 128
2993
  },
2994
  "bits": [
2995
  4
 
3002
  },
3003
  "v_proj": {
3004
  "group_size": {
3005
+ "4": 128
3006
  },
3007
  "bits": [
3008
  4
 
3015
  },
3016
  "gate_proj": {
3017
  "group_size": {
3018
+ "4": 128
3019
  },
3020
  "bits": [
3021
  4
 
3028
  },
3029
  "up_proj": {
3030
  "group_size": {
3031
+ "4": 128
3032
  },
3033
  "bits": [
3034
  4
 
3041
  }
3042
  },
3043
  "model.layers.32": {
3044
+ "accuracy": 0.9640791268902831,
3045
+ "total_bits": 1347010560.0,
3046
  "o_proj": {
3047
  "group_size": {
3048
+ "4": 128
3049
  },
3050
  "bits": [
3051
  4
 
3058
  },
3059
  "down_proj": {
3060
  "group_size": {
3061
+ "4": 128
3062
  },
3063
  "bits": [
3064
  4
 
3071
  },
3072
  "q_proj": {
3073
  "group_size": {
3074
+ "4": 128
3075
  },
3076
  "bits": [
3077
  4
 
3084
  },
3085
  "k_proj": {
3086
  "group_size": {
3087
+ "4": 128
3088
  },
3089
  "bits": [
3090
  4
 
3097
  },
3098
  "v_proj": {
3099
  "group_size": {
3100
+ "4": 128
3101
  },
3102
  "bits": [
3103
  4
 
3110
  },
3111
  "gate_proj": {
3112
  "group_size": {
3113
+ "4": 128
3114
  },
3115
  "bits": [
3116
  4
 
3123
  },
3124
  "up_proj": {
3125
  "group_size": {
3126
+ "4": 128
3127
  },
3128
  "bits": [
3129
  4
 
3231
  }
3232
  },
3233
  "model.layers.34": {
3234
+ "accuracy": 0.9841037099831738,
3235
+ "total_bits": 1347010560.0,
3236
  "o_proj": {
3237
  "group_size": {
3238
+ "4": 128
3239
  },
3240
  "bits": [
3241
  4
 
3248
  },
3249
  "down_proj": {
3250
  "group_size": {
3251
+ "4": 128
3252
  },
3253
  "bits": [
3254
  4
 
3261
  },
3262
  "q_proj": {
3263
  "group_size": {
3264
+ "4": 128
3265
  },
3266
  "bits": [
3267
  4
 
3274
  },
3275
  "k_proj": {
3276
  "group_size": {
3277
+ "4": 128
3278
  },
3279
  "bits": [
3280
  4
 
3287
  },
3288
  "v_proj": {
3289
  "group_size": {
3290
+ "4": 128
3291
  },
3292
  "bits": [
3293
  4
 
3300
  },
3301
  "gate_proj": {
3302
  "group_size": {
3303
+ "4": 128
3304
  },
3305
  "bits": [
3306
  4
 
3313
  },
3314
  "up_proj": {
3315
  "group_size": {
3316
+ "4": 128
3317
  },
3318
  "bits": [
3319
  4
 
3326
  }
3327
  },
3328
  "model.layers.35": {
3329
+ "accuracy": 0.9970002071495401,
3330
+ "total_bits": 1347010560.0,
3331
  "o_proj": {
3332
  "group_size": {
3333
+ "4": 128
3334
  },
3335
  "bits": [
3336
  4
 
3343
  },
3344
  "down_proj": {
3345
  "group_size": {
3346
+ "4": 128
3347
  },
3348
  "bits": [
3349
  4
 
3356
  },
3357
  "q_proj": {
3358
  "group_size": {
3359
+ "4": 128
3360
  },
3361
  "bits": [
3362
  4
 
3369
  },
3370
  "k_proj": {
3371
  "group_size": {
3372
+ "4": 128
3373
  },
3374
  "bits": [
3375
  4
 
3382
  },
3383
  "v_proj": {
3384
  "group_size": {
3385
+ "4": 128
3386
  },
3387
  "bits": [
3388
  4
 
3395
  },
3396
  "gate_proj": {
3397
  "group_size": {
3398
+ "4": 128
3399
  },
3400
  "bits": [
3401
  4
 
3408
  },
3409
  "up_proj": {
3410
  "group_size": {
3411
+ "4": 128
3412
  },
3413
  "bits": [
3414
  4
 
3421
  }
3422
  },
3423
  "model.layers.36": {
3424
+ "accuracy": 0.9985264171846211,
3425
+ "total_bits": 1347010560.0,
3426
  "o_proj": {
3427
  "group_size": {
3428
  "4": 128
 
3438
  },
3439
  "down_proj": {
3440
  "group_size": {
3441
+ "4": 128
3442
  },
3443
  "bits": [
3444
+ 4
3445
  ],
3446
  "bits_prop": [
3447
  1
 
3451
  },
3452
  "q_proj": {
3453
  "group_size": {
3454
+ "4": 128
3455
  },
3456
  "bits": [
3457
+ 4
3458
  ],
3459
  "bits_prop": [
3460
  1
 
3464
  },
3465
  "k_proj": {
3466
  "group_size": {
3467
+ "4": 128
3468
  },
3469
  "bits": [
3470
+ 4
3471
  ],
3472
  "bits_prop": [
3473
  1
 
3490
  },
3491
  "gate_proj": {
3492
  "group_size": {
3493
+ "4": 128
3494
  },
3495
  "bits": [
3496
+ 4
3497
  ],
3498
  "bits_prop": [
3499
  1
 
3503
  },
3504
  "up_proj": {
3505
  "group_size": {
3506
+ "4": 128
3507
  },
3508
  "bits": [
3509
+ 4
3510
  ],
3511
  "bits_prop": [
3512
  1
 
3516
  }
3517
  },
3518
  "model.layers.37": {
3519
+ "accuracy": 0.9979474495630711,
3520
+ "total_bits": 1347010560.0,
3521
  "o_proj": {
3522
  "group_size": {
3523
  "4": 128
 
3546
  },
3547
  "q_proj": {
3548
  "group_size": {
3549
+ "4": 128
3550
  },
3551
  "bits": [
3552
+ 4
3553
  ],
3554
  "bits_prop": [
3555
  1
 
3559
  },
3560
  "k_proj": {
3561
  "group_size": {
3562
+ "4": 128
3563
  },
3564
  "bits": [
3565
+ 4
3566
  ],
3567
  "bits_prop": [
3568
  1
 
3598
  },
3599
  "up_proj": {
3600
  "group_size": {
3601
+ "4": 128
3602
  },
3603
  "bits": [
3604
+ 4
3605
  ],
3606
  "bits_prop": [
3607
  1
 
3611
  }
3612
  },
3613
  "model.layers.38": {
3614
+ "accuracy": 0.9980866985861212,
3615
+ "total_bits": 1347010560.0,
3616
  "o_proj": {
3617
  "group_size": {
3618
  "4": 128
 
3641
  },
3642
  "q_proj": {
3643
  "group_size": {
3644
+ "4": 128
3645
  },
3646
  "bits": [
3647
+ 4
3648
  ],
3649
  "bits_prop": [
3650
  1
 
3654
  },
3655
  "k_proj": {
3656
  "group_size": {
3657
+ "4": 128
3658
  },
3659
  "bits": [
3660
+ 4
3661
  ],
3662
  "bits_prop": [
3663
  1
 
3693
  },
3694
  "up_proj": {
3695
  "group_size": {
3696
+ "4": 128
3697
  },
3698
  "bits": [
3699
+ 4
3700
  ],
3701
  "bits_prop": [
3702
  1
 
3706
  }
3707
  },
3708
  "model.layers.39": {
3709
+ "accuracy": 0.9991928795352578,
3710
+ "total_bits": 1347010560.0,
3711
  "o_proj": {
3712
  "group_size": {
3713
  "4": 128
 
3736
  },
3737
  "q_proj": {
3738
  "group_size": {
3739
+ "4": 128
3740
  },
3741
  "bits": [
3742
+ 4
3743
  ],
3744
  "bits_prop": [
3745
  1
 
3749
  },
3750
  "k_proj": {
3751
  "group_size": {
3752
+ "4": 128
3753
  },
3754
  "bits": [
3755
+ 4
3756
  ],
3757
  "bits_prop": [
3758
  1
 
3788
  },
3789
  "up_proj": {
3790
  "group_size": {
3791
+ "4": 128
3792
  },
3793
  "bits": [
3794
+ 4
3795
  ],
3796
  "bits_prop": [
3797
  1
 
3801
  }
3802
  }
3803
  }
3804
+ }