Upload folder using huggingface_hub
Browse files- config.json +1 -33
- model-00001-of-00010.safetensors +2 -2
- model-00002-of-00010.safetensors +2 -2
- model-00003-of-00010.safetensors +2 -2
- model-00004-of-00010.safetensors +2 -2
- model-00005-of-00010.safetensors +2 -2
- model-00006-of-00010.safetensors +2 -2
- model-00007-of-00010.safetensors +2 -2
- model-00008-of-00010.safetensors +2 -2
- model-00009-of-00010.safetensors +2 -2
- model-00010-of-00010.safetensors +2 -2
- model.safetensors.index.json +220 -156
- tokenizer_config.json +3 -3
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/model/mistralai/Mixtral-8x7B-Instruct-v0.1",
|
3 |
"architectures": [
|
4 |
"MixtralForCausalLM"
|
5 |
],
|
@@ -30,69 +30,37 @@
|
|
30 |
"weight_merge_groups": null
|
31 |
},
|
32 |
"ignored_layers": [
|
33 |
-
"model.layers.0.self_attn.o_proj",
|
34 |
"model.layers.0.block_sparse_moe.gate",
|
35 |
-
"model.layers.1.self_attn.o_proj",
|
36 |
"model.layers.1.block_sparse_moe.gate",
|
37 |
-
"model.layers.2.self_attn.o_proj",
|
38 |
"model.layers.2.block_sparse_moe.gate",
|
39 |
-
"model.layers.3.self_attn.o_proj",
|
40 |
"model.layers.3.block_sparse_moe.gate",
|
41 |
-
"model.layers.4.self_attn.o_proj",
|
42 |
"model.layers.4.block_sparse_moe.gate",
|
43 |
-
"model.layers.5.self_attn.o_proj",
|
44 |
"model.layers.5.block_sparse_moe.gate",
|
45 |
-
"model.layers.6.self_attn.o_proj",
|
46 |
"model.layers.6.block_sparse_moe.gate",
|
47 |
-
"model.layers.7.self_attn.o_proj",
|
48 |
"model.layers.7.block_sparse_moe.gate",
|
49 |
-
"model.layers.8.self_attn.o_proj",
|
50 |
"model.layers.8.block_sparse_moe.gate",
|
51 |
-
"model.layers.9.self_attn.o_proj",
|
52 |
"model.layers.9.block_sparse_moe.gate",
|
53 |
-
"model.layers.10.self_attn.o_proj",
|
54 |
"model.layers.10.block_sparse_moe.gate",
|
55 |
-
"model.layers.11.self_attn.o_proj",
|
56 |
"model.layers.11.block_sparse_moe.gate",
|
57 |
-
"model.layers.12.self_attn.o_proj",
|
58 |
"model.layers.12.block_sparse_moe.gate",
|
59 |
-
"model.layers.13.self_attn.o_proj",
|
60 |
"model.layers.13.block_sparse_moe.gate",
|
61 |
-
"model.layers.14.self_attn.o_proj",
|
62 |
"model.layers.14.block_sparse_moe.gate",
|
63 |
-
"model.layers.15.self_attn.o_proj",
|
64 |
"model.layers.15.block_sparse_moe.gate",
|
65 |
-
"model.layers.16.self_attn.o_proj",
|
66 |
"model.layers.16.block_sparse_moe.gate",
|
67 |
-
"model.layers.17.self_attn.o_proj",
|
68 |
"model.layers.17.block_sparse_moe.gate",
|
69 |
-
"model.layers.18.self_attn.o_proj",
|
70 |
"model.layers.18.block_sparse_moe.gate",
|
71 |
-
"model.layers.19.self_attn.o_proj",
|
72 |
"model.layers.19.block_sparse_moe.gate",
|
73 |
-
"model.layers.20.self_attn.o_proj",
|
74 |
"model.layers.20.block_sparse_moe.gate",
|
75 |
-
"model.layers.21.self_attn.o_proj",
|
76 |
"model.layers.21.block_sparse_moe.gate",
|
77 |
-
"model.layers.22.self_attn.o_proj",
|
78 |
"model.layers.22.block_sparse_moe.gate",
|
79 |
-
"model.layers.23.self_attn.o_proj",
|
80 |
"model.layers.23.block_sparse_moe.gate",
|
81 |
-
"model.layers.24.self_attn.o_proj",
|
82 |
"model.layers.24.block_sparse_moe.gate",
|
83 |
-
"model.layers.25.self_attn.o_proj",
|
84 |
"model.layers.25.block_sparse_moe.gate",
|
85 |
-
"model.layers.26.self_attn.o_proj",
|
86 |
"model.layers.26.block_sparse_moe.gate",
|
87 |
-
"model.layers.27.self_attn.o_proj",
|
88 |
"model.layers.27.block_sparse_moe.gate",
|
89 |
-
"model.layers.28.self_attn.o_proj",
|
90 |
"model.layers.28.block_sparse_moe.gate",
|
91 |
-
"model.layers.29.self_attn.o_proj",
|
92 |
"model.layers.29.block_sparse_moe.gate",
|
93 |
-
"model.layers.30.self_attn.o_proj",
|
94 |
"model.layers.30.block_sparse_moe.gate",
|
95 |
-
"model.layers.31.self_attn.o_proj",
|
96 |
"model.layers.31.block_sparse_moe.gate",
|
97 |
"lm_head"
|
98 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/model/mistralai/Mixtral-8x7B-Instruct-v0.1-MLCommons",
|
3 |
"architectures": [
|
4 |
"MixtralForCausalLM"
|
5 |
],
|
|
|
30 |
"weight_merge_groups": null
|
31 |
},
|
32 |
"ignored_layers": [
|
|
|
33 |
"model.layers.0.block_sparse_moe.gate",
|
|
|
34 |
"model.layers.1.block_sparse_moe.gate",
|
|
|
35 |
"model.layers.2.block_sparse_moe.gate",
|
|
|
36 |
"model.layers.3.block_sparse_moe.gate",
|
|
|
37 |
"model.layers.4.block_sparse_moe.gate",
|
|
|
38 |
"model.layers.5.block_sparse_moe.gate",
|
|
|
39 |
"model.layers.6.block_sparse_moe.gate",
|
|
|
40 |
"model.layers.7.block_sparse_moe.gate",
|
|
|
41 |
"model.layers.8.block_sparse_moe.gate",
|
|
|
42 |
"model.layers.9.block_sparse_moe.gate",
|
|
|
43 |
"model.layers.10.block_sparse_moe.gate",
|
|
|
44 |
"model.layers.11.block_sparse_moe.gate",
|
|
|
45 |
"model.layers.12.block_sparse_moe.gate",
|
|
|
46 |
"model.layers.13.block_sparse_moe.gate",
|
|
|
47 |
"model.layers.14.block_sparse_moe.gate",
|
|
|
48 |
"model.layers.15.block_sparse_moe.gate",
|
|
|
49 |
"model.layers.16.block_sparse_moe.gate",
|
|
|
50 |
"model.layers.17.block_sparse_moe.gate",
|
|
|
51 |
"model.layers.18.block_sparse_moe.gate",
|
|
|
52 |
"model.layers.19.block_sparse_moe.gate",
|
|
|
53 |
"model.layers.20.block_sparse_moe.gate",
|
|
|
54 |
"model.layers.21.block_sparse_moe.gate",
|
|
|
55 |
"model.layers.22.block_sparse_moe.gate",
|
|
|
56 |
"model.layers.23.block_sparse_moe.gate",
|
|
|
57 |
"model.layers.24.block_sparse_moe.gate",
|
|
|
58 |
"model.layers.25.block_sparse_moe.gate",
|
|
|
59 |
"model.layers.26.block_sparse_moe.gate",
|
|
|
60 |
"model.layers.27.block_sparse_moe.gate",
|
|
|
61 |
"model.layers.28.block_sparse_moe.gate",
|
|
|
62 |
"model.layers.29.block_sparse_moe.gate",
|
|
|
63 |
"model.layers.30.block_sparse_moe.gate",
|
|
|
64 |
"model.layers.31.block_sparse_moe.gate",
|
65 |
"lm_head"
|
66 |
],
|
model-00001-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ecd14ec5d85f2c066dbb7f51ef8be30e0b08a203265450b45eed2a63acb1ac8
|
3 |
+
size 4951724404
|
model-00002-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3aa6b0680351be68dcee66544094e9ca1639bb00809a922e1eb278d2190b23ae
|
3 |
+
size 4999892088
|
model-00003-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15d66cf16d3f70e368d03a4f14adde83680aab0eb8fa77cf9855df6f7c056b72
|
3 |
+
size 4983198624
|
model-00004-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68a5200086c3543ead430588b88bd9ee6bd733204d2cdeb4a9ab33e216cb00d8
|
3 |
+
size 4999892344
|
model-00005-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b073cb23401fa7aed985e96fca66b69cc33f463d52afb9c7a4027bbd6b07ccf6
|
3 |
+
size 4999909308
|
model-00006-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02325fcd4cbeead017c342bc0dffc84611bcc1db2d174672e4534efeea72a929
|
3 |
+
size 4983181812
|
model-00007-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2420bbfc5134419522aefe880a730c69ac82bcb7a754cfe63857347debd4f769
|
3 |
+
size 4999892472
|
model-00008-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d7e3cb3975f378758fb6a74c41674d694dc60ef62ed7220494b0ef04d4ffe20
|
3 |
+
size 4983198840
|
model-00009-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d881a918e94bb35844a2373251643d2c75940078ec9bb7fc345f0361e2434c0
|
3 |
+
size 4999892424
|
model-00010-of-00010.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:874b967850a292cc1c0acbbcce7fb48286bea2346ed46567aead9b47948e9ad9
|
3 |
+
size 2065815212
|
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"total_size":
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00010-of-00010.safetensors",
|
@@ -84,7 +84,9 @@
|
|
84 |
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
85 |
"model.layers.0.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
86 |
"model.layers.0.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
|
|
87 |
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
|
|
88 |
"model.layers.0.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
89 |
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
90 |
"model.layers.0.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
@@ -171,7 +173,9 @@
|
|
171 |
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
172 |
"model.layers.1.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
173 |
"model.layers.1.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
|
|
174 |
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
|
|
175 |
"model.layers.1.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
176 |
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
177 |
"model.layers.1.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
@@ -179,12 +183,12 @@
|
|
179 |
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
180 |
"model.layers.1.self_attn.v_proj.weight_scale": "model-00001-of-00010.safetensors",
|
181 |
"model.layers.1.self_attn.v_scale": "model-00001-of-00010.safetensors",
|
182 |
-
"model.layers.10.block_sparse_moe.experts.0.w1.input_scale": "model-
|
183 |
-
"model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-
|
184 |
-
"model.layers.10.block_sparse_moe.experts.0.w1.weight_scale": "model-
|
185 |
-
"model.layers.10.block_sparse_moe.experts.0.w2.input_scale": "model-
|
186 |
-
"model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-
|
187 |
-
"model.layers.10.block_sparse_moe.experts.0.w2.weight_scale": "model-
|
188 |
"model.layers.10.block_sparse_moe.experts.0.w3.input_scale": "model-00004-of-00010.safetensors",
|
189 |
"model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00010.safetensors",
|
190 |
"model.layers.10.block_sparse_moe.experts.0.w3.weight_scale": "model-00004-of-00010.safetensors",
|
@@ -251,20 +255,22 @@
|
|
251 |
"model.layers.10.block_sparse_moe.experts.7.w3.input_scale": "model-00004-of-00010.safetensors",
|
252 |
"model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00010.safetensors",
|
253 |
"model.layers.10.block_sparse_moe.experts.7.w3.weight_scale": "model-00004-of-00010.safetensors",
|
254 |
-
"model.layers.10.block_sparse_moe.gate.weight": "model-
|
255 |
"model.layers.10.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
256 |
"model.layers.10.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
257 |
-
"model.layers.10.self_attn.k_proj.input_scale": "model-
|
258 |
-
"model.layers.10.self_attn.k_proj.weight": "model-
|
259 |
-
"model.layers.10.self_attn.k_proj.weight_scale": "model-
|
260 |
"model.layers.10.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
261 |
-
"model.layers.10.self_attn.o_proj.
|
262 |
-
"model.layers.10.self_attn.
|
263 |
-
"model.layers.10.self_attn.
|
264 |
-
"model.layers.10.self_attn.q_proj.
|
265 |
-
"model.layers.10.self_attn.
|
266 |
-
"model.layers.10.self_attn.
|
267 |
-
"model.layers.10.self_attn.v_proj.
|
|
|
|
|
268 |
"model.layers.10.self_attn.v_scale": "model-00003-of-00010.safetensors",
|
269 |
"model.layers.11.block_sparse_moe.experts.0.w1.input_scale": "model-00004-of-00010.safetensors",
|
270 |
"model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00010.safetensors",
|
@@ -345,7 +351,9 @@
|
|
345 |
"model.layers.11.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
346 |
"model.layers.11.self_attn.k_proj.weight_scale": "model-00004-of-00010.safetensors",
|
347 |
"model.layers.11.self_attn.k_scale": "model-00004-of-00010.safetensors",
|
|
|
348 |
"model.layers.11.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
|
|
349 |
"model.layers.11.self_attn.q_proj.input_scale": "model-00004-of-00010.safetensors",
|
350 |
"model.layers.11.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
351 |
"model.layers.11.self_attn.q_proj.weight_scale": "model-00004-of-00010.safetensors",
|
@@ -432,7 +440,9 @@
|
|
432 |
"model.layers.12.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
433 |
"model.layers.12.self_attn.k_proj.weight_scale": "model-00004-of-00010.safetensors",
|
434 |
"model.layers.12.self_attn.k_scale": "model-00004-of-00010.safetensors",
|
|
|
435 |
"model.layers.12.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
|
|
436 |
"model.layers.12.self_attn.q_proj.input_scale": "model-00004-of-00010.safetensors",
|
437 |
"model.layers.12.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
438 |
"model.layers.12.self_attn.q_proj.weight_scale": "model-00004-of-00010.safetensors",
|
@@ -467,18 +477,18 @@
|
|
467 |
"model.layers.13.block_sparse_moe.experts.2.w3.input_scale": "model-00004-of-00010.safetensors",
|
468 |
"model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00010.safetensors",
|
469 |
"model.layers.13.block_sparse_moe.experts.2.w3.weight_scale": "model-00004-of-00010.safetensors",
|
470 |
-
"model.layers.13.block_sparse_moe.experts.3.w1.input_scale": "model-
|
471 |
-
"model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-
|
472 |
-
"model.layers.13.block_sparse_moe.experts.3.w1.weight_scale": "model-
|
473 |
-
"model.layers.13.block_sparse_moe.experts.3.w2.input_scale": "model-
|
474 |
-
"model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-
|
475 |
-
"model.layers.13.block_sparse_moe.experts.3.w2.weight_scale": "model-
|
476 |
-
"model.layers.13.block_sparse_moe.experts.3.w3.input_scale": "model-
|
477 |
-
"model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-
|
478 |
-
"model.layers.13.block_sparse_moe.experts.3.w3.weight_scale": "model-
|
479 |
-
"model.layers.13.block_sparse_moe.experts.4.w1.input_scale": "model-
|
480 |
-
"model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-
|
481 |
-
"model.layers.13.block_sparse_moe.experts.4.w1.weight_scale": "model-
|
482 |
"model.layers.13.block_sparse_moe.experts.4.w2.input_scale": "model-00005-of-00010.safetensors",
|
483 |
"model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors",
|
484 |
"model.layers.13.block_sparse_moe.experts.4.w2.weight_scale": "model-00005-of-00010.safetensors",
|
@@ -519,7 +529,9 @@
|
|
519 |
"model.layers.13.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
520 |
"model.layers.13.self_attn.k_proj.weight_scale": "model-00004-of-00010.safetensors",
|
521 |
"model.layers.13.self_attn.k_scale": "model-00004-of-00010.safetensors",
|
|
|
522 |
"model.layers.13.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
|
|
523 |
"model.layers.13.self_attn.q_proj.input_scale": "model-00004-of-00010.safetensors",
|
524 |
"model.layers.13.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
525 |
"model.layers.13.self_attn.q_proj.weight_scale": "model-00004-of-00010.safetensors",
|
@@ -606,7 +618,9 @@
|
|
606 |
"model.layers.14.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
607 |
"model.layers.14.self_attn.k_proj.weight_scale": "model-00005-of-00010.safetensors",
|
608 |
"model.layers.14.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
|
|
609 |
"model.layers.14.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
|
|
610 |
"model.layers.14.self_attn.q_proj.input_scale": "model-00005-of-00010.safetensors",
|
611 |
"model.layers.14.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
612 |
"model.layers.14.self_attn.q_proj.weight_scale": "model-00005-of-00010.safetensors",
|
@@ -693,7 +707,9 @@
|
|
693 |
"model.layers.15.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
694 |
"model.layers.15.self_attn.k_proj.weight_scale": "model-00005-of-00010.safetensors",
|
695 |
"model.layers.15.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
|
|
696 |
"model.layers.15.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
|
|
697 |
"model.layers.15.self_attn.q_proj.input_scale": "model-00005-of-00010.safetensors",
|
698 |
"model.layers.15.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
699 |
"model.layers.15.self_attn.q_proj.weight_scale": "model-00005-of-00010.safetensors",
|
@@ -758,29 +774,31 @@
|
|
758 |
"model.layers.16.block_sparse_moe.experts.6.w1.input_scale": "model-00005-of-00010.safetensors",
|
759 |
"model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00010.safetensors",
|
760 |
"model.layers.16.block_sparse_moe.experts.6.w1.weight_scale": "model-00005-of-00010.safetensors",
|
761 |
-
"model.layers.16.block_sparse_moe.experts.6.w2.input_scale": "model-
|
762 |
-
"model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-
|
763 |
-
"model.layers.16.block_sparse_moe.experts.6.w2.weight_scale": "model-
|
764 |
-
"model.layers.16.block_sparse_moe.experts.6.w3.input_scale": "model-
|
765 |
-
"model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-
|
766 |
-
"model.layers.16.block_sparse_moe.experts.6.w3.weight_scale": "model-
|
767 |
-
"model.layers.16.block_sparse_moe.experts.7.w1.input_scale": "model-
|
768 |
-
"model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-
|
769 |
-
"model.layers.16.block_sparse_moe.experts.7.w1.weight_scale": "model-
|
770 |
-
"model.layers.16.block_sparse_moe.experts.7.w2.input_scale": "model-
|
771 |
-
"model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-
|
772 |
-
"model.layers.16.block_sparse_moe.experts.7.w2.weight_scale": "model-
|
773 |
-
"model.layers.16.block_sparse_moe.experts.7.w3.input_scale": "model-
|
774 |
-
"model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-
|
775 |
-
"model.layers.16.block_sparse_moe.experts.7.w3.weight_scale": "model-
|
776 |
"model.layers.16.block_sparse_moe.gate.weight": "model-00005-of-00010.safetensors",
|
777 |
-
"model.layers.16.input_layernorm.weight": "model-
|
778 |
-
"model.layers.16.post_attention_layernorm.weight": "model-
|
779 |
"model.layers.16.self_attn.k_proj.input_scale": "model-00005-of-00010.safetensors",
|
780 |
"model.layers.16.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
781 |
"model.layers.16.self_attn.k_proj.weight_scale": "model-00005-of-00010.safetensors",
|
782 |
"model.layers.16.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
|
|
783 |
"model.layers.16.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
|
|
784 |
"model.layers.16.self_attn.q_proj.input_scale": "model-00005-of-00010.safetensors",
|
785 |
"model.layers.16.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
786 |
"model.layers.16.self_attn.q_proj.weight_scale": "model-00005-of-00010.safetensors",
|
@@ -866,15 +884,17 @@
|
|
866 |
"model.layers.17.self_attn.k_proj.input_scale": "model-00006-of-00010.safetensors",
|
867 |
"model.layers.17.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
868 |
"model.layers.17.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
869 |
-
"model.layers.17.self_attn.k_scale": "model-
|
|
|
870 |
"model.layers.17.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
|
|
871 |
"model.layers.17.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
872 |
"model.layers.17.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
873 |
"model.layers.17.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
874 |
"model.layers.17.self_attn.v_proj.input_scale": "model-00006-of-00010.safetensors",
|
875 |
"model.layers.17.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
876 |
"model.layers.17.self_attn.v_proj.weight_scale": "model-00006-of-00010.safetensors",
|
877 |
-
"model.layers.17.self_attn.v_scale": "model-
|
878 |
"model.layers.18.block_sparse_moe.experts.0.w1.input_scale": "model-00006-of-00010.safetensors",
|
879 |
"model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00010.safetensors",
|
880 |
"model.layers.18.block_sparse_moe.experts.0.w1.weight_scale": "model-00006-of-00010.safetensors",
|
@@ -954,7 +974,9 @@
|
|
954 |
"model.layers.18.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
955 |
"model.layers.18.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
956 |
"model.layers.18.self_attn.k_scale": "model-00006-of-00010.safetensors",
|
|
|
957 |
"model.layers.18.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
|
|
958 |
"model.layers.18.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
959 |
"model.layers.18.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
960 |
"model.layers.18.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
@@ -1041,7 +1063,9 @@
|
|
1041 |
"model.layers.19.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
1042 |
"model.layers.19.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
1043 |
"model.layers.19.self_attn.k_scale": "model-00006-of-00010.safetensors",
|
|
|
1044 |
"model.layers.19.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
|
|
1045 |
"model.layers.19.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
1046 |
"model.layers.19.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
1047 |
"model.layers.19.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
@@ -1128,7 +1152,9 @@
|
|
1128 |
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
1129 |
"model.layers.2.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
1130 |
"model.layers.2.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
|
|
1131 |
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
|
|
1132 |
"model.layers.2.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
1133 |
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
1134 |
"model.layers.2.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
@@ -1148,24 +1174,24 @@
|
|
1148 |
"model.layers.20.block_sparse_moe.experts.1.w1.input_scale": "model-00006-of-00010.safetensors",
|
1149 |
"model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00010.safetensors",
|
1150 |
"model.layers.20.block_sparse_moe.experts.1.w1.weight_scale": "model-00006-of-00010.safetensors",
|
1151 |
-
"model.layers.20.block_sparse_moe.experts.1.w2.input_scale": "model-
|
1152 |
-
"model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-
|
1153 |
-
"model.layers.20.block_sparse_moe.experts.1.w2.weight_scale": "model-
|
1154 |
-
"model.layers.20.block_sparse_moe.experts.1.w3.input_scale": "model-
|
1155 |
-
"model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-
|
1156 |
-
"model.layers.20.block_sparse_moe.experts.1.w3.weight_scale": "model-
|
1157 |
-
"model.layers.20.block_sparse_moe.experts.2.w1.input_scale": "model-
|
1158 |
-
"model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-
|
1159 |
-
"model.layers.20.block_sparse_moe.experts.2.w1.weight_scale": "model-
|
1160 |
-
"model.layers.20.block_sparse_moe.experts.2.w2.input_scale": "model-
|
1161 |
-
"model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-
|
1162 |
-
"model.layers.20.block_sparse_moe.experts.2.w2.weight_scale": "model-
|
1163 |
-
"model.layers.20.block_sparse_moe.experts.2.w3.input_scale": "model-
|
1164 |
-
"model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-
|
1165 |
-
"model.layers.20.block_sparse_moe.experts.2.w3.weight_scale": "model-
|
1166 |
-
"model.layers.20.block_sparse_moe.experts.3.w1.input_scale": "model-
|
1167 |
-
"model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-
|
1168 |
-
"model.layers.20.block_sparse_moe.experts.3.w1.weight_scale": "model-
|
1169 |
"model.layers.20.block_sparse_moe.experts.3.w2.input_scale": "model-00007-of-00010.safetensors",
|
1170 |
"model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00010.safetensors",
|
1171 |
"model.layers.20.block_sparse_moe.experts.3.w2.weight_scale": "model-00007-of-00010.safetensors",
|
@@ -1215,7 +1241,9 @@
|
|
1215 |
"model.layers.20.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
1216 |
"model.layers.20.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
1217 |
"model.layers.20.self_attn.k_scale": "model-00006-of-00010.safetensors",
|
|
|
1218 |
"model.layers.20.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
|
|
1219 |
"model.layers.20.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
1220 |
"model.layers.20.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
1221 |
"model.layers.20.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
@@ -1302,7 +1330,9 @@
|
|
1302 |
"model.layers.21.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
1303 |
"model.layers.21.self_attn.k_proj.weight_scale": "model-00007-of-00010.safetensors",
|
1304 |
"model.layers.21.self_attn.k_scale": "model-00007-of-00010.safetensors",
|
|
|
1305 |
"model.layers.21.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
|
|
1306 |
"model.layers.21.self_attn.q_proj.input_scale": "model-00007-of-00010.safetensors",
|
1307 |
"model.layers.21.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
1308 |
"model.layers.21.self_attn.q_proj.weight_scale": "model-00007-of-00010.safetensors",
|
@@ -1389,7 +1419,9 @@
|
|
1389 |
"model.layers.22.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
1390 |
"model.layers.22.self_attn.k_proj.weight_scale": "model-00007-of-00010.safetensors",
|
1391 |
"model.layers.22.self_attn.k_scale": "model-00007-of-00010.safetensors",
|
|
|
1392 |
"model.layers.22.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
|
|
1393 |
"model.layers.22.self_attn.q_proj.input_scale": "model-00007-of-00010.safetensors",
|
1394 |
"model.layers.22.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
1395 |
"model.layers.22.self_attn.q_proj.weight_scale": "model-00007-of-00010.safetensors",
|
@@ -1439,27 +1471,27 @@
|
|
1439 |
"model.layers.23.block_sparse_moe.experts.4.w2.input_scale": "model-00007-of-00010.safetensors",
|
1440 |
"model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00010.safetensors",
|
1441 |
"model.layers.23.block_sparse_moe.experts.4.w2.weight_scale": "model-00007-of-00010.safetensors",
|
1442 |
-
"model.layers.23.block_sparse_moe.experts.4.w3.input_scale": "model-
|
1443 |
-
"model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-
|
1444 |
-
"model.layers.23.block_sparse_moe.experts.4.w3.weight_scale": "model-
|
1445 |
-
"model.layers.23.block_sparse_moe.experts.5.w1.input_scale": "model-
|
1446 |
-
"model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-
|
1447 |
-
"model.layers.23.block_sparse_moe.experts.5.w1.weight_scale": "model-
|
1448 |
-
"model.layers.23.block_sparse_moe.experts.5.w2.input_scale": "model-
|
1449 |
-
"model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-
|
1450 |
-
"model.layers.23.block_sparse_moe.experts.5.w2.weight_scale": "model-
|
1451 |
-
"model.layers.23.block_sparse_moe.experts.5.w3.input_scale": "model-
|
1452 |
-
"model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-
|
1453 |
-
"model.layers.23.block_sparse_moe.experts.5.w3.weight_scale": "model-
|
1454 |
-
"model.layers.23.block_sparse_moe.experts.6.w1.input_scale": "model-
|
1455 |
-
"model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-
|
1456 |
-
"model.layers.23.block_sparse_moe.experts.6.w1.weight_scale": "model-
|
1457 |
-
"model.layers.23.block_sparse_moe.experts.6.w2.input_scale": "model-
|
1458 |
-
"model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-
|
1459 |
-
"model.layers.23.block_sparse_moe.experts.6.w2.weight_scale": "model-
|
1460 |
-
"model.layers.23.block_sparse_moe.experts.6.w3.input_scale": "model-
|
1461 |
-
"model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-
|
1462 |
-
"model.layers.23.block_sparse_moe.experts.6.w3.weight_scale": "model-
|
1463 |
"model.layers.23.block_sparse_moe.experts.7.w1.input_scale": "model-00008-of-00010.safetensors",
|
1464 |
"model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors",
|
1465 |
"model.layers.23.block_sparse_moe.experts.7.w1.weight_scale": "model-00008-of-00010.safetensors",
|
@@ -1476,7 +1508,9 @@
|
|
1476 |
"model.layers.23.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
1477 |
"model.layers.23.self_attn.k_proj.weight_scale": "model-00007-of-00010.safetensors",
|
1478 |
"model.layers.23.self_attn.k_scale": "model-00007-of-00010.safetensors",
|
|
|
1479 |
"model.layers.23.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
|
|
1480 |
"model.layers.23.self_attn.q_proj.input_scale": "model-00007-of-00010.safetensors",
|
1481 |
"model.layers.23.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
1482 |
"model.layers.23.self_attn.q_proj.weight_scale": "model-00007-of-00010.safetensors",
|
@@ -1563,7 +1597,9 @@
|
|
1563 |
"model.layers.24.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
1564 |
"model.layers.24.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1565 |
"model.layers.24.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
|
|
1566 |
"model.layers.24.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
|
|
1567 |
"model.layers.24.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
1568 |
"model.layers.24.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
1569 |
"model.layers.24.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
@@ -1650,7 +1686,9 @@
|
|
1650 |
"model.layers.25.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
1651 |
"model.layers.25.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1652 |
"model.layers.25.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
|
|
1653 |
"model.layers.25.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
|
|
1654 |
"model.layers.25.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
1655 |
"model.layers.25.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
1656 |
"model.layers.25.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
@@ -1737,7 +1775,9 @@
|
|
1737 |
"model.layers.26.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
1738 |
"model.layers.26.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1739 |
"model.layers.26.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
|
|
1740 |
"model.layers.26.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
|
|
1741 |
"model.layers.26.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
1742 |
"model.layers.26.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
1743 |
"model.layers.26.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
@@ -1745,27 +1785,27 @@
|
|
1745 |
"model.layers.26.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
1746 |
"model.layers.26.self_attn.v_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1747 |
"model.layers.26.self_attn.v_scale": "model-00008-of-00010.safetensors",
|
1748 |
-
"model.layers.27.block_sparse_moe.experts.0.w1.input_scale": "model-
|
1749 |
-
"model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-
|
1750 |
-
"model.layers.27.block_sparse_moe.experts.0.w1.weight_scale": "model-
|
1751 |
-
"model.layers.27.block_sparse_moe.experts.0.w2.input_scale": "model-
|
1752 |
-
"model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-
|
1753 |
-
"model.layers.27.block_sparse_moe.experts.0.w2.weight_scale": "model-
|
1754 |
-
"model.layers.27.block_sparse_moe.experts.0.w3.input_scale": "model-
|
1755 |
-
"model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-
|
1756 |
-
"model.layers.27.block_sparse_moe.experts.0.w3.weight_scale": "model-
|
1757 |
-
"model.layers.27.block_sparse_moe.experts.1.w1.input_scale": "model-
|
1758 |
-
"model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-
|
1759 |
-
"model.layers.27.block_sparse_moe.experts.1.w1.weight_scale": "model-
|
1760 |
-
"model.layers.27.block_sparse_moe.experts.1.w2.input_scale": "model-
|
1761 |
-
"model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-
|
1762 |
-
"model.layers.27.block_sparse_moe.experts.1.w2.weight_scale": "model-
|
1763 |
-
"model.layers.27.block_sparse_moe.experts.1.w3.input_scale": "model-
|
1764 |
-
"model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-
|
1765 |
-
"model.layers.27.block_sparse_moe.experts.1.w3.weight_scale": "model-
|
1766 |
-
"model.layers.27.block_sparse_moe.experts.2.w1.input_scale": "model-
|
1767 |
-
"model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-
|
1768 |
-
"model.layers.27.block_sparse_moe.experts.2.w1.weight_scale": "model-
|
1769 |
"model.layers.27.block_sparse_moe.experts.2.w2.input_scale": "model-00009-of-00010.safetensors",
|
1770 |
"model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00010.safetensors",
|
1771 |
"model.layers.27.block_sparse_moe.experts.2.w2.weight_scale": "model-00009-of-00010.safetensors",
|
@@ -1817,20 +1857,22 @@
|
|
1817 |
"model.layers.27.block_sparse_moe.experts.7.w3.input_scale": "model-00009-of-00010.safetensors",
|
1818 |
"model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00010.safetensors",
|
1819 |
"model.layers.27.block_sparse_moe.experts.7.w3.weight_scale": "model-00009-of-00010.safetensors",
|
1820 |
-
"model.layers.27.block_sparse_moe.gate.weight": "model-
|
1821 |
"model.layers.27.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
1822 |
"model.layers.27.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
1823 |
-
"model.layers.27.self_attn.k_proj.input_scale": "model-
|
1824 |
-
"model.layers.27.self_attn.k_proj.weight": "model-
|
1825 |
-
"model.layers.27.self_attn.k_proj.weight_scale": "model-
|
1826 |
"model.layers.27.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
1827 |
-
"model.layers.27.self_attn.o_proj.
|
1828 |
-
"model.layers.27.self_attn.
|
1829 |
-
"model.layers.27.self_attn.
|
1830 |
-
"model.layers.27.self_attn.q_proj.
|
1831 |
-
"model.layers.27.self_attn.
|
1832 |
-
"model.layers.27.self_attn.
|
1833 |
-
"model.layers.27.self_attn.v_proj.
|
|
|
|
|
1834 |
"model.layers.27.self_attn.v_scale": "model-00008-of-00010.safetensors",
|
1835 |
"model.layers.28.block_sparse_moe.experts.0.w1.input_scale": "model-00009-of-00010.safetensors",
|
1836 |
"model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors",
|
@@ -1911,7 +1953,9 @@
|
|
1911 |
"model.layers.28.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
1912 |
"model.layers.28.self_attn.k_proj.weight_scale": "model-00009-of-00010.safetensors",
|
1913 |
"model.layers.28.self_attn.k_scale": "model-00009-of-00010.safetensors",
|
|
|
1914 |
"model.layers.28.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
|
|
1915 |
"model.layers.28.self_attn.q_proj.input_scale": "model-00009-of-00010.safetensors",
|
1916 |
"model.layers.28.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
1917 |
"model.layers.28.self_attn.q_proj.weight_scale": "model-00009-of-00010.safetensors",
|
@@ -1998,7 +2042,9 @@
|
|
1998 |
"model.layers.29.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
1999 |
"model.layers.29.self_attn.k_proj.weight_scale": "model-00009-of-00010.safetensors",
|
2000 |
"model.layers.29.self_attn.k_scale": "model-00009-of-00010.safetensors",
|
|
|
2001 |
"model.layers.29.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
|
|
2002 |
"model.layers.29.self_attn.q_proj.input_scale": "model-00009-of-00010.safetensors",
|
2003 |
"model.layers.29.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
2004 |
"model.layers.29.self_attn.q_proj.weight_scale": "model-00009-of-00010.safetensors",
|
@@ -2018,9 +2064,9 @@
|
|
2018 |
"model.layers.3.block_sparse_moe.experts.1.w1.input_scale": "model-00001-of-00010.safetensors",
|
2019 |
"model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00010.safetensors",
|
2020 |
"model.layers.3.block_sparse_moe.experts.1.w1.weight_scale": "model-00001-of-00010.safetensors",
|
2021 |
-
"model.layers.3.block_sparse_moe.experts.1.w2.input_scale": "model-
|
2022 |
-
"model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-
|
2023 |
-
"model.layers.3.block_sparse_moe.experts.1.w2.weight_scale": "model-
|
2024 |
"model.layers.3.block_sparse_moe.experts.1.w3.input_scale": "model-00002-of-00010.safetensors",
|
2025 |
"model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors",
|
2026 |
"model.layers.3.block_sparse_moe.experts.1.w3.weight_scale": "model-00002-of-00010.safetensors",
|
@@ -2085,7 +2131,9 @@
|
|
2085 |
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
2086 |
"model.layers.3.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
2087 |
"model.layers.3.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
|
|
2088 |
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
|
|
2089 |
"model.layers.3.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
2090 |
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
2091 |
"model.layers.3.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
@@ -2120,33 +2168,33 @@
|
|
2120 |
"model.layers.30.block_sparse_moe.experts.2.w3.input_scale": "model-00009-of-00010.safetensors",
|
2121 |
"model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00010.safetensors",
|
2122 |
"model.layers.30.block_sparse_moe.experts.2.w3.weight_scale": "model-00009-of-00010.safetensors",
|
2123 |
-
"model.layers.30.block_sparse_moe.experts.3.w1.input_scale": "model-
|
2124 |
-
"model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-
|
2125 |
-
"model.layers.30.block_sparse_moe.experts.3.w1.weight_scale": "model-
|
2126 |
-
"model.layers.30.block_sparse_moe.experts.3.w2.input_scale": "model-
|
2127 |
-
"model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-
|
2128 |
-
"model.layers.30.block_sparse_moe.experts.3.w2.weight_scale": "model-
|
2129 |
-
"model.layers.30.block_sparse_moe.experts.3.w3.input_scale": "model-
|
2130 |
-
"model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-
|
2131 |
-
"model.layers.30.block_sparse_moe.experts.3.w3.weight_scale": "model-
|
2132 |
-
"model.layers.30.block_sparse_moe.experts.4.w1.input_scale": "model-
|
2133 |
-
"model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-
|
2134 |
-
"model.layers.30.block_sparse_moe.experts.4.w1.weight_scale": "model-
|
2135 |
-
"model.layers.30.block_sparse_moe.experts.4.w2.input_scale": "model-
|
2136 |
-
"model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-
|
2137 |
-
"model.layers.30.block_sparse_moe.experts.4.w2.weight_scale": "model-
|
2138 |
-
"model.layers.30.block_sparse_moe.experts.4.w3.input_scale": "model-
|
2139 |
-
"model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-
|
2140 |
-
"model.layers.30.block_sparse_moe.experts.4.w3.weight_scale": "model-
|
2141 |
-
"model.layers.30.block_sparse_moe.experts.5.w1.input_scale": "model-
|
2142 |
-
"model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-
|
2143 |
-
"model.layers.30.block_sparse_moe.experts.5.w1.weight_scale": "model-
|
2144 |
-
"model.layers.30.block_sparse_moe.experts.5.w2.input_scale": "model-
|
2145 |
-
"model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-
|
2146 |
-
"model.layers.30.block_sparse_moe.experts.5.w2.weight_scale": "model-
|
2147 |
-
"model.layers.30.block_sparse_moe.experts.5.w3.input_scale": "model-
|
2148 |
-
"model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-
|
2149 |
-
"model.layers.30.block_sparse_moe.experts.5.w3.weight_scale": "model-
|
2150 |
"model.layers.30.block_sparse_moe.experts.6.w1.input_scale": "model-00010-of-00010.safetensors",
|
2151 |
"model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00010.safetensors",
|
2152 |
"model.layers.30.block_sparse_moe.experts.6.w1.weight_scale": "model-00010-of-00010.safetensors",
|
@@ -2172,7 +2220,9 @@
|
|
2172 |
"model.layers.30.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
2173 |
"model.layers.30.self_attn.k_proj.weight_scale": "model-00009-of-00010.safetensors",
|
2174 |
"model.layers.30.self_attn.k_scale": "model-00009-of-00010.safetensors",
|
|
|
2175 |
"model.layers.30.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
|
|
2176 |
"model.layers.30.self_attn.q_proj.input_scale": "model-00009-of-00010.safetensors",
|
2177 |
"model.layers.30.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
2178 |
"model.layers.30.self_attn.q_proj.weight_scale": "model-00009-of-00010.safetensors",
|
@@ -2259,7 +2309,9 @@
|
|
2259 |
"model.layers.31.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
2260 |
"model.layers.31.self_attn.k_proj.weight_scale": "model-00010-of-00010.safetensors",
|
2261 |
"model.layers.31.self_attn.k_scale": "model-00010-of-00010.safetensors",
|
|
|
2262 |
"model.layers.31.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
|
|
2263 |
"model.layers.31.self_attn.q_proj.input_scale": "model-00010-of-00010.safetensors",
|
2264 |
"model.layers.31.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
2265 |
"model.layers.31.self_attn.q_proj.weight_scale": "model-00010-of-00010.safetensors",
|
@@ -2346,7 +2398,9 @@
|
|
2346 |
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
2347 |
"model.layers.4.self_attn.k_proj.weight_scale": "model-00002-of-00010.safetensors",
|
2348 |
"model.layers.4.self_attn.k_scale": "model-00002-of-00010.safetensors",
|
|
|
2349 |
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
|
|
2350 |
"model.layers.4.self_attn.q_proj.input_scale": "model-00002-of-00010.safetensors",
|
2351 |
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
2352 |
"model.layers.4.self_attn.q_proj.weight_scale": "model-00002-of-00010.safetensors",
|
@@ -2433,7 +2487,9 @@
|
|
2433 |
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
2434 |
"model.layers.5.self_attn.k_proj.weight_scale": "model-00002-of-00010.safetensors",
|
2435 |
"model.layers.5.self_attn.k_scale": "model-00002-of-00010.safetensors",
|
|
|
2436 |
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
|
|
2437 |
"model.layers.5.self_attn.q_proj.input_scale": "model-00002-of-00010.safetensors",
|
2438 |
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
2439 |
"model.layers.5.self_attn.q_proj.weight_scale": "model-00002-of-00010.safetensors",
|
@@ -2483,12 +2539,12 @@
|
|
2483 |
"model.layers.6.block_sparse_moe.experts.4.w2.input_scale": "model-00002-of-00010.safetensors",
|
2484 |
"model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00010.safetensors",
|
2485 |
"model.layers.6.block_sparse_moe.experts.4.w2.weight_scale": "model-00002-of-00010.safetensors",
|
2486 |
-
"model.layers.6.block_sparse_moe.experts.4.w3.input_scale": "model-
|
2487 |
-
"model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-
|
2488 |
-
"model.layers.6.block_sparse_moe.experts.4.w3.weight_scale": "model-
|
2489 |
-
"model.layers.6.block_sparse_moe.experts.5.w1.input_scale": "model-
|
2490 |
-
"model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-
|
2491 |
-
"model.layers.6.block_sparse_moe.experts.5.w1.weight_scale": "model-
|
2492 |
"model.layers.6.block_sparse_moe.experts.5.w2.input_scale": "model-00003-of-00010.safetensors",
|
2493 |
"model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00010.safetensors",
|
2494 |
"model.layers.6.block_sparse_moe.experts.5.w2.weight_scale": "model-00003-of-00010.safetensors",
|
@@ -2520,7 +2576,9 @@
|
|
2520 |
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
2521 |
"model.layers.6.self_attn.k_proj.weight_scale": "model-00002-of-00010.safetensors",
|
2522 |
"model.layers.6.self_attn.k_scale": "model-00002-of-00010.safetensors",
|
|
|
2523 |
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
|
|
2524 |
"model.layers.6.self_attn.q_proj.input_scale": "model-00002-of-00010.safetensors",
|
2525 |
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
2526 |
"model.layers.6.self_attn.q_proj.weight_scale": "model-00002-of-00010.safetensors",
|
@@ -2607,7 +2665,9 @@
|
|
2607 |
"model.layers.7.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
2608 |
"model.layers.7.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
2609 |
"model.layers.7.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
|
|
2610 |
"model.layers.7.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
|
|
2611 |
"model.layers.7.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
2612 |
"model.layers.7.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
2613 |
"model.layers.7.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
@@ -2694,7 +2754,9 @@
|
|
2694 |
"model.layers.8.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
2695 |
"model.layers.8.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
2696 |
"model.layers.8.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
|
|
2697 |
"model.layers.8.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
|
|
2698 |
"model.layers.8.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
2699 |
"model.layers.8.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
2700 |
"model.layers.8.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
@@ -2781,7 +2843,9 @@
|
|
2781 |
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
2782 |
"model.layers.9.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
2783 |
"model.layers.9.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
|
|
2784 |
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
|
|
2785 |
"model.layers.9.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
2786 |
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
2787 |
"model.layers.9.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"total_size": 46966255232
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00010-of-00010.safetensors",
|
|
|
84 |
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
85 |
"model.layers.0.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
86 |
"model.layers.0.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
87 |
+
"model.layers.0.self_attn.o_proj.input_scale": "model-00001-of-00010.safetensors",
|
88 |
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
89 |
+
"model.layers.0.self_attn.o_proj.weight_scale": "model-00001-of-00010.safetensors",
|
90 |
"model.layers.0.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
91 |
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
92 |
"model.layers.0.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
|
|
173 |
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
174 |
"model.layers.1.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
175 |
"model.layers.1.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
176 |
+
"model.layers.1.self_attn.o_proj.input_scale": "model-00001-of-00010.safetensors",
|
177 |
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
178 |
+
"model.layers.1.self_attn.o_proj.weight_scale": "model-00001-of-00010.safetensors",
|
179 |
"model.layers.1.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
180 |
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
181 |
"model.layers.1.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
|
|
183 |
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00010.safetensors",
|
184 |
"model.layers.1.self_attn.v_proj.weight_scale": "model-00001-of-00010.safetensors",
|
185 |
"model.layers.1.self_attn.v_scale": "model-00001-of-00010.safetensors",
|
186 |
+
"model.layers.10.block_sparse_moe.experts.0.w1.input_scale": "model-00003-of-00010.safetensors",
|
187 |
+
"model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00010.safetensors",
|
188 |
+
"model.layers.10.block_sparse_moe.experts.0.w1.weight_scale": "model-00003-of-00010.safetensors",
|
189 |
+
"model.layers.10.block_sparse_moe.experts.0.w2.input_scale": "model-00003-of-00010.safetensors",
|
190 |
+
"model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00010.safetensors",
|
191 |
+
"model.layers.10.block_sparse_moe.experts.0.w2.weight_scale": "model-00003-of-00010.safetensors",
|
192 |
"model.layers.10.block_sparse_moe.experts.0.w3.input_scale": "model-00004-of-00010.safetensors",
|
193 |
"model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00010.safetensors",
|
194 |
"model.layers.10.block_sparse_moe.experts.0.w3.weight_scale": "model-00004-of-00010.safetensors",
|
|
|
255 |
"model.layers.10.block_sparse_moe.experts.7.w3.input_scale": "model-00004-of-00010.safetensors",
|
256 |
"model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00010.safetensors",
|
257 |
"model.layers.10.block_sparse_moe.experts.7.w3.weight_scale": "model-00004-of-00010.safetensors",
|
258 |
+
"model.layers.10.block_sparse_moe.gate.weight": "model-00003-of-00010.safetensors",
|
259 |
"model.layers.10.input_layernorm.weight": "model-00004-of-00010.safetensors",
|
260 |
"model.layers.10.post_attention_layernorm.weight": "model-00004-of-00010.safetensors",
|
261 |
+
"model.layers.10.self_attn.k_proj.input_scale": "model-00003-of-00010.safetensors",
|
262 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
263 |
+
"model.layers.10.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
264 |
"model.layers.10.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
265 |
+
"model.layers.10.self_attn.o_proj.input_scale": "model-00003-of-00010.safetensors",
|
266 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
267 |
+
"model.layers.10.self_attn.o_proj.weight_scale": "model-00003-of-00010.safetensors",
|
268 |
+
"model.layers.10.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
269 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
270 |
+
"model.layers.10.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
271 |
+
"model.layers.10.self_attn.v_proj.input_scale": "model-00003-of-00010.safetensors",
|
272 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00010.safetensors",
|
273 |
+
"model.layers.10.self_attn.v_proj.weight_scale": "model-00003-of-00010.safetensors",
|
274 |
"model.layers.10.self_attn.v_scale": "model-00003-of-00010.safetensors",
|
275 |
"model.layers.11.block_sparse_moe.experts.0.w1.input_scale": "model-00004-of-00010.safetensors",
|
276 |
"model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00010.safetensors",
|
|
|
351 |
"model.layers.11.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
352 |
"model.layers.11.self_attn.k_proj.weight_scale": "model-00004-of-00010.safetensors",
|
353 |
"model.layers.11.self_attn.k_scale": "model-00004-of-00010.safetensors",
|
354 |
+
"model.layers.11.self_attn.o_proj.input_scale": "model-00004-of-00010.safetensors",
|
355 |
"model.layers.11.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
356 |
+
"model.layers.11.self_attn.o_proj.weight_scale": "model-00004-of-00010.safetensors",
|
357 |
"model.layers.11.self_attn.q_proj.input_scale": "model-00004-of-00010.safetensors",
|
358 |
"model.layers.11.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
359 |
"model.layers.11.self_attn.q_proj.weight_scale": "model-00004-of-00010.safetensors",
|
|
|
440 |
"model.layers.12.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
441 |
"model.layers.12.self_attn.k_proj.weight_scale": "model-00004-of-00010.safetensors",
|
442 |
"model.layers.12.self_attn.k_scale": "model-00004-of-00010.safetensors",
|
443 |
+
"model.layers.12.self_attn.o_proj.input_scale": "model-00004-of-00010.safetensors",
|
444 |
"model.layers.12.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
445 |
+
"model.layers.12.self_attn.o_proj.weight_scale": "model-00004-of-00010.safetensors",
|
446 |
"model.layers.12.self_attn.q_proj.input_scale": "model-00004-of-00010.safetensors",
|
447 |
"model.layers.12.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
448 |
"model.layers.12.self_attn.q_proj.weight_scale": "model-00004-of-00010.safetensors",
|
|
|
477 |
"model.layers.13.block_sparse_moe.experts.2.w3.input_scale": "model-00004-of-00010.safetensors",
|
478 |
"model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00010.safetensors",
|
479 |
"model.layers.13.block_sparse_moe.experts.2.w3.weight_scale": "model-00004-of-00010.safetensors",
|
480 |
+
"model.layers.13.block_sparse_moe.experts.3.w1.input_scale": "model-00004-of-00010.safetensors",
|
481 |
+
"model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00010.safetensors",
|
482 |
+
"model.layers.13.block_sparse_moe.experts.3.w1.weight_scale": "model-00004-of-00010.safetensors",
|
483 |
+
"model.layers.13.block_sparse_moe.experts.3.w2.input_scale": "model-00004-of-00010.safetensors",
|
484 |
+
"model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00010.safetensors",
|
485 |
+
"model.layers.13.block_sparse_moe.experts.3.w2.weight_scale": "model-00004-of-00010.safetensors",
|
486 |
+
"model.layers.13.block_sparse_moe.experts.3.w3.input_scale": "model-00004-of-00010.safetensors",
|
487 |
+
"model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00010.safetensors",
|
488 |
+
"model.layers.13.block_sparse_moe.experts.3.w3.weight_scale": "model-00004-of-00010.safetensors",
|
489 |
+
"model.layers.13.block_sparse_moe.experts.4.w1.input_scale": "model-00004-of-00010.safetensors",
|
490 |
+
"model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00010.safetensors",
|
491 |
+
"model.layers.13.block_sparse_moe.experts.4.w1.weight_scale": "model-00004-of-00010.safetensors",
|
492 |
"model.layers.13.block_sparse_moe.experts.4.w2.input_scale": "model-00005-of-00010.safetensors",
|
493 |
"model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors",
|
494 |
"model.layers.13.block_sparse_moe.experts.4.w2.weight_scale": "model-00005-of-00010.safetensors",
|
|
|
529 |
"model.layers.13.self_attn.k_proj.weight": "model-00004-of-00010.safetensors",
|
530 |
"model.layers.13.self_attn.k_proj.weight_scale": "model-00004-of-00010.safetensors",
|
531 |
"model.layers.13.self_attn.k_scale": "model-00004-of-00010.safetensors",
|
532 |
+
"model.layers.13.self_attn.o_proj.input_scale": "model-00004-of-00010.safetensors",
|
533 |
"model.layers.13.self_attn.o_proj.weight": "model-00004-of-00010.safetensors",
|
534 |
+
"model.layers.13.self_attn.o_proj.weight_scale": "model-00004-of-00010.safetensors",
|
535 |
"model.layers.13.self_attn.q_proj.input_scale": "model-00004-of-00010.safetensors",
|
536 |
"model.layers.13.self_attn.q_proj.weight": "model-00004-of-00010.safetensors",
|
537 |
"model.layers.13.self_attn.q_proj.weight_scale": "model-00004-of-00010.safetensors",
|
|
|
618 |
"model.layers.14.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
619 |
"model.layers.14.self_attn.k_proj.weight_scale": "model-00005-of-00010.safetensors",
|
620 |
"model.layers.14.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
621 |
+
"model.layers.14.self_attn.o_proj.input_scale": "model-00005-of-00010.safetensors",
|
622 |
"model.layers.14.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
623 |
+
"model.layers.14.self_attn.o_proj.weight_scale": "model-00005-of-00010.safetensors",
|
624 |
"model.layers.14.self_attn.q_proj.input_scale": "model-00005-of-00010.safetensors",
|
625 |
"model.layers.14.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
626 |
"model.layers.14.self_attn.q_proj.weight_scale": "model-00005-of-00010.safetensors",
|
|
|
707 |
"model.layers.15.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
708 |
"model.layers.15.self_attn.k_proj.weight_scale": "model-00005-of-00010.safetensors",
|
709 |
"model.layers.15.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
710 |
+
"model.layers.15.self_attn.o_proj.input_scale": "model-00005-of-00010.safetensors",
|
711 |
"model.layers.15.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
712 |
+
"model.layers.15.self_attn.o_proj.weight_scale": "model-00005-of-00010.safetensors",
|
713 |
"model.layers.15.self_attn.q_proj.input_scale": "model-00005-of-00010.safetensors",
|
714 |
"model.layers.15.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
715 |
"model.layers.15.self_attn.q_proj.weight_scale": "model-00005-of-00010.safetensors",
|
|
|
774 |
"model.layers.16.block_sparse_moe.experts.6.w1.input_scale": "model-00005-of-00010.safetensors",
|
775 |
"model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00010.safetensors",
|
776 |
"model.layers.16.block_sparse_moe.experts.6.w1.weight_scale": "model-00005-of-00010.safetensors",
|
777 |
+
"model.layers.16.block_sparse_moe.experts.6.w2.input_scale": "model-00005-of-00010.safetensors",
|
778 |
+
"model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00010.safetensors",
|
779 |
+
"model.layers.16.block_sparse_moe.experts.6.w2.weight_scale": "model-00005-of-00010.safetensors",
|
780 |
+
"model.layers.16.block_sparse_moe.experts.6.w3.input_scale": "model-00005-of-00010.safetensors",
|
781 |
+
"model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00010.safetensors",
|
782 |
+
"model.layers.16.block_sparse_moe.experts.6.w3.weight_scale": "model-00005-of-00010.safetensors",
|
783 |
+
"model.layers.16.block_sparse_moe.experts.7.w1.input_scale": "model-00005-of-00010.safetensors",
|
784 |
+
"model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00010.safetensors",
|
785 |
+
"model.layers.16.block_sparse_moe.experts.7.w1.weight_scale": "model-00005-of-00010.safetensors",
|
786 |
+
"model.layers.16.block_sparse_moe.experts.7.w2.input_scale": "model-00005-of-00010.safetensors",
|
787 |
+
"model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors",
|
788 |
+
"model.layers.16.block_sparse_moe.experts.7.w2.weight_scale": "model-00005-of-00010.safetensors",
|
789 |
+
"model.layers.16.block_sparse_moe.experts.7.w3.input_scale": "model-00005-of-00010.safetensors",
|
790 |
+
"model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00010.safetensors",
|
791 |
+
"model.layers.16.block_sparse_moe.experts.7.w3.weight_scale": "model-00005-of-00010.safetensors",
|
792 |
"model.layers.16.block_sparse_moe.gate.weight": "model-00005-of-00010.safetensors",
|
793 |
+
"model.layers.16.input_layernorm.weight": "model-00005-of-00010.safetensors",
|
794 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00005-of-00010.safetensors",
|
795 |
"model.layers.16.self_attn.k_proj.input_scale": "model-00005-of-00010.safetensors",
|
796 |
"model.layers.16.self_attn.k_proj.weight": "model-00005-of-00010.safetensors",
|
797 |
"model.layers.16.self_attn.k_proj.weight_scale": "model-00005-of-00010.safetensors",
|
798 |
"model.layers.16.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
799 |
+
"model.layers.16.self_attn.o_proj.input_scale": "model-00005-of-00010.safetensors",
|
800 |
"model.layers.16.self_attn.o_proj.weight": "model-00005-of-00010.safetensors",
|
801 |
+
"model.layers.16.self_attn.o_proj.weight_scale": "model-00005-of-00010.safetensors",
|
802 |
"model.layers.16.self_attn.q_proj.input_scale": "model-00005-of-00010.safetensors",
|
803 |
"model.layers.16.self_attn.q_proj.weight": "model-00005-of-00010.safetensors",
|
804 |
"model.layers.16.self_attn.q_proj.weight_scale": "model-00005-of-00010.safetensors",
|
|
|
884 |
"model.layers.17.self_attn.k_proj.input_scale": "model-00006-of-00010.safetensors",
|
885 |
"model.layers.17.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
886 |
"model.layers.17.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
887 |
+
"model.layers.17.self_attn.k_scale": "model-00005-of-00010.safetensors",
|
888 |
+
"model.layers.17.self_attn.o_proj.input_scale": "model-00006-of-00010.safetensors",
|
889 |
"model.layers.17.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
890 |
+
"model.layers.17.self_attn.o_proj.weight_scale": "model-00006-of-00010.safetensors",
|
891 |
"model.layers.17.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
892 |
"model.layers.17.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
893 |
"model.layers.17.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
894 |
"model.layers.17.self_attn.v_proj.input_scale": "model-00006-of-00010.safetensors",
|
895 |
"model.layers.17.self_attn.v_proj.weight": "model-00006-of-00010.safetensors",
|
896 |
"model.layers.17.self_attn.v_proj.weight_scale": "model-00006-of-00010.safetensors",
|
897 |
+
"model.layers.17.self_attn.v_scale": "model-00005-of-00010.safetensors",
|
898 |
"model.layers.18.block_sparse_moe.experts.0.w1.input_scale": "model-00006-of-00010.safetensors",
|
899 |
"model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00010.safetensors",
|
900 |
"model.layers.18.block_sparse_moe.experts.0.w1.weight_scale": "model-00006-of-00010.safetensors",
|
|
|
974 |
"model.layers.18.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
975 |
"model.layers.18.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
976 |
"model.layers.18.self_attn.k_scale": "model-00006-of-00010.safetensors",
|
977 |
+
"model.layers.18.self_attn.o_proj.input_scale": "model-00006-of-00010.safetensors",
|
978 |
"model.layers.18.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
979 |
+
"model.layers.18.self_attn.o_proj.weight_scale": "model-00006-of-00010.safetensors",
|
980 |
"model.layers.18.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
981 |
"model.layers.18.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
982 |
"model.layers.18.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
|
|
1063 |
"model.layers.19.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
1064 |
"model.layers.19.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
1065 |
"model.layers.19.self_attn.k_scale": "model-00006-of-00010.safetensors",
|
1066 |
+
"model.layers.19.self_attn.o_proj.input_scale": "model-00006-of-00010.safetensors",
|
1067 |
"model.layers.19.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
1068 |
+
"model.layers.19.self_attn.o_proj.weight_scale": "model-00006-of-00010.safetensors",
|
1069 |
"model.layers.19.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
1070 |
"model.layers.19.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
1071 |
"model.layers.19.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
|
|
1152 |
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
1153 |
"model.layers.2.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
1154 |
"model.layers.2.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
1155 |
+
"model.layers.2.self_attn.o_proj.input_scale": "model-00001-of-00010.safetensors",
|
1156 |
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
1157 |
+
"model.layers.2.self_attn.o_proj.weight_scale": "model-00001-of-00010.safetensors",
|
1158 |
"model.layers.2.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
1159 |
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
1160 |
"model.layers.2.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
|
|
1174 |
"model.layers.20.block_sparse_moe.experts.1.w1.input_scale": "model-00006-of-00010.safetensors",
|
1175 |
"model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00010.safetensors",
|
1176 |
"model.layers.20.block_sparse_moe.experts.1.w1.weight_scale": "model-00006-of-00010.safetensors",
|
1177 |
+
"model.layers.20.block_sparse_moe.experts.1.w2.input_scale": "model-00006-of-00010.safetensors",
|
1178 |
+
"model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00010.safetensors",
|
1179 |
+
"model.layers.20.block_sparse_moe.experts.1.w2.weight_scale": "model-00006-of-00010.safetensors",
|
1180 |
+
"model.layers.20.block_sparse_moe.experts.1.w3.input_scale": "model-00006-of-00010.safetensors",
|
1181 |
+
"model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00010.safetensors",
|
1182 |
+
"model.layers.20.block_sparse_moe.experts.1.w3.weight_scale": "model-00006-of-00010.safetensors",
|
1183 |
+
"model.layers.20.block_sparse_moe.experts.2.w1.input_scale": "model-00006-of-00010.safetensors",
|
1184 |
+
"model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00010.safetensors",
|
1185 |
+
"model.layers.20.block_sparse_moe.experts.2.w1.weight_scale": "model-00006-of-00010.safetensors",
|
1186 |
+
"model.layers.20.block_sparse_moe.experts.2.w2.input_scale": "model-00006-of-00010.safetensors",
|
1187 |
+
"model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00010.safetensors",
|
1188 |
+
"model.layers.20.block_sparse_moe.experts.2.w2.weight_scale": "model-00006-of-00010.safetensors",
|
1189 |
+
"model.layers.20.block_sparse_moe.experts.2.w3.input_scale": "model-00006-of-00010.safetensors",
|
1190 |
+
"model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00010.safetensors",
|
1191 |
+
"model.layers.20.block_sparse_moe.experts.2.w3.weight_scale": "model-00006-of-00010.safetensors",
|
1192 |
+
"model.layers.20.block_sparse_moe.experts.3.w1.input_scale": "model-00006-of-00010.safetensors",
|
1193 |
+
"model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00010.safetensors",
|
1194 |
+
"model.layers.20.block_sparse_moe.experts.3.w1.weight_scale": "model-00006-of-00010.safetensors",
|
1195 |
"model.layers.20.block_sparse_moe.experts.3.w2.input_scale": "model-00007-of-00010.safetensors",
|
1196 |
"model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00010.safetensors",
|
1197 |
"model.layers.20.block_sparse_moe.experts.3.w2.weight_scale": "model-00007-of-00010.safetensors",
|
|
|
1241 |
"model.layers.20.self_attn.k_proj.weight": "model-00006-of-00010.safetensors",
|
1242 |
"model.layers.20.self_attn.k_proj.weight_scale": "model-00006-of-00010.safetensors",
|
1243 |
"model.layers.20.self_attn.k_scale": "model-00006-of-00010.safetensors",
|
1244 |
+
"model.layers.20.self_attn.o_proj.input_scale": "model-00006-of-00010.safetensors",
|
1245 |
"model.layers.20.self_attn.o_proj.weight": "model-00006-of-00010.safetensors",
|
1246 |
+
"model.layers.20.self_attn.o_proj.weight_scale": "model-00006-of-00010.safetensors",
|
1247 |
"model.layers.20.self_attn.q_proj.input_scale": "model-00006-of-00010.safetensors",
|
1248 |
"model.layers.20.self_attn.q_proj.weight": "model-00006-of-00010.safetensors",
|
1249 |
"model.layers.20.self_attn.q_proj.weight_scale": "model-00006-of-00010.safetensors",
|
|
|
1330 |
"model.layers.21.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
1331 |
"model.layers.21.self_attn.k_proj.weight_scale": "model-00007-of-00010.safetensors",
|
1332 |
"model.layers.21.self_attn.k_scale": "model-00007-of-00010.safetensors",
|
1333 |
+
"model.layers.21.self_attn.o_proj.input_scale": "model-00007-of-00010.safetensors",
|
1334 |
"model.layers.21.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
1335 |
+
"model.layers.21.self_attn.o_proj.weight_scale": "model-00007-of-00010.safetensors",
|
1336 |
"model.layers.21.self_attn.q_proj.input_scale": "model-00007-of-00010.safetensors",
|
1337 |
"model.layers.21.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
1338 |
"model.layers.21.self_attn.q_proj.weight_scale": "model-00007-of-00010.safetensors",
|
|
|
1419 |
"model.layers.22.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
1420 |
"model.layers.22.self_attn.k_proj.weight_scale": "model-00007-of-00010.safetensors",
|
1421 |
"model.layers.22.self_attn.k_scale": "model-00007-of-00010.safetensors",
|
1422 |
+
"model.layers.22.self_attn.o_proj.input_scale": "model-00007-of-00010.safetensors",
|
1423 |
"model.layers.22.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
1424 |
+
"model.layers.22.self_attn.o_proj.weight_scale": "model-00007-of-00010.safetensors",
|
1425 |
"model.layers.22.self_attn.q_proj.input_scale": "model-00007-of-00010.safetensors",
|
1426 |
"model.layers.22.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
1427 |
"model.layers.22.self_attn.q_proj.weight_scale": "model-00007-of-00010.safetensors",
|
|
|
1471 |
"model.layers.23.block_sparse_moe.experts.4.w2.input_scale": "model-00007-of-00010.safetensors",
|
1472 |
"model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00010.safetensors",
|
1473 |
"model.layers.23.block_sparse_moe.experts.4.w2.weight_scale": "model-00007-of-00010.safetensors",
|
1474 |
+
"model.layers.23.block_sparse_moe.experts.4.w3.input_scale": "model-00007-of-00010.safetensors",
|
1475 |
+
"model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00010.safetensors",
|
1476 |
+
"model.layers.23.block_sparse_moe.experts.4.w3.weight_scale": "model-00007-of-00010.safetensors",
|
1477 |
+
"model.layers.23.block_sparse_moe.experts.5.w1.input_scale": "model-00007-of-00010.safetensors",
|
1478 |
+
"model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00010.safetensors",
|
1479 |
+
"model.layers.23.block_sparse_moe.experts.5.w1.weight_scale": "model-00007-of-00010.safetensors",
|
1480 |
+
"model.layers.23.block_sparse_moe.experts.5.w2.input_scale": "model-00007-of-00010.safetensors",
|
1481 |
+
"model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00007-of-00010.safetensors",
|
1482 |
+
"model.layers.23.block_sparse_moe.experts.5.w2.weight_scale": "model-00007-of-00010.safetensors",
|
1483 |
+
"model.layers.23.block_sparse_moe.experts.5.w3.input_scale": "model-00007-of-00010.safetensors",
|
1484 |
+
"model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00007-of-00010.safetensors",
|
1485 |
+
"model.layers.23.block_sparse_moe.experts.5.w3.weight_scale": "model-00007-of-00010.safetensors",
|
1486 |
+
"model.layers.23.block_sparse_moe.experts.6.w1.input_scale": "model-00007-of-00010.safetensors",
|
1487 |
+
"model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00010.safetensors",
|
1488 |
+
"model.layers.23.block_sparse_moe.experts.6.w1.weight_scale": "model-00007-of-00010.safetensors",
|
1489 |
+
"model.layers.23.block_sparse_moe.experts.6.w2.input_scale": "model-00007-of-00010.safetensors",
|
1490 |
+
"model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00010.safetensors",
|
1491 |
+
"model.layers.23.block_sparse_moe.experts.6.w2.weight_scale": "model-00007-of-00010.safetensors",
|
1492 |
+
"model.layers.23.block_sparse_moe.experts.6.w3.input_scale": "model-00007-of-00010.safetensors",
|
1493 |
+
"model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00010.safetensors",
|
1494 |
+
"model.layers.23.block_sparse_moe.experts.6.w3.weight_scale": "model-00007-of-00010.safetensors",
|
1495 |
"model.layers.23.block_sparse_moe.experts.7.w1.input_scale": "model-00008-of-00010.safetensors",
|
1496 |
"model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors",
|
1497 |
"model.layers.23.block_sparse_moe.experts.7.w1.weight_scale": "model-00008-of-00010.safetensors",
|
|
|
1508 |
"model.layers.23.self_attn.k_proj.weight": "model-00007-of-00010.safetensors",
|
1509 |
"model.layers.23.self_attn.k_proj.weight_scale": "model-00007-of-00010.safetensors",
|
1510 |
"model.layers.23.self_attn.k_scale": "model-00007-of-00010.safetensors",
|
1511 |
+
"model.layers.23.self_attn.o_proj.input_scale": "model-00007-of-00010.safetensors",
|
1512 |
"model.layers.23.self_attn.o_proj.weight": "model-00007-of-00010.safetensors",
|
1513 |
+
"model.layers.23.self_attn.o_proj.weight_scale": "model-00007-of-00010.safetensors",
|
1514 |
"model.layers.23.self_attn.q_proj.input_scale": "model-00007-of-00010.safetensors",
|
1515 |
"model.layers.23.self_attn.q_proj.weight": "model-00007-of-00010.safetensors",
|
1516 |
"model.layers.23.self_attn.q_proj.weight_scale": "model-00007-of-00010.safetensors",
|
|
|
1597 |
"model.layers.24.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
1598 |
"model.layers.24.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1599 |
"model.layers.24.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
1600 |
+
"model.layers.24.self_attn.o_proj.input_scale": "model-00008-of-00010.safetensors",
|
1601 |
"model.layers.24.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
1602 |
+
"model.layers.24.self_attn.o_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1603 |
"model.layers.24.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
1604 |
"model.layers.24.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
1605 |
"model.layers.24.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
|
|
1686 |
"model.layers.25.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
1687 |
"model.layers.25.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1688 |
"model.layers.25.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
1689 |
+
"model.layers.25.self_attn.o_proj.input_scale": "model-00008-of-00010.safetensors",
|
1690 |
"model.layers.25.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
1691 |
+
"model.layers.25.self_attn.o_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1692 |
"model.layers.25.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
1693 |
"model.layers.25.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
1694 |
"model.layers.25.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
|
|
1775 |
"model.layers.26.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
1776 |
"model.layers.26.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1777 |
"model.layers.26.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
1778 |
+
"model.layers.26.self_attn.o_proj.input_scale": "model-00008-of-00010.safetensors",
|
1779 |
"model.layers.26.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
1780 |
+
"model.layers.26.self_attn.o_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1781 |
"model.layers.26.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
1782 |
"model.layers.26.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
1783 |
"model.layers.26.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
|
|
1785 |
"model.layers.26.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
1786 |
"model.layers.26.self_attn.v_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1787 |
"model.layers.26.self_attn.v_scale": "model-00008-of-00010.safetensors",
|
1788 |
+
"model.layers.27.block_sparse_moe.experts.0.w1.input_scale": "model-00008-of-00010.safetensors",
|
1789 |
+
"model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors",
|
1790 |
+
"model.layers.27.block_sparse_moe.experts.0.w1.weight_scale": "model-00008-of-00010.safetensors",
|
1791 |
+
"model.layers.27.block_sparse_moe.experts.0.w2.input_scale": "model-00008-of-00010.safetensors",
|
1792 |
+
"model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00010.safetensors",
|
1793 |
+
"model.layers.27.block_sparse_moe.experts.0.w2.weight_scale": "model-00008-of-00010.safetensors",
|
1794 |
+
"model.layers.27.block_sparse_moe.experts.0.w3.input_scale": "model-00008-of-00010.safetensors",
|
1795 |
+
"model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00010.safetensors",
|
1796 |
+
"model.layers.27.block_sparse_moe.experts.0.w3.weight_scale": "model-00008-of-00010.safetensors",
|
1797 |
+
"model.layers.27.block_sparse_moe.experts.1.w1.input_scale": "model-00008-of-00010.safetensors",
|
1798 |
+
"model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors",
|
1799 |
+
"model.layers.27.block_sparse_moe.experts.1.w1.weight_scale": "model-00008-of-00010.safetensors",
|
1800 |
+
"model.layers.27.block_sparse_moe.experts.1.w2.input_scale": "model-00008-of-00010.safetensors",
|
1801 |
+
"model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00010.safetensors",
|
1802 |
+
"model.layers.27.block_sparse_moe.experts.1.w2.weight_scale": "model-00008-of-00010.safetensors",
|
1803 |
+
"model.layers.27.block_sparse_moe.experts.1.w3.input_scale": "model-00008-of-00010.safetensors",
|
1804 |
+
"model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00010.safetensors",
|
1805 |
+
"model.layers.27.block_sparse_moe.experts.1.w3.weight_scale": "model-00008-of-00010.safetensors",
|
1806 |
+
"model.layers.27.block_sparse_moe.experts.2.w1.input_scale": "model-00008-of-00010.safetensors",
|
1807 |
+
"model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00010.safetensors",
|
1808 |
+
"model.layers.27.block_sparse_moe.experts.2.w1.weight_scale": "model-00008-of-00010.safetensors",
|
1809 |
"model.layers.27.block_sparse_moe.experts.2.w2.input_scale": "model-00009-of-00010.safetensors",
|
1810 |
"model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00010.safetensors",
|
1811 |
"model.layers.27.block_sparse_moe.experts.2.w2.weight_scale": "model-00009-of-00010.safetensors",
|
|
|
1857 |
"model.layers.27.block_sparse_moe.experts.7.w3.input_scale": "model-00009-of-00010.safetensors",
|
1858 |
"model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00010.safetensors",
|
1859 |
"model.layers.27.block_sparse_moe.experts.7.w3.weight_scale": "model-00009-of-00010.safetensors",
|
1860 |
+
"model.layers.27.block_sparse_moe.gate.weight": "model-00008-of-00010.safetensors",
|
1861 |
"model.layers.27.input_layernorm.weight": "model-00009-of-00010.safetensors",
|
1862 |
"model.layers.27.post_attention_layernorm.weight": "model-00009-of-00010.safetensors",
|
1863 |
+
"model.layers.27.self_attn.k_proj.input_scale": "model-00008-of-00010.safetensors",
|
1864 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00008-of-00010.safetensors",
|
1865 |
+
"model.layers.27.self_attn.k_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1866 |
"model.layers.27.self_attn.k_scale": "model-00008-of-00010.safetensors",
|
1867 |
+
"model.layers.27.self_attn.o_proj.input_scale": "model-00008-of-00010.safetensors",
|
1868 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00008-of-00010.safetensors",
|
1869 |
+
"model.layers.27.self_attn.o_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1870 |
+
"model.layers.27.self_attn.q_proj.input_scale": "model-00008-of-00010.safetensors",
|
1871 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00008-of-00010.safetensors",
|
1872 |
+
"model.layers.27.self_attn.q_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1873 |
+
"model.layers.27.self_attn.v_proj.input_scale": "model-00008-of-00010.safetensors",
|
1874 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00008-of-00010.safetensors",
|
1875 |
+
"model.layers.27.self_attn.v_proj.weight_scale": "model-00008-of-00010.safetensors",
|
1876 |
"model.layers.27.self_attn.v_scale": "model-00008-of-00010.safetensors",
|
1877 |
"model.layers.28.block_sparse_moe.experts.0.w1.input_scale": "model-00009-of-00010.safetensors",
|
1878 |
"model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors",
|
|
|
1953 |
"model.layers.28.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
1954 |
"model.layers.28.self_attn.k_proj.weight_scale": "model-00009-of-00010.safetensors",
|
1955 |
"model.layers.28.self_attn.k_scale": "model-00009-of-00010.safetensors",
|
1956 |
+
"model.layers.28.self_attn.o_proj.input_scale": "model-00009-of-00010.safetensors",
|
1957 |
"model.layers.28.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
1958 |
+
"model.layers.28.self_attn.o_proj.weight_scale": "model-00009-of-00010.safetensors",
|
1959 |
"model.layers.28.self_attn.q_proj.input_scale": "model-00009-of-00010.safetensors",
|
1960 |
"model.layers.28.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
1961 |
"model.layers.28.self_attn.q_proj.weight_scale": "model-00009-of-00010.safetensors",
|
|
|
2042 |
"model.layers.29.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
2043 |
"model.layers.29.self_attn.k_proj.weight_scale": "model-00009-of-00010.safetensors",
|
2044 |
"model.layers.29.self_attn.k_scale": "model-00009-of-00010.safetensors",
|
2045 |
+
"model.layers.29.self_attn.o_proj.input_scale": "model-00009-of-00010.safetensors",
|
2046 |
"model.layers.29.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
2047 |
+
"model.layers.29.self_attn.o_proj.weight_scale": "model-00009-of-00010.safetensors",
|
2048 |
"model.layers.29.self_attn.q_proj.input_scale": "model-00009-of-00010.safetensors",
|
2049 |
"model.layers.29.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
2050 |
"model.layers.29.self_attn.q_proj.weight_scale": "model-00009-of-00010.safetensors",
|
|
|
2064 |
"model.layers.3.block_sparse_moe.experts.1.w1.input_scale": "model-00001-of-00010.safetensors",
|
2065 |
"model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00010.safetensors",
|
2066 |
"model.layers.3.block_sparse_moe.experts.1.w1.weight_scale": "model-00001-of-00010.safetensors",
|
2067 |
+
"model.layers.3.block_sparse_moe.experts.1.w2.input_scale": "model-00001-of-00010.safetensors",
|
2068 |
+
"model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00010.safetensors",
|
2069 |
+
"model.layers.3.block_sparse_moe.experts.1.w2.weight_scale": "model-00001-of-00010.safetensors",
|
2070 |
"model.layers.3.block_sparse_moe.experts.1.w3.input_scale": "model-00002-of-00010.safetensors",
|
2071 |
"model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors",
|
2072 |
"model.layers.3.block_sparse_moe.experts.1.w3.weight_scale": "model-00002-of-00010.safetensors",
|
|
|
2131 |
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00010.safetensors",
|
2132 |
"model.layers.3.self_attn.k_proj.weight_scale": "model-00001-of-00010.safetensors",
|
2133 |
"model.layers.3.self_attn.k_scale": "model-00001-of-00010.safetensors",
|
2134 |
+
"model.layers.3.self_attn.o_proj.input_scale": "model-00001-of-00010.safetensors",
|
2135 |
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00010.safetensors",
|
2136 |
+
"model.layers.3.self_attn.o_proj.weight_scale": "model-00001-of-00010.safetensors",
|
2137 |
"model.layers.3.self_attn.q_proj.input_scale": "model-00001-of-00010.safetensors",
|
2138 |
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00010.safetensors",
|
2139 |
"model.layers.3.self_attn.q_proj.weight_scale": "model-00001-of-00010.safetensors",
|
|
|
2168 |
"model.layers.30.block_sparse_moe.experts.2.w3.input_scale": "model-00009-of-00010.safetensors",
|
2169 |
"model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00010.safetensors",
|
2170 |
"model.layers.30.block_sparse_moe.experts.2.w3.weight_scale": "model-00009-of-00010.safetensors",
|
2171 |
+
"model.layers.30.block_sparse_moe.experts.3.w1.input_scale": "model-00009-of-00010.safetensors",
|
2172 |
+
"model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors",
|
2173 |
+
"model.layers.30.block_sparse_moe.experts.3.w1.weight_scale": "model-00009-of-00010.safetensors",
|
2174 |
+
"model.layers.30.block_sparse_moe.experts.3.w2.input_scale": "model-00009-of-00010.safetensors",
|
2175 |
+
"model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00010.safetensors",
|
2176 |
+
"model.layers.30.block_sparse_moe.experts.3.w2.weight_scale": "model-00009-of-00010.safetensors",
|
2177 |
+
"model.layers.30.block_sparse_moe.experts.3.w3.input_scale": "model-00009-of-00010.safetensors",
|
2178 |
+
"model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00010.safetensors",
|
2179 |
+
"model.layers.30.block_sparse_moe.experts.3.w3.weight_scale": "model-00009-of-00010.safetensors",
|
2180 |
+
"model.layers.30.block_sparse_moe.experts.4.w1.input_scale": "model-00009-of-00010.safetensors",
|
2181 |
+
"model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors",
|
2182 |
+
"model.layers.30.block_sparse_moe.experts.4.w1.weight_scale": "model-00009-of-00010.safetensors",
|
2183 |
+
"model.layers.30.block_sparse_moe.experts.4.w2.input_scale": "model-00009-of-00010.safetensors",
|
2184 |
+
"model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00010.safetensors",
|
2185 |
+
"model.layers.30.block_sparse_moe.experts.4.w2.weight_scale": "model-00009-of-00010.safetensors",
|
2186 |
+
"model.layers.30.block_sparse_moe.experts.4.w3.input_scale": "model-00009-of-00010.safetensors",
|
2187 |
+
"model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00010.safetensors",
|
2188 |
+
"model.layers.30.block_sparse_moe.experts.4.w3.weight_scale": "model-00009-of-00010.safetensors",
|
2189 |
+
"model.layers.30.block_sparse_moe.experts.5.w1.input_scale": "model-00009-of-00010.safetensors",
|
2190 |
+
"model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00010.safetensors",
|
2191 |
+
"model.layers.30.block_sparse_moe.experts.5.w1.weight_scale": "model-00009-of-00010.safetensors",
|
2192 |
+
"model.layers.30.block_sparse_moe.experts.5.w2.input_scale": "model-00009-of-00010.safetensors",
|
2193 |
+
"model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00010.safetensors",
|
2194 |
+
"model.layers.30.block_sparse_moe.experts.5.w2.weight_scale": "model-00009-of-00010.safetensors",
|
2195 |
+
"model.layers.30.block_sparse_moe.experts.5.w3.input_scale": "model-00009-of-00010.safetensors",
|
2196 |
+
"model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00010.safetensors",
|
2197 |
+
"model.layers.30.block_sparse_moe.experts.5.w3.weight_scale": "model-00009-of-00010.safetensors",
|
2198 |
"model.layers.30.block_sparse_moe.experts.6.w1.input_scale": "model-00010-of-00010.safetensors",
|
2199 |
"model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00010.safetensors",
|
2200 |
"model.layers.30.block_sparse_moe.experts.6.w1.weight_scale": "model-00010-of-00010.safetensors",
|
|
|
2220 |
"model.layers.30.self_attn.k_proj.weight": "model-00009-of-00010.safetensors",
|
2221 |
"model.layers.30.self_attn.k_proj.weight_scale": "model-00009-of-00010.safetensors",
|
2222 |
"model.layers.30.self_attn.k_scale": "model-00009-of-00010.safetensors",
|
2223 |
+
"model.layers.30.self_attn.o_proj.input_scale": "model-00009-of-00010.safetensors",
|
2224 |
"model.layers.30.self_attn.o_proj.weight": "model-00009-of-00010.safetensors",
|
2225 |
+
"model.layers.30.self_attn.o_proj.weight_scale": "model-00009-of-00010.safetensors",
|
2226 |
"model.layers.30.self_attn.q_proj.input_scale": "model-00009-of-00010.safetensors",
|
2227 |
"model.layers.30.self_attn.q_proj.weight": "model-00009-of-00010.safetensors",
|
2228 |
"model.layers.30.self_attn.q_proj.weight_scale": "model-00009-of-00010.safetensors",
|
|
|
2309 |
"model.layers.31.self_attn.k_proj.weight": "model-00010-of-00010.safetensors",
|
2310 |
"model.layers.31.self_attn.k_proj.weight_scale": "model-00010-of-00010.safetensors",
|
2311 |
"model.layers.31.self_attn.k_scale": "model-00010-of-00010.safetensors",
|
2312 |
+
"model.layers.31.self_attn.o_proj.input_scale": "model-00010-of-00010.safetensors",
|
2313 |
"model.layers.31.self_attn.o_proj.weight": "model-00010-of-00010.safetensors",
|
2314 |
+
"model.layers.31.self_attn.o_proj.weight_scale": "model-00010-of-00010.safetensors",
|
2315 |
"model.layers.31.self_attn.q_proj.input_scale": "model-00010-of-00010.safetensors",
|
2316 |
"model.layers.31.self_attn.q_proj.weight": "model-00010-of-00010.safetensors",
|
2317 |
"model.layers.31.self_attn.q_proj.weight_scale": "model-00010-of-00010.safetensors",
|
|
|
2398 |
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
2399 |
"model.layers.4.self_attn.k_proj.weight_scale": "model-00002-of-00010.safetensors",
|
2400 |
"model.layers.4.self_attn.k_scale": "model-00002-of-00010.safetensors",
|
2401 |
+
"model.layers.4.self_attn.o_proj.input_scale": "model-00002-of-00010.safetensors",
|
2402 |
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
2403 |
+
"model.layers.4.self_attn.o_proj.weight_scale": "model-00002-of-00010.safetensors",
|
2404 |
"model.layers.4.self_attn.q_proj.input_scale": "model-00002-of-00010.safetensors",
|
2405 |
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
2406 |
"model.layers.4.self_attn.q_proj.weight_scale": "model-00002-of-00010.safetensors",
|
|
|
2487 |
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
2488 |
"model.layers.5.self_attn.k_proj.weight_scale": "model-00002-of-00010.safetensors",
|
2489 |
"model.layers.5.self_attn.k_scale": "model-00002-of-00010.safetensors",
|
2490 |
+
"model.layers.5.self_attn.o_proj.input_scale": "model-00002-of-00010.safetensors",
|
2491 |
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
2492 |
+
"model.layers.5.self_attn.o_proj.weight_scale": "model-00002-of-00010.safetensors",
|
2493 |
"model.layers.5.self_attn.q_proj.input_scale": "model-00002-of-00010.safetensors",
|
2494 |
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
2495 |
"model.layers.5.self_attn.q_proj.weight_scale": "model-00002-of-00010.safetensors",
|
|
|
2539 |
"model.layers.6.block_sparse_moe.experts.4.w2.input_scale": "model-00002-of-00010.safetensors",
|
2540 |
"model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00010.safetensors",
|
2541 |
"model.layers.6.block_sparse_moe.experts.4.w2.weight_scale": "model-00002-of-00010.safetensors",
|
2542 |
+
"model.layers.6.block_sparse_moe.experts.4.w3.input_scale": "model-00002-of-00010.safetensors",
|
2543 |
+
"model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors",
|
2544 |
+
"model.layers.6.block_sparse_moe.experts.4.w3.weight_scale": "model-00002-of-00010.safetensors",
|
2545 |
+
"model.layers.6.block_sparse_moe.experts.5.w1.input_scale": "model-00002-of-00010.safetensors",
|
2546 |
+
"model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00010.safetensors",
|
2547 |
+
"model.layers.6.block_sparse_moe.experts.5.w1.weight_scale": "model-00002-of-00010.safetensors",
|
2548 |
"model.layers.6.block_sparse_moe.experts.5.w2.input_scale": "model-00003-of-00010.safetensors",
|
2549 |
"model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00010.safetensors",
|
2550 |
"model.layers.6.block_sparse_moe.experts.5.w2.weight_scale": "model-00003-of-00010.safetensors",
|
|
|
2576 |
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00010.safetensors",
|
2577 |
"model.layers.6.self_attn.k_proj.weight_scale": "model-00002-of-00010.safetensors",
|
2578 |
"model.layers.6.self_attn.k_scale": "model-00002-of-00010.safetensors",
|
2579 |
+
"model.layers.6.self_attn.o_proj.input_scale": "model-00002-of-00010.safetensors",
|
2580 |
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00010.safetensors",
|
2581 |
+
"model.layers.6.self_attn.o_proj.weight_scale": "model-00002-of-00010.safetensors",
|
2582 |
"model.layers.6.self_attn.q_proj.input_scale": "model-00002-of-00010.safetensors",
|
2583 |
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00010.safetensors",
|
2584 |
"model.layers.6.self_attn.q_proj.weight_scale": "model-00002-of-00010.safetensors",
|
|
|
2665 |
"model.layers.7.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
2666 |
"model.layers.7.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
2667 |
"model.layers.7.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
2668 |
+
"model.layers.7.self_attn.o_proj.input_scale": "model-00003-of-00010.safetensors",
|
2669 |
"model.layers.7.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
2670 |
+
"model.layers.7.self_attn.o_proj.weight_scale": "model-00003-of-00010.safetensors",
|
2671 |
"model.layers.7.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
2672 |
"model.layers.7.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
2673 |
"model.layers.7.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
|
|
2754 |
"model.layers.8.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
2755 |
"model.layers.8.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
2756 |
"model.layers.8.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
2757 |
+
"model.layers.8.self_attn.o_proj.input_scale": "model-00003-of-00010.safetensors",
|
2758 |
"model.layers.8.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
2759 |
+
"model.layers.8.self_attn.o_proj.weight_scale": "model-00003-of-00010.safetensors",
|
2760 |
"model.layers.8.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
2761 |
"model.layers.8.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
2762 |
"model.layers.8.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
|
|
2843 |
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00010.safetensors",
|
2844 |
"model.layers.9.self_attn.k_proj.weight_scale": "model-00003-of-00010.safetensors",
|
2845 |
"model.layers.9.self_attn.k_scale": "model-00003-of-00010.safetensors",
|
2846 |
+
"model.layers.9.self_attn.o_proj.input_scale": "model-00003-of-00010.safetensors",
|
2847 |
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00010.safetensors",
|
2848 |
+
"model.layers.9.self_attn.o_proj.weight_scale": "model-00003-of-00010.safetensors",
|
2849 |
"model.layers.9.self_attn.q_proj.input_scale": "model-00003-of-00010.safetensors",
|
2850 |
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00010.safetensors",
|
2851 |
"model.layers.9.self_attn.q_proj.weight_scale": "model-00003-of-00010.safetensors",
|
tokenizer_config.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"add_bos_token": true,
|
3 |
"add_eos_token": false,
|
4 |
-
"add_prefix_space":
|
5 |
"added_tokens_decoder": {
|
6 |
"0": {
|
7 |
"content": "<unk>",
|
@@ -30,10 +30,10 @@
|
|
30 |
},
|
31 |
"additional_special_tokens": [],
|
32 |
"bos_token": "<s>",
|
33 |
-
"chat_template": "{
|
34 |
"clean_up_tokenization_spaces": false,
|
35 |
"eos_token": "</s>",
|
36 |
-
"legacy":
|
37 |
"model_max_length": 1000000000000000019884624838656,
|
38 |
"pad_token": null,
|
39 |
"sp_model_kwargs": {},
|
|
|
1 |
{
|
2 |
"add_bos_token": true,
|
3 |
"add_eos_token": false,
|
4 |
+
"add_prefix_space": true,
|
5 |
"added_tokens_decoder": {
|
6 |
"0": {
|
7 |
"content": "<unk>",
|
|
|
30 |
},
|
31 |
"additional_special_tokens": [],
|
32 |
"bos_token": "<s>",
|
33 |
+
"chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
|
34 |
"clean_up_tokenization_spaces": false,
|
35 |
"eos_token": "</s>",
|
36 |
+
"legacy": true,
|
37 |
"model_max_length": 1000000000000000019884624838656,
|
38 |
"pad_token": null,
|
39 |
"sp_model_kwargs": {},
|