diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3975 @@ +{ + "metadata": { + "ParamSize": 313, + "ParamBytes": 3927297024.0, + "BitsPerParam": 4.12551973205239 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 272498688, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 448, + 152064 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 272498688, + "byteOffset": 0 + } + ], + "md5sum": "bf2d0126ac3d6fd2240a63c91332076e" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 272498688, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 272498688, + "byteOffset": 0 + } + ], + "md5sum": "98298f69f233ef0a8132bff7c7bcc314" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "60cd0b673d66b6e77c5d13c8f0d26e11" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "c6a5d07a8b3893d73b7016794550854e" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 28752896, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 28, + 152064 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8515584, + "byteOffset": 0 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8515584, + "byteOffset": 8515584 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 17031168 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 17038336 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 18099200 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 20220928 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 20228096 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 20237312 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 28494848 + } + ], + "md5sum": "b38c0cffe86ae0d71ef4430d32b7b4be" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "5abfb30ea8787e0fde3732267df5e2cb" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "b6572e8f6875cf4afa02a3d5e08cdf90" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "12e40deaa8465e954fd6ab7862ce537b" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "ab8814fa6b8686a3d27af26010a45953" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 28174336, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 6422528 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6623232 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 6630400 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 7691264 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9812992 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 9820160 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 9829376 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 18086912 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 18344960 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 24767488 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 24968192 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 24975360 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 26036224 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 28157952 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 28165120 + } + ], + "md5sum": "178c6a3fb33e38bca5fc9ad3ce5fa190" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "8a85c22cd72e50ee2bdb90d2c226ff06" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "b297e8964492bcaa7e65016d8bd398d4" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "7c2b54c844b14e22001f9fde0da47e93" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 33490944, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 8257536 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 8515584 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 14938112 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15138816 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 15145984 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 16206848 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18328576 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 18335744 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 18344960 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 26602496 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 26860544 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 33283072 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33483776 + } + ], + "md5sum": "398fa91e1e3d7560b7243dfd298cf14c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "2a96e5329d825ee8c18bfd92e4c7bbb6" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "e8892b8d9af664083d01e0dda68b5e75" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "cb963b71d8fafc1cbd7ff831114f6826" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 30059520, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 1060864 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 3182592 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 3189760 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 3198976 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 11456512 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 11714560 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 18137088 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18337792 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 18344960 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 19405824 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 21527552 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21534720 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 21543936 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 29801472 + } + ], + "md5sum": "d5310eb04468403e0f394aeb0d22effd" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "221fbaf9f803fcc112300c55cca919f5" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "9aadfbbdf470fac93fd4acefa8912d25" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "b2a4094a1ad76a266c8088899d504b51" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "494f1690e02b807cbcd5b844b74c453e" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 28174336, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 6422528 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6623232 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 6630400 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 7691264 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9812992 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 9820160 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 9829376 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 18086912 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 18344960 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 24767488 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 24968192 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 24975360 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 26036224 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 28157952 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 28165120 + } + ], + "md5sum": "06521204547273d11bad199c4ac1c04e" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "7c65dd61774c239a2f54eb82d44e3eb5" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "3791891a3cdd39eff2741c0a171e8ddd" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33492992, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 8257536 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 8515584 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 14938112 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 15138816 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 15148032 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 23405568 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 23663616 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 30086144 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 30286848 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 30294016 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 31354880 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33476608 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33483776 + } + ], + "md5sum": "f6cf8ff37c0d9142c3ace7c631aced1e" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "6425132f8bac243bc3e625277c03b9a3" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "f871efa481e28e9d961f4b7af33c99d0" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "70786ddfd64ec6e77e5cf66d6a59a86c" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33490944, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 8257536 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 8515584 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 14938112 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15138816 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 15145984 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 16206848 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18328576 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 18335744 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 18344960 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 26602496 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 26860544 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 33283072 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33483776 + } + ], + "md5sum": "60f373ada0016aefab28f9a3c4d53f65" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "c1e72658dc59ab8cbbc850cc42caf564" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "620387988a5e4af9db5193a7769b3eae" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "afd4d37ec6ab5af8e28fb5a2a19a8566" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 30059520, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 1060864 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 3182592 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 3189760 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 3198976 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 11456512 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 11714560 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 18137088 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18337792 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 18344960 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 19405824 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 21527552 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21534720 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 21543936 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 29801472 + } + ], + "md5sum": "cdcbeee880af6bc8590049aa86672b72" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "86ec7422bff27fa1e781cbdd28f76f4f" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "76e3eb6f6e0b854165b4d7bb441c2242" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "97d1999e25ffbccd2bde6fd2d7e2f7e7" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "a3d436036f680001fde853cfee2c5601" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 28174336, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 6422528 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6623232 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 6630400 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 7691264 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9812992 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 9820160 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 9829376 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 18086912 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 18344960 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 24767488 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 24968192 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 24975360 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 26036224 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 28157952 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 28165120 + } + ], + "md5sum": "0b9ea7b34221b7de9d440002de7fd93b" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "b9055e0839430e5affd67d66687ff593" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "e4b5fb8d74dff5535431286d25b778f2" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "368ea3dd119bc018d4b68125ac698ed1" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 33490944, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 8257536 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 8515584 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 14938112 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15138816 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 15145984 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 16206848 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18328576 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 18335744 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 18344960 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 26602496 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 26860544 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 33283072 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33483776 + } + ], + "md5sum": "ed8cbdc2fb1e391ccece277b2bdb9e6e" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "2f7b3c632ce97ddd9c051766f059ec8d" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "7c8689e94884df6c7789cd72315bf5d6" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 28984320, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 1060864 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 3182592 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 3189760 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 3198976 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 11456512 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 11714560 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 18137088 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 18337792 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 20459520 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 20468736 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 28726272 + } + ], + "md5sum": "65cfd3b2cf4de9b1f32028a5a15739bf" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "965d0c6ef79ad5fed1b3c77e7c8f030b" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "a9ade532c03a8dc110094d961463300d" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "0240842238f4af9131cce218a590ea32" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "68a24d294b8fd76e8e1570a2753fa630" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "34efd518bb1b1660c0958e866587911a" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "139704a6153f58ec81497476b2ba4157" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "03a402bcc479631bac3dc1d00aaef04d" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 32446464, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 6422528 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6623232 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 6630400 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 7691264 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9812992 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9820160 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 9827328 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 10888192 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 13009920 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 13017088 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 13026304 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 21283840 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 21541888 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 27964416 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 28165120 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 28172288 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 29233152 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 29240320 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 29247488 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 30308352 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 32430080 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 32437248 + } + ], + "md5sum": "887785b438a5acee9b729a810ef9d04b" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "4bc5e9bba29446754ec79022f77e11af" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "31ed438bc1fed28048dfced8d530c2d7" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "6674ed95ac88037371acf18f441f7f9c" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 33490944, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 8257536 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 8515584 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 14938112 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15138816 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 15145984 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 16206848 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18328576 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 18335744 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 18344960 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 26602496 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 26860544 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 33283072 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33483776 + } + ], + "md5sum": "7102c8b709d549a6ddbe2d3c3358676e" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "cfbc0bf2d94f47e2bbd948668b4a6db1" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "371aedc5fe9d7542df3647b489785823" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "59f5a63196ab7e1b2b3bf356ce58cf58" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 30059520, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 1060864 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 3182592 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 3189760 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 3198976 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 11456512 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 11714560 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 18137088 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18337792 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 18344960 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 19405824 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 21527552 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21534720 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 21543936 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 29801472 + } + ], + "md5sum": "7092a8fbcb33c29e4d08d2a7a058c156" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "f9fcd685bff01f6d50e146a46b481b56" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "e985959af627f755b2f4888b37457e13" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "b14d38b1d7a958b9e060444333e9b3b0" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "bbf59f0271030b91bd28d7ff9480e9fa" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 28174336, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 6422528 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6623232 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 6630400 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 7691264 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 9812992 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 9820160 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 9829376 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 18086912 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 18344960 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 24767488 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 24968192 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 24975360 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 26036224 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 28157952 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 28165120 + } + ], + "md5sum": "4ed8b617f3992544577e8933ffd77314" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "717f3478f7442da0f9b368eb85295586" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "1c4b3639920360d84fed03051a0de437" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "c939a48045a78236c11b8ca08e6090ad" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 33490944, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 8257536 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 8515584 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 14938112 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15138816 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 15145984 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 16206848 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18328576 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 18335744 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 18344960 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 26602496 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 26860544 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 33283072 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 33483776 + } + ], + "md5sum": "a7d88373af20f2ba28802a32b136aa7c" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "32a3ea6342193c1b3e99a35c029e9e77" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 2368, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "d8f76832efadf065c5fce9f621524f9e" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 37888 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "0bb50127725aacd5b1da73438cddcfa5" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 30059520, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 0 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 1060864 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 3182592 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 3189760 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 3198976 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 11456512 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 11714560 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 18137088 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18337792 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 148, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1060864, + "byteOffset": 18344960 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 37888 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2121728, + "byteOffset": 19405824 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 21527552 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21534720 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 448, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 21543936 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 28, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 258048, + "byteOffset": 29801472 + } + ], + "md5sum": "c04195cfd50f29d7a06069e3c6d11ea5" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 6630400, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 448, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 28, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 200704, + "byteOffset": 6422528 + }, + { + "name": "model.norm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 6623232 + } + ], + "md5sum": "2f47b0e16edbc7fba2256ad35d55dd99" + } + ] +} \ No newline at end of file