diff --git "a/ndarray-cache-b16.json" "b/ndarray-cache-b16.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache-b16.json" @@ -0,0 +1,4071 @@ +{ + "metadata": { + "ParamSize": 313, + "ParamBytes": 4760885248.0, + "BitsPerParam": 5.0011817065612245 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 272498688, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 272498688, + "byteOffset": 0 + } + ], + "md5sum": "5921f61d819fadc414e191f2fe50609d" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 34062336, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 34062336, + "byteOffset": 0 + } + ], + "md5sum": "9a9aa169c56502d9b272ad62bd829486" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "2e3048f64ebafb0e4e69c2ad1b3d6847" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "8abf1049f4ed143573b48a07f733a244" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "5577c1eeaa670041787df18c48cfc3fd" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "66252cbccad0738c18974fd7d8cd3675" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 33533952, + "records": [ + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 7168 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 4250624 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 4257792 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 4264960 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 8508416 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 16995328 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 17002496 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 17011712 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 25269248 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 26301440 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 32723968 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 33526784 + } + ], + "md5sum": "80f732ece25e51627885922e46f8cf8c" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "334f00c41b492b88d2bac39596c9f583" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "18050ca90808938d39b0c2c6ad0f8716" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "e167b7cd1dc1065b711a1741d9f62f1b" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33512448, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 4243456 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 12730368 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12737536 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 12746752 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 21004288 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 22036480 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 28459008 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 29261824 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 29268992 + } + ], + "md5sum": "4c682adfbdb4f9e958e37f21715ccb40" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "ec040a71dbdd6cc5d6ec716005a74e61" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "09756712bd844903230a693fcd3a8168" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "ded192a3e4e08351bbfb41636e379a8d" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "a99f4db4a9797c0e4fb63475c7e852a0" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "bb6a2d2bf9884fff83b5deb4b6efeec3" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "dd32c19a070eac82233b9502b9f54b9f" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 272498688, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 272498688, + "byteOffset": 0 + } + ], + "md5sum": "eefea4a4b9bbe5712525e54374082b74" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 34062336, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 34062336, + "byteOffset": 0 + } + ], + "md5sum": "937b18d26922cac01721af99b45d30e9" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "fd09f10a616ecbe93f3367b609a2f51f" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "c1e1bd92e4774bee443a60ff63a99260" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29276160, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.norm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 25025536 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 25032704 + } + ], + "md5sum": "9dcf078eaf9a4e96df66c4629bc6b2e9" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "6f678debdab8b211a2def493b48f95bc" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "336c5e17e463ad36522a6c0451e75ff2" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "7191d7e51728f71f732386e080c46353" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "3feeb890cc58af8f5f6c204640d882a3" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "41b10743b431c71a1018a1d1533b0247" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "0995e996942cfe38a0ff59960148adf8" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "06c9cd98bd60735eb55d1cfe2723bac2" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "03ed157b3e5c421f43a17b5517a169df" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "996297eb04c298e83e976cb47cefa434" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "3a3c65c29f8b2e67c0ea7259a650e249" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "4d55a3b41e7a0190d1d87fc7561f32d4" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "287570a1f4b8d14ed5ab7ddee02bb2a4" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "7c360fd61b0e5f690f892a49851eb7e2" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "532e9d503f97404d2cf8971592c85c16" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 25018368 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 25025536 + } + ], + "md5sum": "79283ecc1af612d750a440b294a409fc" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 33285120, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 8486912 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 8494080 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 8503296 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 16760832 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 17793024 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 24215552 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 25018368 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 25027584 + } + ], + "md5sum": "f3214d4ffea92c6d30b0fa5735aa7fea" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "fb097db6fada0486548d035432ee9744" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "3689a1641b34f3eda42c90cb71fac0cf" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 30301184, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 1032192 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 7454720 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 8257536 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 8264704 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 12508160 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 20995072 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21002240 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 21011456 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 29268992 + } + ], + "md5sum": "bc8938cf6943e3bb3a5f45b375edb843" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "c0b9a1cab360a22d98556432206cedf7" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "50c9510afcfa9e3d604ff7715cc01237" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 6422528 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 7225344 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 7232512 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 11475968 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 19962880 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 19970048 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 19979264 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 28236800 + } + ], + "md5sum": "3cffa7f0de09f17c0daaacfa278c2d9c" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "3701968369a8498ee6e9010406e69ed8" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "ce8b0d32a4f67ab8d52f54f7829fee89" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 6422528 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 7225344 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 7232512 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 11475968 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 19962880 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 19970048 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 19979264 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 28236800 + } + ], + "md5sum": "ebe1ce6009575458be005d17a4f15aab" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "e84cdd8e5fb15a3edb7a55616bd8357e" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "4ca648713303b5f307c155a6e97304bf" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 6422528 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 7225344 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 7232512 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 11475968 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 19962880 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 19970048 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 19979264 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 28236800 + } + ], + "md5sum": "e3ab4fc99f975c9dc3c865b776aed995" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "93f63b4642dcebf4bdfbe1aadd65a2b7" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "1eacf9a25b6f0bf3d83588c18b1ecf2b" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 32243712, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 6422528 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 7225344 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 15712256 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 15721472 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 23979008 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 25011200 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 31433728 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 32236544 + } + ], + "md5sum": "36e2c77113dae6f659796ad063d2606e" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "d296524dfb8158eaf547bf0a69366fe8" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "4c2c51dc6a4ff4e4d699588967202b7a" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "fe1c48ebc0a882e4335fe2c5d08c0fa2" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 25491456, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 4243456 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 12730368 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 12737536 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 12744704 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 16988160 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 25475072 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 25482240 + } + ], + "md5sum": "8bf652de02e779633b11b9c5a861a328" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "2e4057266f86cf5ce771b06a71ba4d99" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "1d8328f10ead1cee42f3561a78e23d64" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "a426017fe8f3a98e602187261f5caff9" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "20fab3671281c5c2736ef97fab54611f" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "8f45e70ea36b4ba12f68bf0255d6f000" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "3a75f1559aa90550ce18856e45c930de" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "ebd2f64b5627454dad4eed40a71c913c" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "79e62cfd68ea8eff0674a51e53b93676" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "48091d0799a81b7eca4b25acf9417d62" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 33526784, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 20765696 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 20772864 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 20780032 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 25023488 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 33510400 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33517568 + } + ], + "md5sum": "4b70d829c5ba1c4a2eea2e568d17e063" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "567ac62d9b5a34c592a0b6e1ebcf99c9" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "ee2e931c141252ba9a501127cb368fd6" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "25bf363cd21de0454c4bfb748d1214c0" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "784f62538311f764836bea2551c5a18f" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "5b8ed3df1951bbb3d64cfa88ea014020" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "b93aff9801e5b75ed89ccdbde77e7370" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "33c0fa2575b5e5e224f378abd91b7346" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "9b45c89f4116e43d436261888ec54357" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "98dac4ee5ffdff6f7d175dbae9efbdc0" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "90ec8165d63529998740f8e0df002e39" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "83357e7ef8e61b89519d6d6fae46c69e" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "eb4069d58b4f873ae667e450a838e2b9" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "8abeb7aff06fff527f3d4e6a60248e72" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "9454b0a387195b37f88c7fd45011de85" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "e34679b0bca045cc7e73e666a100e072" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 33947648, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3584, + 2368 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33947648, + "byteOffset": 0 + } + ], + "md5sum": "c88dc508685de8f867b520b7a0cdeefa" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "c87b53ffcd0afbcac529759465fa46ec" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 29268992, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 16515072 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 3584, + 592 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4243456, + "byteOffset": 16522240 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 20765696 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7168, + "byteOffset": 29252608 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 29259776 + } + ], + "md5sum": "f8aec16572cc4c76712ff528d5ad29f4" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 67895296, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 37888, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 67895296, + "byteOffset": 0 + } + ], + "md5sum": "4fff06aa12fb6966c9039bea200a9091" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 33268736, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 8257536 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 9289728 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 15712256 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 37888, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8486912, + "byteOffset": 16515072 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 25001984 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 4608, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8257536, + "byteOffset": 25011200 + } + ], + "md5sum": "a7d1608585f681280841185d5545cc66" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 8257536, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 4608, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1032192, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6422528, + "byteOffset": 1032192 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 802816, + "byteOffset": 7454720 + } + ], + "md5sum": "7c7e1af5f51e28585afce5a8bd2dc1e2" + } + ] +} \ No newline at end of file