diff --git "a/tensor-cache.json" "b/tensor-cache.json" new file mode 100644--- /dev/null +++ "b/tensor-cache.json" @@ -0,0 +1,3119 @@ +{ + "metadata": { + "ParamSize": 269, + "ParamBytes": 1148854272.0, + "BitsPerParam": 5.003642591223663 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 155582464, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 151936, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 155582464, + "byteOffset": 0 + } + ], + "md5sum": "1e05c86b4a6817becf50cf14a24d5af9" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 28901376, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 151936, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 19447808, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19447808 + }, + { + "name": "transformer.h.14.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19460096 + }, + { + "name": "transformer.h.14.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25751552 + }, + { + "name": "transformer.h.14.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26537984 + }, + { + "name": "transformer.h.14.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28635136 + }, + { + "name": "transformer.h.14.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28897280 + } + ], + "md5sum": "b2887840aa3c50c86eddc71480000dde" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.14.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.14.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.14.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.15.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.15.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.15.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.15.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.15.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.15.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.15.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "8505effb56012bbcda91b9e7bb93c158" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.15.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.15.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.15.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.15.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.16.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.16.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.16.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.16.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.16.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.16.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.16.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "6d06fde4309f888c01ea3f616f56aea7" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.16.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.16.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.16.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.16.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.17.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.17.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.17.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.17.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.17.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.17.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.17.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "54562ccd9d1dac76da5f102d96ea88db" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.17.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.17.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.17.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.17.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.18.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.18.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.18.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.18.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.18.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.18.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.18.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "effec158a3cf0d8bff6f8203ac4580da" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.18.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.18.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.18.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.18.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.19.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.19.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.19.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.19.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.19.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.19.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.19.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "c9d1281313a446d172bc62105553beb4" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.19.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.19.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.19.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.19.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.20.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.20.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.20.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.20.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.20.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.20.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.20.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "45967ea0914361d74b74a2393fa9ad5b" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.20.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.20.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.20.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.20.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.21.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.21.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.21.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.21.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.21.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.21.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.21.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "cd2167f1d7ef0d5f0d0ab7be1a61a9ea" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.21.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.21.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.21.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.22.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.22.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.22.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.22.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.22.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.22.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.22.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "5726391072a5d73c66edccceb8366967" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.22.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.22.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.22.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.22.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.23.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.23.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.23.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.23.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.23.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.23.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.23.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "bdfd67cd512421a63e91b8541e9bbb50" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 28483584, + "records": [ + { + "name": "transformer.h.23.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.23.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.23.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.ln_f.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.0.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19025920 + }, + { + "name": "transformer.h.0.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19038208 + }, + { + "name": "transformer.h.0.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25329664 + }, + { + "name": "transformer.h.0.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26116096 + }, + { + "name": "transformer.h.0.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28213248 + }, + { + "name": "transformer.h.0.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + }, + { + "name": "transformer.h.0.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28479488 + } + ], + "md5sum": "38f8fa4d8aaf75c8c214d07af7c7d771" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.0.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.0.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.0.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.1.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.1.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.1.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.1.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.1.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.1.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.1.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "2c0d99fc137694746dfd614307dd6d4b" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.1.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.1.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.1.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.1.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.10.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.10.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.10.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.10.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.10.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.10.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.10.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "b5ef86981682d6cd90646fc2ef08eb4d" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.10.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.10.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.10.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.10.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.11.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.11.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.11.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.11.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.11.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.11.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.11.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "34ef921057e3a153f25eb8b7fb90b1c3" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.11.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.11.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.11.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.11.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.12.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.12.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.12.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.12.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.12.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.12.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.12.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "3b365214d6ea61ade26e3cacdd276c96" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.12.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.12.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.12.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.13.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.13.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.13.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.13.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.13.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.13.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.13.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "782946c6fef27e24ef445121f800b782" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 28483584, + "records": [ + { + "name": "transformer.h.13.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.13.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.13.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.13.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.14.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.2.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19025920 + }, + { + "name": "transformer.h.2.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19038208 + }, + { + "name": "transformer.h.2.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25329664 + }, + { + "name": "transformer.h.2.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26116096 + }, + { + "name": "transformer.h.2.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28213248 + }, + { + "name": "transformer.h.2.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + }, + { + "name": "transformer.h.2.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28479488 + } + ], + "md5sum": "ae507bfdad127d0feea0a1e2ebbd4ff4" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.2.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.2.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.2.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.3.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.3.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.3.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.3.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.3.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.3.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.3.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "1e7fbe0e72a3a21093fcdec70b3a505b" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.3.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.3.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.3.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.3.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.4.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.4.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.4.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.4.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.4.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.4.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.4.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "6c78b1bdd909ca633c577e655385c143" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.4.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.4.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.4.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.4.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.5.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.5.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.5.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.5.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.5.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.5.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.5.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "3c4b4bb23287ea57ae74c09f4c81cf4f" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.5.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.5.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.5.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.5.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.6.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.6.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.6.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.6.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.6.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.6.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.6.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "387574102a1f7b9e21f57d84c8d582f5" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.6.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.6.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.6.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.6.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.7.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.7.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.7.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.7.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.7.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.7.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.7.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "e946e6bf12b4a1bd34ac6db0816d80a2" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.7.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.7.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.7.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.7.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.8.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.8.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.8.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.8.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.8.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.8.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.8.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "083f96c1ccc9bf3deacd7c7c780741a5" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 28479488, + "records": [ + { + "name": "transformer.h.8.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.8.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.8.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.8.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + }, + { + "name": "transformer.h.9.attn.c_attn.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.9.attn.c_attn.q_weight", + "shape": [ + 6144, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.9.attn.c_attn.q_scale", + "shape": [ + 6144, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25325568 + }, + { + "name": "transformer.h.9.attn.c_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26112000 + }, + { + "name": "transformer.h.9.attn.c_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28209152 + }, + { + "name": "transformer.h.9.ln_1.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28471296 + }, + { + "name": "transformer.h.9.ln_2.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28475392 + } + ], + "md5sum": "700245f42313a639789e0a8c7cbba5af" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 155582464, + "records": [ + { + "name": "transformer.wte.q_weight", + "shape": [ + 151936, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 155582464, + "byteOffset": 0 + } + ], + "md5sum": "66781657c5d52e21655cc2e793d86b1d" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 19447808, + "records": [ + { + "name": "transformer.wte.q_scale", + "shape": [ + 151936, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 19447808, + "byteOffset": 0 + } + ], + "md5sum": "bde5745df7175914a7458e8b7449ed11" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 19021824, + "records": [ + { + "name": "transformer.h.9.mlp.c_proj.q_weight", + "shape": [ + 2048, + 688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "transformer.h.9.mlp.c_proj.q_scale", + "shape": [ + 2048, + 172 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 704512, + "byteOffset": 5636096 + }, + { + "name": "transformer.h.9.mlp.gate_up_proj.q_weight", + "shape": [ + 11008, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 6340608 + }, + { + "name": "transformer.h.9.mlp.gate_up_proj.q_scale", + "shape": [ + 11008, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 17612800 + } + ], + "md5sum": "50a251c3cb2105a3a604395a1b7e45bb" + } + ] +} \ No newline at end of file