diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3025 @@ +{ + "metadata": { + "ParamSize": 199, + "ParamBytes": 15231233024.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1089994752, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 152064, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1089994752, + "byteOffset": 0 + } + ], + "md5sum": "dcd913747b6d87ab207ef37ef9f7fee1" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "ab39bbb3ea6166fe564609e3df0cc707" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "8a493af534d34f9e54ebe63a29af883b" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "aca3377a7a59426d634692a81d69e966" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "1918cb231e4c0a78adea7f425f85975b" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "aae0866ebfd59372f3cdf0aa89181f0e" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "9bdb83c432addf592c071ad7523f958e" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "64ccfbeee7c0d533415db5ef0531a468" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "caa34b0c7b47e18474e236679195e046" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "c1378cc6240dac0d24314cc055797284" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d60b929a803ff89b75d41dc21b2ef7fb" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "52e14bf60e71a9f5a0448db583c3a073" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "581eab76d13f24fd822e8538cb8a5fbf" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "117815d56d880f6c889eaa2a5f406121" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "c9b3eac039a4e7062d69f34ad241d192" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "31455ca170428d1dbc606abab13e1618" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "9f46bf025c4734d737b62c6a96aa5c58" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "44047e7698cec8657a31d0a450303604" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "46236d06fefb37c94e18a390b6c24b89" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "52f52ec7815f508e7cc1a7d4b057eea1" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "0b2302cf822dedfa73a9c58c552405e5" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "ab772922eb83bee95399519b0d70a1e2" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "234c8a053fe3279d53265a652a078c03" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "e0b0c5fe266e65cf6678146830761272" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "89048878c9db1eabc209b3ac7dfc0c74" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "63587e7a673a9cf1a609e4c50d41be97" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "5dacfbc0c17e33631d5cdf8d47c111b0" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "16d2072d203e11100afc32f2c7afcda5" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "9ae165695f55c0b78118639eb599f63a" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "bef013f07fe6a907cc6997f3e452459b" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "cbd2d1cdff66082b28d7836cf0dcf51e" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "e221025395d53fc7db3edc5108a5e37f" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "4ab4b6adf6728ac10fbfb46da56ce044" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "3e8a74316a7be5530ccccc42b69fbb37" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "6e8ad5b778b70a83ef596042aee327b7" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 1089994752, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 152064, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1089994752, + "byteOffset": 0 + } + ], + "md5sum": "254abd287d334208de44c1f355454f1d" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "cd37904ee5237e6d21e43b22a7dbf8e2" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "94ef3721abe33da63c96d4628de93c88" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "167c28bd65048906e7f6250ce657380a" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "6dd1667e195073638ad5327a5782cba6" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "4b63ee2064b0b1f3b76acc5db57c1272" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "b40704de954f59ec29564840b6a11169" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "2ec8663eeb85fac87e675da743a322cc" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "9dc76d400acdbd440d79fe38833cfd9f" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "de27ae42c1a253033d9b7c30bbfdbcd8" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "1a106a75074f2b4593541af8ef716664" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "c471b3cb9d5da399c9a965f3d26c53af" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "836998ceba602dafa9ee9c80c5cbc530" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "f6e55ddf5bdd0f81833b64909799b8ab" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "393a2559bd5fa656d3093163cfd9fb8d" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "310cb2ad7ee85417d5c1ba4d43311038" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "ed81584bc3d46928218e42c64ddfa334" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "b8afb6fc392e59a230c8f449c2f9a4be" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "85d2dc8c147818f18c2c03ef51b691bb" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "fadc998cf7ecad1f3c0a269586e3d90a" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "aff73ae14a84b6313ef1b53f2288d2a6" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "1cf5e4a0f5c7a0cb37795dc3d487c122" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "813de318208be4cb0721092a3d3e25d4" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "b76c8602800998e411d9fe83a1c1461a" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "6ed74d0730e0db45f97a1754bdcc2c7d" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "3ddf8961a04f5ebf7bc425494d116568" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "113438a6c39db1d434e11695190c1b6d" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "cdcc31107ca6829f203e92dd5addf5cc" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "20d1f08d485fcf28d253081e2a0f6c7f" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "f66296d6524b1c81ff00ad70ebd8152c" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "0baf85ae6423b5d5bb13ffee108dfe8f" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "e8156f7aed4a1dc1148b01cc4eb0ed97" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "f1bcd77a02971fa623a94a9f67b40b4a" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "423409e9c2c994c97141bd21eabce4a5" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "fa5c8ebc03b43a3176e9e89f76f19df6" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "9654f98275ea5f798b9d690008f1a461" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "c9a1d46fb82ed043fe8a905f5c28e1b3" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "daa9c2b305e8833bfe5dda77883d792d" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "20c1948a3adb04ed45d6eea4a3c507b3" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "7b6c5a70cf5006b13b1fcb93e2d6f1d9" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "08190962b20ea73a205f94d7a9fa966b" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "9ffef47a4f3e178207b7c742fe5fabac" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "900ec597f771b29362bc5ec140f9c65a" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "902bdb9cff38b6442129c92579ca0df4" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "74b62c8b11a5eab772136594e40ede27" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "8042e0afd1d2c038bad50168bf1f2cb0" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "0df2f4f0c17c4f20ca498b7e23840ec6" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "63581172bf34b03b5d0defe786d36999" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "e87541271179409bbdd8316e40e1dcc1" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "35d2c2265a1d68c86352d6d6115be4c1" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "4c7457fbdd7eb260fb5d35670398f48a" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "6e5d8e445aacc132be01fdab6ab3e7fa" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "7246a707d36e428ad0974211d84e9e44" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "2ed42e29dadf859990839ee0d5914623" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "97200233d4c8cbe6dc430f6316a5b5a7" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "c2e8a379ce5dd2de73f965b57fa525c5" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "106d503a7d359b607411e77ba4b76b2e" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "165f35bace1ecaa339de5d359b2bc1b7" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "519de89df300c7edeb1190b928d3c640" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "c075173b850bbfb5ca61503bcfe8d6f0" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "b1b4e23ff0aa129456e5d87e2c081e84" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "087915547db9c86442eaeb53315c1553" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "eb4a810f39aecaf9d82533be60d6d382" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "aa012cae74ab0436925c5ba9eb87e296" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "1d326a4d5ce400ab3b65d9495bc93ba4" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "c8a2220d1d0099ea446720fbac8e3f6c" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "00920b341df121cfcc54eaf7dedabf40" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "32113240d8aca15ec8ae39430c3f3f22" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "207e4e2b0ced9ceb4a35e6a75654a773" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "895cd9e1fa8ebdc43aef89745a422263" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "f885957c113595fd7abf88d5202f1cde" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "fe4d7f48ad2e6a701f5e9085e8b0271f" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "d6af4ea9517130ccc5307ba6c33756c4" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "254637c53e5887f23ba66af6da8743c1" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 135790592, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 3584, + 18944 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 135790592, + "byteOffset": 0 + } + ], + "md5sum": "6dfc05d9494307bfed654b5e948025d8" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 271581184, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 37888, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 271581184, + "byteOffset": 0 + } + ], + "md5sum": "147c574708867fe9e8dec55ca9466280" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.weight", + "shape": [ + 4608, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33030144, + "byteOffset": 0 + } + ], + "md5sum": "470b4a47f24467ec4471b43435063d60" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "bb771974e63d490906f710d3683fed64" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 26356736, + "records": [ + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 7168 + }, + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 3584, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 14336 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25704448 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25711616 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25718784 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25728000 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25735168 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25742336 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25751552 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25758720 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25765888 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25775104 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25782272 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25789440 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25798656 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25805824 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25812992 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25822208 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25829376 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25836544 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25845760 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25852928 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25860096 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25869312 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25876480 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25883648 + }, + { + "name": "model.norm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25892864 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25900032 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25907200 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25914368 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25923584 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25930752 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25937920 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25947136 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25954304 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25961472 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25970688 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25977856 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 25985024 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25994240 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26001408 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26008576 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26017792 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26024960 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26032128 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26041344 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26048512 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26055680 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26064896 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26072064 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26079232 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26088448 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26095616 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26102784 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26112000 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26119168 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26126336 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26135552 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26142720 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26149888 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26159104 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26168320 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26175488 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26182656 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26191872 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26199040 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26206208 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26215424 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26222592 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26229760 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26238976 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26246144 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26253312 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26262528 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26269696 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26276864 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26286080 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26293248 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26300416 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26309632 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26316800 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26323968 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26333184 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 26340352 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 26347520 + } + ], + "md5sum": "41f8301782ccec72fcfbc67780084399" + } + ] +} \ No newline at end of file