diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..63471258c386a6b2f22ffca711224f151421f4cf --- /dev/null +++ b/README.md @@ -0,0 +1,52 @@ +--- +library_name: mlc-llm +base_model: nvidia/OpenCodeReasoning-Nemotron-1.1-32B +tags: +- mlc-llm +- web-llm +--- + +# OpenCodeReasoning-Nemotron-1.1-32B-q0f16-MLC + +This is the [OpenCodeReasoning-Nemotron-1.1-32B](https://huggingface.co/nvidia/OpenCodeReasoning-Nemotron-1.1-32B) model in MLC format `q0f16`. +The model can be used with [MLC-LLM](https://github.com/mlc-ai/mlc-llm) and [WebLLM](https://github.com/mlc-ai/web-llm). + +## Example Usage + +Before using the examples, please follow the [installation guide](https://llm.mlc.ai/docs/install/mlc_llm.html#install-mlc-packages). + +### Chat CLI + +```bash +mlc_llm chat HF://JackBinary/OpenCodeReasoning-Nemotron-1.1-32B-q0f16-MLC +``` + +### REST Server + +```bash +mlc_llm serve HF://JackBinary/OpenCodeReasoning-Nemotron-1.1-32B-q0f16-MLC +``` + +### Python API + +```python +from mlc_llm import MLCEngine + +model = "HF://JackBinary/OpenCodeReasoning-Nemotron-1.1-32B-q0f16-MLC" +engine = MLCEngine(model) + +for response in engine.chat.completions.create( + messages=[{"role": "user", "content": "What is the meaning of life?"}], + model=model, + stream=True, +): + for choice in response.choices: + print(choice.delta.content, end="", flush=True) +print("\n") + +engine.terminate() +``` + +## Documentation + +For more on MLC LLM, visit the [documentation](https://llm.mlc.ai/docs/) and [GitHub repo](https://github.com/mlc-ai/mlc-llm). 
diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..859e7c4db5a97e2f600abf636efa7d066055cf6f --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,6849 @@ +{ + "metadata": { + "ParamSize": 451, + "ParamBytes": 65527752704.0, + "BitsPerParam": 13.92728514370428 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1557135360, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 152064, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1557135360, + "byteOffset": 0 + } + ], + "md5sum": "c11cf3478e75b671ce4c41390a4b0b7c" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "08382f845f3c178125be02e28cce5815" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "1991da3ddf42fb8cb8a7e25eea939308" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "eaf699c19614ed5b28bdfc3bcdfc59a0" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + 
"md5sum": "078d5a2669019619cf7ac7024b7929bd" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "86353f7ce0710142ccd2bef37c49e01e" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "75b54e342b2eec6ca88023483a946945" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "7a7e7a72f451b9f5d5ce2793b240c629" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "f482bf2553c5f67f3cba12e6b7cab478" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "2ee68ef95fe7655ed0b52651a607d504" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": 
"f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "8b077e411b1dfaa8f79bc16ef3a17fb2" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "ed77c541c31ab8829d13f560ec5f4355" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "85ad0336a80031f1419dfd410c94f652" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "d907d7e411e468bdf2dd9bb34c0f49df" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "1856554a4c5e40ce53673f05e94ce1f0" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "192e46b225038c15aa11137a0233e4f6" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.weight", + 
"shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "55c5da594f982688dfa871ea04c08967" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a2e6142514ab2389b026ed5d2b173a5f" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "933e361673feda3e13b79ab391f97852" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "452af359f8f2e7c57cb733e0b6c0c4e3" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "721d102f022c882e2bf4bb9bf3033600" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "459aa217d75dafb2144b141f562b61eb" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 52428800, + 
"records": [ + { + "name": "model.layers.63.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "440a42b547dc002eb3a8bcd72d575a74" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 1557135360, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 152064, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1557135360, + "byteOffset": 0 + } + ], + "md5sum": "7e52ab5dfe8e6a239b4720eace189e98" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "ace91badb3d3e1e4266ed959d1fca6d7" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "0f938aca4f9bfed6ff4058bb2d0c9335" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "12929ba2a9ccc0cd538dad56e0a3f596" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "dc2aeb200d9470e87d7f6f97fe144b3e" + }, + { + "dataPath": "params_shard_28.bin", 
+ "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "3e3203142858a5f212ba3c6f7dad182d" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "434d00a426c23b9eb524f147a9bf350e" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "ceb34c47eefcce073c44854f5267f733" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6e9801985d11c36d2b6f87a25b4c4c61" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "a44584f15f7a2210d580631f08a4b0b7" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": 
"3588fd1b9597394c853821e5284a4ae4" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "8cafe41db7db28f3004a517bde1c4ef6" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "1cdcc1dfe6301092599532f9a4454c46" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "9a6ebc4223b9196dc92f1b6d84681d95" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "af6a7733a525b4658718c5316f512e2f" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "1b0e5ef048c353eedc80754cc00b669c" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + 
"nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "dd5497f813a11bd7446659ee343c5c58" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "0be3d921d8cd7c9d379adf939280b37c" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "24840caccc52f2aab6077a1bed9d84c6" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "f963228f4897d218ae9b4a501bba63d2" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ad9fe2ee932f296fa83642b6af3940c0" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "90708c54f8d552917eabdafe41ae64cd" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 
5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "1ef39a88e3683c6c21a1fccc3e3c0efe" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "cdfec41c8f527a5d6f8e320be8653511" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6e70cdd636d8e3cb68aed158e6a4e5dc" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "67114310577007465a066cd8f8524b88" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "4529270bfaa535a30b7f41d99f58f10c" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "f274853986038be46388516964f6a29c" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": 
"model.layers.6.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "4774986aa3f2485bf14c00881b1a07c0" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "9525f9ad897375b271276fe3b8f27fc2" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "0810a15124713d0a8198c1857006354b" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "3b0015f538d7f2e069614abae2cb3587" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6800a07f49887e3b0473ab877d0e8393" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "2c6d616a3ef37e0023605225b2dcc5a6" + }, + { + "dataPath": "params_shard_57.bin", + "format": 
"raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "4ec4e1a92a8b73b669475ef8525384dd" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "17472ccb7af0a4ab9cc0b87d78a56b6a" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "a9d153badd4c4fc756ec25ff03c80463" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "77315db4c38410ec75241ef0106b83bc" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "3f40fca1c532bb4ef4ce29617388d3b4" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": 
"1ca5be6a5f55c5a87ccfa4d435899d06" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "a96970618a9ea4733bee834fd27a81c2" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "3c0dc95dea6f6f7760fd7256a95c234d" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "e8a8c59c19d2ed2ecb91611740690436" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "344be3d1fe17bc816f4e0dc9b349e8f1" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "0212b22ca7c4b1d36f6270b0bb7be03b" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": 
"f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "8de04c1797aabaed325abccc6cfa9176" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "01d76ca02a55946d522171e853cccc78" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "12ba9c40c0d76ad4bf0f337822c2be57" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "c9e6f2659ceae052b9a34dfb7d48515b" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "4cebdcff986138ec67b9df88fa332dd9" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a53ecb99cebed09207d71fd040508d03" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + 
"shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c4603556f2b5caa335c76d2dd3928588" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "91695f78b2f9db83b2cfd5260ee72ebd" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "60bf6c3219205a0b58439dbb11efd3d9" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "0d48ef3eba0b702cb03f9caa9c2a4df2" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "1f8e90978f80efef4d7bc9206183cacb" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "7cae550b3e33cfed275fa9a83c367c70" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 566231040, + 
"records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "7bc3046c68a83dc4b43cf1d361ad1f3e" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "b77f5e900843b449278461a0da57a51a" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ae8d93aaa16d6567303335fe5444b589" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "ae0d84fc84c78d6235e5f51ba033eea8" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "e126e95b7354ee248424ab334a4595a9" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "d4fda2fce1a8e0120ce613d4d6c5a45a" + }, + { + "dataPath": 
"params_shard_86.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "14e202f0f72d5c67f6fe338a1ddbc356" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "92f32b0b5f17e56f074f11cfe7e1ae3b" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "b0625c80eeb0ca915b80106d0604e1c5" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a1bc3275fa3ad4a199e998cf457d8029" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "475330ea0f3a0d42f462d2b2ff2c0696" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + 
"md5sum": "056ed00ec6850da568925efde53ffadb" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "906ae0628d713939a32222aa48976ab2" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "5efd524b3d4a277dd124be9678f94e2f" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "407352f0c790ffe7158bf962f2c861de" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "20a3ad27e050c04d36c8a4cce4f77aba" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "4b724cc8354dbc4a357ba8528725c77f" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": 
"f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "53e7fe85adc3cf2a9653659e3595dfb1" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "af445d043881e3dfc8e11449f843198a" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "95667c4db0eb3e7fe613e2f31817cfbf" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "590db25395696f506482b182c2ab0f60" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "b7dd4948e858d6fad56d9c981ed22adb" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "86b918ecf2fafbf5a9e0db700f00af6d" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + 
"shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "b66f6698ebee824a30983f7d892c0c65" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "08daa0954c34852a19aaa097462d74be" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "0f8c9da832c7386f59b9b6e82fddde8d" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "437198888a20303cc75635c6893f5473" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "b5be96ee728cabf8c1e65adee4b468b0" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "d19f831287d919e1f4214f6ddbca6bc8" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 566231040, + 
"records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "3d931cead84ac8b60ad435d75cab4e4c" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "e93e96f51c5f8eaad4e4eecd5303d7ea" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d83dc9e1e1c7a129e41739990cb38389" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "424fe40438e009e5a20dc4b1ad96f772" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "08d97d5daabb4eacab96e51441ce945e" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "91ce9c9d7a3a221c81d2ef82595298d7" + }, + { + "dataPath": 
"params_shard_115.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "942e77205affd0e30b9429a88c1518a2" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "a09db7c3b5e6fb95255b045ef067fe61" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "6e7c42f01ff92660a13318a6a3785b07" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "ed08e7ef2d7bf43bf13f39f083b3527d" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "0318886b973422467f8ca511bcc9a668" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + 
"md5sum": "d813e3c67148d223d17b81960416b88b" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "10d0a346373d758d217f64cdc2adaece" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "4e245cd2e1968ffd2ca9bc229a6ca055" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "1033a56d50496d8adf14ecac2db7758d" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "eb56b0aa69756e3882665717cda66219" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "93490233219d2f5e7a7cad76926127e7" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + 
"format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "f9b00317a840bcf6828d29a58732d17f" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "5d225df46ede9fe84e5caae1d8875837" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "3078c5de9d190052c630633cc383f4e6" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "2c4a91bb75d35d793eb4199939ca98f1" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "8fc29644343cc88f59ac2572e63ac594" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c2d67f7fbd78c58828a3e07d0d57aa86" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": 
"model.layers.27.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "15e43141672697e610aea6fe7ce7f3b5" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "a9c76bb50b75080ba00b4aa98cdcff8e" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "11b8fa0417841b860933a4ef96c81fe0" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "2be726b77935a45f3d5ce3e8baf55fb8" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "5b1da532afc1509e409c6053378a20a3" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "fa1c26c0fc514aa22c398e923fd28ba6" + }, + { + "dataPath": "params_shard_138.bin", + 
"format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "636cb207241080246182a42935e20ba6" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "6eaab88663ab140bfcb5a402a166cc12" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "5d4a6188ba9ffe4d935fb8fe29e1a3cf" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "07f80050d09a6fa2ce2f85cda9b440b8" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "78a07eddbee3394e414b44072d6261a4" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": 
"1faeb6b644f5b209b4d705d73465fbd9" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "912c91c22ebad17f9e771c9bfd81890d" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "ee21593ae1e2738340bf07a3e2a5f086" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "65b7f348577e3a3da25fef4067386b9f" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c7f551114323af7b5c2266869b4c85ef" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "fd390325119ffbf3e64b08493d1f7a29" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": 
"f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "a7fe8ea88483103f1cb154adad35e476" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "88db0e0ffbfcc30ee1881414beddc147" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "bc37accf4f8424f1980706d4d72cc06c" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "018953b0932e4fa32ce227f4606b2bcf" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "9752ff9c4d2ddf845918cfb0bfd5faff" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "25a7879a7b9296000c5b18dafcf19b9c" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": 
"model.layers.32.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "dc7cf91ebb8b87f8d22f1f93ab174c0b" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "dd4376a46e978b28dfde6dbe6e713565" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "0ff9c331aaf562091652f04a0eac2cd9" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "52e807896b3888a33d3882de5c2028d3" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "2fd4d329fd586344138dd0350bb52b78" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "246c3b98c46a21c358ef8b787c23c505" + }, + { + "dataPath": "params_shard_161.bin", + 
"format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "87d5da0bd6cee2da99f17e4762c4be3f" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "20c395885fd80b96a01d8681e680fcb4" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "bb24995e2a5ffe1383882d5af4cdfd91" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "3e324dcfd38292bdec61fe1584944496" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "7fbab8a1b213ff6aa179aeedb20b3678" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": 
"0aae830b676bf2d925df1ed63749c928" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "646243ee0e76b1ddd557fb8f27ab99e3" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "bceb3e735a28c847730c21ea8b891cb5" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "f3fd210b815272c2bb92958cb63011bc" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "aa5e36af555c0c60a1a368fef9117b76" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "b014fa09adbb76e85f25ffb1b46e04db" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": 
"f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "0582d0760b655e35d934a2cffde094fa" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "820b899385a420f0e39509941774d454" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "10d2dc9c6478458b59f60054a363a599" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "3c1a68bc405ee2201a5732aebcbca44f" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "acee256e1953072816d1c741d64e1dd7" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "35df09cb7ddbdbedff32ae5997b901fe" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": 
"model.layers.38.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e35efdb5045467d18b2b05944fa6778e" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "41001c6dc4ac3f5effef66fa774c340b" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "5c44d929b3c0c6dfc7ea09611575ebcc" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "e260930affc69708f2d1b6b4f9d2eeeb" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "9918641ab660b66232faf7bab4707392" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d218902a7fd121f323d67185d6716511" + }, + { + "dataPath": "params_shard_184.bin", + 
"format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "d887e1f37e2f3b277673e78990a0215c" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "2791b05140c4c37e1f085575b7b641bf" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "6ed0ca567f823dd29d310740090f5ebd" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "bee7995ded99632d37113d0a067a793b" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "6259840681453819fb8695e7b9934955" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": 
"afa5daed816174d6571319862ffb172c" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "e71e0abea9452662189d7615a8a2bb0e" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "169366e4241d2d330692db8533c4a05a" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "96a0c58c4de9a97283cce30539469608" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "56d078c57d3f9ece1e84053440a04d45" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "7c4e35a1757f11eb5d4bbc6d65885c10" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": 
"f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "588c0008660b3f44568884582e70e298" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "c96af2e433b270ae05266bab8a369dd1" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "dfaf243bdeadb028b210e0b2f7d352a3" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "ce6d2408518a60c430a02a8cac541bd0" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "b42d89de343997a9be6dde0ad48886ff" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "d9bb502c230222137830935a00a86672" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": 
"model.layers.44.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "943539d605b665440e55bb7f518f08f5" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "5635cdacba20d39009aea9744521322c" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "97589702de25c929b402cf9021a61731" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "188289d27fdec079f36c24bbeaa6287e" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "f2356d135eab1b97a1999784c3bf7c01" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "95340b636baba41303919dfbc1a6eabc" + }, + { + "dataPath": "params_shard_207.bin", + 
"format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "78f612aa174b25dae2cb96022fa0e84c" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "c1ed996ab0612650b5a0069862f0ba63" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "d78392be6743b9556d16d1ae5d12f634" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "d74d4948968229e940ce0779ad1e7672" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c9361e05a3b1789f756a0f1528bd3b79" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": 
"a0e0f39350c818ecff360aeb09434d42" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "8663575f64fdb21bf3e9b68902e8d81f" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "50319ec57802d3b1ec00f2d4d82388d9" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "3d90c4cee0803ad2da7d6a4eaf88eaba" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "d0d43378648c346524f8c0f95d08b8b3" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "c285c1b13094d27131d72c1a9026e038" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": 
"f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "79928a476f6ed0bae224d364120f373a" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "fb478e8f2f7ca40c8a3ad7c43a53a57f" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "aedbe67769b43fa5ace0cf9caebf15de" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "e9e3028f3e2e1a639592962791d1e3b1" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "4e0f9b0a2b31ce3e6ee6894845c35632" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "71e96a6c044c59bdc29422cd5d23cc0d" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.weight", + 
"shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "499207de95883514255f98836097126b" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "177528b456ee872403dcb2f503e66ecf" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "eca0b236caaf7c4cb5405b3e95a6f367" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e5fc1ff31f105cb71a414f9915025a94" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "5fb3ca77b8ac7f1fd693e32b657b12f4" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "a80169cae28095e91192cf6a49aca432" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 73400320, + 
"records": [ + { + "name": "model.layers.51.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "89a39dc5f85080698d5fe7e282ff0e00" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "177dfccb6750916307a7865ada092592" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "64e735684c27e71c1e2d7bb2e8ee347b" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "ebf5d7a880b1e0a1e53e1dd2213a12d2" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a23ed7b33e9a387b84bf814f07cfd428" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ab7207ca6dbf5e90f3288527b06d0996" + }, + { + "dataPath": 
"params_shard_236.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "da1333293942255f0f94beaccbc8d37a" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "d437b0cdec63d1ce2ab17da72ed80cec" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a0ac87111f16d3b4253c94e1171106d8" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "76a78553716928a1164e0d7eb4c10f0d" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "14a54ea4a344deb75c450f74e541d000" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], 
+ "md5sum": "4274f9994fd3232fa9301bf484ba7b0f" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a57e67d1a1daaa20b1e84f067e4244dd" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ade5c32b7048b9198149501cfa3c5fdd" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "4bdebc607ebb67c6f232f99eb8f5b4d9" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "bebf519ea1b5b184df5efdb8cf98c2b2" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "f56fe2a6de3e829a7edac9f4411293ee" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + 
"format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "da44a7a0bffc082a32a45d03874d4c7c" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "35b1c3538dfb8d2917352aadc90644b6" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "8cd0228326d0dde79848aa47db57c16d" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "988fc04a8395798c80733617b7b36fed" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "4af75a89b793ad871ace19e846ac9123" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "ac6757e2e8d25f6b8bd8b1ca5deaa1a9" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": 
"model.layers.57.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "8b78d30a8ee0c738b3101768ed711171" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "e0c01a97ae9c374d2aa1769c16f865c2" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d8f85ae54c84f49179ebbc861ae9ce20" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a5ce8a5ad7d7e1a16a6bd09ee080d47a" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "7ea4e0acb194aabed6caef8f889ea673" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 2238464, + "records": [ + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 0 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + 
"format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 10240 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20480 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30720 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 40960 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 55296 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 65536 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 75776 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 90112 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 100352 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 110592 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 124928 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + 
"nbytes": 10240, + "byteOffset": 135168 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 145408 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 159744 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 169984 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 180224 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 194560 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 204800 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 215040 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 225280 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 239616 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 249856 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 260096 + }, + { + 
"name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 274432 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 284672 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 294912 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 309248 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 319488 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 329728 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 344064 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 354304 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 364544 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 378880 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 389120 + }, + { + "name": 
"model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 399360 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 413696 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 423936 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 434176 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 448512 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 458752 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 468992 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 483328 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 497664 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 507904 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 518144 + }, + { + "name": 
"model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 532480 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 542720 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 552960 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 567296 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 577536 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 587776 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 602112 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 612352 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 622592 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 636928 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 647168 + }, + { + "name": 
"model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 657408 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 671744 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 681984 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 692224 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 706560 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 716800 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 727040 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 741376 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 751616 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 761856 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 776192 + }, + { + "name": 
"model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 790528 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 800768 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 811008 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 821248 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 831488 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 845824 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 856064 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 866304 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 876544 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 886784 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 901120 + }, + { + "name": 
"model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 911360 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 921600 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 935936 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 946176 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 956416 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 970752 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 980992 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 991232 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1005568 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1015808 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1026048 + }, + { + "name": 
"model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1040384 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1050624 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1060864 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1075200 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1085440 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1095680 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1110016 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1120256 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1130496 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1144832 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1155072 + }, + { + "name": 
"model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1165312 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1179648 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1193984 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1204224 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1214464 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1224704 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1234944 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1249280 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1259520 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1269760 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1284096 + }, + { + "name": 
"model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1294336 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1304576 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1318912 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1329152 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1339392 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1353728 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1363968 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1374208 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1388544 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1398784 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1409024 + }, + { + "name": 
"model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1423360 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1433600 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1443840 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1458176 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1468416 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1478656 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1492992 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1503232 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1513472 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1527808 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1542144 + }, + { + "name": 
"model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1552384 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1562624 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1572864 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1583104 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1597440 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1607680 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1617920 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1632256 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1642496 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1652736 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1667072 + }, + { + "name": 
"model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1677312 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1687552 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1701888 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1712128 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1722368 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1736704 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1746944 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1757184 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1771520 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1781760 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1792000 + }, + { + "name": 
"model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1806336 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1816576 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1826816 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1841152 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1851392 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1861632 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1875968 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1890304 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1900544 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1910784 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1921024 + }, + { + "name": 
"model.layers.49.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1931264 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1945600 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1955840 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1966080 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1980416 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1990656 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2000896 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2015232 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2025472 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2035712 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2050048 + }, + { + "name": 
"model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2060288 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2070528 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2084864 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2095104 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2105344 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2119680 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2129920 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2140160 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2154496 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2164736 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2174976 + }, + { + "name": 
"model.layers.57.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2189312 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2199552 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2209792 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2224128 + } + ], + "md5sum": "3b8e829c0d6be42b592125838fc02486" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..cdee8e446d5e2d6f4f55173e1a7f1405da3a98f7 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f75e33fc0bf782dd98427f8e6b93976e0ba5ee6cf0312aaa3bbae638aea641d6 +size 1557135360 diff --git a/params_shard_102.bin b/params_shard_102.bin new file mode 100644 index 0000000000000000000000000000000000000000..58a8b7a9dcd6ae5ac1454b0dd69f470ca89865aa --- /dev/null +++ b/params_shard_102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5022928ae3f135fadaaf0dfb12c3d8de00099f8e27cce45f0356c00d9096282 +size 73400320 diff --git a/params_shard_103.bin b/params_shard_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..974a1e1f25445535315a53a9310c466009f3bacc --- /dev/null +++ b/params_shard_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b67cdc1778b2f1889a79181fff9e86a7dedaa691567f48bb1f8dae9c291681e8 +size 52428800 diff --git a/params_shard_107.bin b/params_shard_107.bin new file mode 100644 index 
0000000000000000000000000000000000000000..8548528d46ee6d2cc17dc20e355de5bcd33434f7 --- /dev/null +++ b/params_shard_107.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57c7b0f7625515e36bb2df06f612961bd7627874e0712c18761209c4b8ccc68d +size 52428800 diff --git a/params_shard_108.bin b/params_shard_108.bin new file mode 100644 index 0000000000000000000000000000000000000000..d895bb9a917e6d65a1305f9f2df29ec4f0b88cbb --- /dev/null +++ b/params_shard_108.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f27b527ea33a37785a3a36305599aec5ef8e08364a0fcf28b2edb3470d8ac81 +size 283115520 diff --git a/params_shard_111.bin b/params_shard_111.bin new file mode 100644 index 0000000000000000000000000000000000000000..6ae623008d129c6502bd5ccd16efac08637a8275 --- /dev/null +++ b/params_shard_111.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c65fc3749d865d27165d396a8b96c49e70ae53b80780efa0e95391524728cece +size 52428800 diff --git a/params_shard_114.bin b/params_shard_114.bin new file mode 100644 index 0000000000000000000000000000000000000000..908cce84c0360eb014785f3d265ca923abec0351 --- /dev/null +++ b/params_shard_114.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f4adc2b58a9ef564d27f8ee3924368a376f6f11133da307bd50f75582e66f9 +size 73400320 diff --git a/params_shard_115.bin b/params_shard_115.bin new file mode 100644 index 0000000000000000000000000000000000000000..1378e8438d3e101c938b8a35fc1e4f920d1c1b8e --- /dev/null +++ b/params_shard_115.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e05242312e460a49907649518243e8b60e5cdd0008358c7cfbdd4b877bcaf719 +size 52428800 diff --git a/params_shard_117.bin b/params_shard_117.bin new file mode 100644 index 0000000000000000000000000000000000000000..a3ba56cbde4a5cfd68b735cf8104193710b39fe8 --- /dev/null +++ b/params_shard_117.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:7177bf3e3456ef89cc6fced6ac318c4b848fd626745c7a5f075bb8f9419dbcb0 +size 566231040 diff --git a/params_shard_121.bin b/params_shard_121.bin new file mode 100644 index 0000000000000000000000000000000000000000..1dcb140f7dae434f5a4cc837d8ece635ce4fb82a --- /dev/null +++ b/params_shard_121.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c3594417d83e248296fe96efb18aca80b46815a8d0b3ce828e6c1218a953ec +size 566231040 diff --git a/params_shard_122.bin b/params_shard_122.bin new file mode 100644 index 0000000000000000000000000000000000000000..37680c335d5e9f26e8f17d838ebf73507ed19fd4 --- /dev/null +++ b/params_shard_122.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b34c6dc714d6581cf7b865c5669b2241e25513ab6cae8de9dd151c891af66b02 +size 73400320 diff --git a/params_shard_127.bin b/params_shard_127.bin new file mode 100644 index 0000000000000000000000000000000000000000..437351973b073c1dec443923e7fde9c33f7f9b82 --- /dev/null +++ b/params_shard_127.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f711c4e34ba90aa040ebe3c51ba06be9fa51e8074ce45ff97d242e82b3325e9 +size 52428800 diff --git a/params_shard_129.bin b/params_shard_129.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5c56531b97cbf7eae9d88b1dced893044415cd2 --- /dev/null +++ b/params_shard_129.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d9fcc8df558dcd18bc1542461823456496e423f1dee79237745af336934abc +size 566231040 diff --git a/params_shard_131.bin b/params_shard_131.bin new file mode 100644 index 0000000000000000000000000000000000000000..66e580a8a8d623b749de4d1c8d7a45e1430d2655 --- /dev/null +++ b/params_shard_131.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8feb1bc9432a65ae82a27339e26de98463045a5ad6f8bc3ec8a45c5506fd3327 +size 52428800 diff --git a/params_shard_133.bin b/params_shard_133.bin new file mode 100644 index 
0000000000000000000000000000000000000000..1a4938582c0571b319a01006ffce245ad1bf8421 --- /dev/null +++ b/params_shard_133.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e595863afc3d987fe9a1c10c46a384b8d32955f230f1d949542570a183229f +size 566231040 diff --git a/params_shard_135.bin b/params_shard_135.bin new file mode 100644 index 0000000000000000000000000000000000000000..50f1d1a89873d48e8dde483b63888a062b7ee92f --- /dev/null +++ b/params_shard_135.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9e6caa45881047983c9393912d4d3160b83e9388bcafe3096a7da38cf76e94f +size 52428800 diff --git a/params_shard_139.bin b/params_shard_139.bin new file mode 100644 index 0000000000000000000000000000000000000000..f9a02cfa99d8e5a7f8003c6c5b577bb80ac6a7e3 --- /dev/null +++ b/params_shard_139.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d70c7e7aa9e45cf7c1975520cf02353bdd9ae863a6e07f7447565d3e8b6a6dd +size 283115520 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c03aebf59dfaaab7058c8577062fade5e5b8b65 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af12979477a0ee9a0a9b9035e33c919a9a923374e96403082442c703ed6ad88a +size 52428800 diff --git a/params_shard_140.bin b/params_shard_140.bin new file mode 100644 index 0000000000000000000000000000000000000000..71e0cb7459ea16faf22f144d97af8363c2b55971 --- /dev/null +++ b/params_shard_140.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010742d39f79759f1141a2269aeafd9d4efa3ec1b2c49eaab4b960f64ef8087a +size 283115520 diff --git a/params_shard_141.bin b/params_shard_141.bin new file mode 100644 index 0000000000000000000000000000000000000000..fa982f591b0a7e483867a938e31d56e1fc08cb9d --- /dev/null +++ b/params_shard_141.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:de62984bb1fcf7d8376673839dfa5e3afd3761872213a540ebccb2ac3087cec9 +size 566231040 diff --git a/params_shard_143.bin b/params_shard_143.bin new file mode 100644 index 0000000000000000000000000000000000000000..6cc70984c2a51e85736791183fab3ed619ec2357 --- /dev/null +++ b/params_shard_143.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9130c1abc9b67e6391a8b9c63b8e9cb1884543762008a037b69a9415c6e9e686 +size 52428800 diff --git a/params_shard_146.bin b/params_shard_146.bin new file mode 100644 index 0000000000000000000000000000000000000000..1186dc9eb5356756570e532e3b50ddad262a82f0 --- /dev/null +++ b/params_shard_146.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0be2b34df32f95cc994ae976490b67279b44a3eeef44114746349905343a725d +size 73400320 diff --git a/params_shard_147.bin b/params_shard_147.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf40c0d3293b03ee449cf83c850f7ac31f8f0018 --- /dev/null +++ b/params_shard_147.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:475a67353b6b3c87e2bcf8905d0f159cc9351c90ed82ea35a838f6b85a28a82a +size 52428800 diff --git a/params_shard_149.bin b/params_shard_149.bin new file mode 100644 index 0000000000000000000000000000000000000000..4631cfc1747c60298a09f476e4b9f4ea5d0dc43b --- /dev/null +++ b/params_shard_149.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdd2046ca0aec03608aea4cef1cf139277247c8dbbd3c9c2459b08b339cb1830 +size 566231040 diff --git a/params_shard_155.bin b/params_shard_155.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1950f454901bb391392ec97391d2363824426a2 --- /dev/null +++ b/params_shard_155.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:257559ec941434460a09c096fdb289c4a5039de56ccfb68e8d96df7d03ed4fad +size 52428800 diff --git a/params_shard_158.bin b/params_shard_158.bin new file mode 100644 index 
0000000000000000000000000000000000000000..1d78269038e29455b8931adf5595277c8525856f --- /dev/null +++ b/params_shard_158.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc394ca7e742239e05a3b70612e575efed71fc3fa97ccf3a87a6ecc2407c0b7e +size 73400320 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..4f50c702081ee9b8ff80fee18de7c5b8890c82d7 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd59e59a03d10821d830f24439fd3fc1771d642483444f3492bb618fbc78bd28 +size 566231040 diff --git a/params_shard_165.bin b/params_shard_165.bin new file mode 100644 index 0000000000000000000000000000000000000000..5d102df8856c8d46e65981a157e4bc6265f8a335 --- /dev/null +++ b/params_shard_165.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bf1ca45ba716158348cb2e7ae13f27b6af65b2934afa0ba4fa4e9a60ad76b76 +size 566231040 diff --git a/params_shard_168.bin b/params_shard_168.bin new file mode 100644 index 0000000000000000000000000000000000000000..9937a5712d1943688c506dceb5710f1013c49172 --- /dev/null +++ b/params_shard_168.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc3db2395ee3e9efd611a321ff2d350ad7c5536aba315a6e7b96f16d27d60d11 +size 283115520 diff --git a/params_shard_177.bin b/params_shard_177.bin new file mode 100644 index 0000000000000000000000000000000000000000..46ddd0f7bc815ffd98c29ddbb517f8752d77a11c --- /dev/null +++ b/params_shard_177.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2301008d7c95c11ea06c9fb646d38471b62f4ca7579faa1833a7598d03c35e +size 73400320 diff --git a/params_shard_179.bin b/params_shard_179.bin new file mode 100644 index 0000000000000000000000000000000000000000..507fb00a58e4f3fa860fa0fe64bc16d6dfcbc1b8 --- /dev/null +++ b/params_shard_179.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:df803dee26d20511abb88e43457501eee82e7435deda863e5989cff132b4dfa2 +size 283115520 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1f23eb558da460c76814a10e9c1c3642d745704 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:397070d5f1998bdee3a9c0a609064b459fb47ca84fa54be632ebcb6562b037ed +size 52428800 diff --git a/params_shard_180.bin b/params_shard_180.bin new file mode 100644 index 0000000000000000000000000000000000000000..351cc05a01585201aac03bec9cc263a0a781564e --- /dev/null +++ b/params_shard_180.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b5ad3ce1d001f6c231850acced1d8fb6e4f5033edd7ebdf33cc725aceb8cb8 +size 283115520 diff --git a/params_shard_182.bin b/params_shard_182.bin new file mode 100644 index 0000000000000000000000000000000000000000..b8ca721e508b0c92389f8daa3b38ad5c9ff034e2 --- /dev/null +++ b/params_shard_182.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32f354bb5127fed9b43ac9466d628a746c8371dc9a4b0e6eec6126bf799b4d61 +size 73400320 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..48647aa7d7eada6170ef2803365a6816880b3e0f --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:967c33c852188691b1f3e3b9e4e89c50a54ce7c1494e4bddf2bb3cca106c6ac2 +size 283115520 diff --git a/params_shard_191.bin b/params_shard_191.bin new file mode 100644 index 0000000000000000000000000000000000000000..67d0291199ad2b60393c3f6b73f5dcf0d53bb711 --- /dev/null +++ b/params_shard_191.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4fceeb2ffe87a1fd292ba6e548a757c50ccc9582400fedebed9ed99cd4308ed +size 52428800 diff --git a/params_shard_192.bin b/params_shard_192.bin new file mode 100644 index 
0000000000000000000000000000000000000000..a815f9c42a9cd96d3239135a039c856a83fa562c --- /dev/null +++ b/params_shard_192.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b8c696e9ce8af8787ccb9b5934bf426e6e0625d6507666628b582dd2d68a7bc +size 283115520 diff --git a/params_shard_193.bin b/params_shard_193.bin new file mode 100644 index 0000000000000000000000000000000000000000..80e7cf7448d40bfa82b44299d421c74742ac9ac5 --- /dev/null +++ b/params_shard_193.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f3db04b999b699f2b320261e03ff272b4d4d6adfb305baf31206df7ee3e6d7d +size 566231040 diff --git a/params_shard_196.bin b/params_shard_196.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca134c14422f9b0691da632d977073a522405e20 --- /dev/null +++ b/params_shard_196.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b8216c1a7474343450dd43c0f65484982c925988164bf084b0b21d88318742b +size 283115520 diff --git a/params_shard_198.bin b/params_shard_198.bin new file mode 100644 index 0000000000000000000000000000000000000000..484b7652a702158e6a6e020f2fb50957b7bb74cd --- /dev/null +++ b/params_shard_198.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95d2e639889b60826d1585cb41174baad5f3d80310c0593c406bb352105bd466 +size 73400320 diff --git a/params_shard_202.bin b/params_shard_202.bin new file mode 100644 index 0000000000000000000000000000000000000000..c9ce70e0b1b17966d8964760437016aa68fd3b22 --- /dev/null +++ b/params_shard_202.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:796ff0e482e20abef91abd1f7eea39efe18e630c4a0d694bbcc8ca381387e68b +size 73400320 diff --git a/params_shard_205.bin b/params_shard_205.bin new file mode 100644 index 0000000000000000000000000000000000000000..8391d123b57512c54d6d59edb38805560fc86b0d --- /dev/null +++ b/params_shard_205.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c34b23ebbb010827a8e971905d602000b11ca7c945801d10a44806a55fc381d5 +size 566231040 diff --git a/params_shard_211.bin b/params_shard_211.bin new file mode 100644 index 0000000000000000000000000000000000000000..10429577e153617e19d8e7e1b951b49fd4b6d221 --- /dev/null +++ b/params_shard_211.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e9888a22244195f13f0699d01de12035e0135bbe5f574b2c10118845f9903c0 +size 52428800 diff --git a/params_shard_212.bin b/params_shard_212.bin new file mode 100644 index 0000000000000000000000000000000000000000..c19173983ab86157675b2a5452dee4c5906fa259 --- /dev/null +++ b/params_shard_212.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:283e5a68afde772123c7da9a81b2dbda14b82c4ff4181141525bf53edde8aef5 +size 283115520 diff --git a/params_shard_214.bin b/params_shard_214.bin new file mode 100644 index 0000000000000000000000000000000000000000..22ff3a226227bb9d01d714e3c4a4a60d37369169 --- /dev/null +++ b/params_shard_214.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a40487925c546f616d379d30b497f5b61e4c30a3dc3c29e6795c1d722e100fc4 +size 73400320 diff --git a/params_shard_215.bin b/params_shard_215.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a280b0dfcd8b1873b008fffa2798632921c7808 --- /dev/null +++ b/params_shard_215.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:867e03e8bd9ea26429662d86aa792052b68a17262d5cda5573772d035b3546e6 +size 52428800 diff --git a/params_shard_217.bin b/params_shard_217.bin new file mode 100644 index 0000000000000000000000000000000000000000..f95d34193d4bcb867fb3bceed2306f16bf28ca2d --- /dev/null +++ b/params_shard_217.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:964e11562745642f2d1dbed864d8594cc09cfe65f69667b7c94232241a6d5102 +size 73400320 diff --git a/params_shard_223.bin b/params_shard_223.bin new file mode 100644 index 
0000000000000000000000000000000000000000..64a9ee23af159d04bcd90600651fd1a9b1b57603 --- /dev/null +++ b/params_shard_223.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9bf61cdc69a96a2611eb13d0a0db67feb8ece4ca57c9e4c9f1dfdae9cad6457 +size 52428800 diff --git a/params_shard_225.bin b/params_shard_225.bin new file mode 100644 index 0000000000000000000000000000000000000000..cc4cdefbb7edb1aa94541ed7d894bc661aad1e0c --- /dev/null +++ b/params_shard_225.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee30919c97c0e926cbb13a8f36359bd6288eebf04bca09010f2d652aa5a1e6da +size 566231040 diff --git a/params_shard_245.bin b/params_shard_245.bin new file mode 100644 index 0000000000000000000000000000000000000000..9595762f4e6be9be451ed4ad8b33b9e6da841861 --- /dev/null +++ b/params_shard_245.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d3f25db9ba91cb48ff88536294ed94a13226b0b1e43bd4b1a4685ab7f156b19 +size 566231040 diff --git a/params_shard_248.bin b/params_shard_248.bin new file mode 100644 index 0000000000000000000000000000000000000000..e99daa69ce1b4b2be5953c3eee270325f13c683c --- /dev/null +++ b/params_shard_248.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae7d0b53adb869471400c125258889656b7a8c483ab334ebacf8b8d968bd5d3 +size 283115520 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..3099fdf9f1c8b8e8683fa8f7fabfbd77c4b29087 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b056e22e1924e61f8c389e270529d65812b73ea36fd88d072531da6057facd +size 566231040 diff --git a/params_shard_253.bin b/params_shard_253.bin new file mode 100644 index 0000000000000000000000000000000000000000..da309f0c4a9967f1785ef93fdecf1ad94875b951 --- /dev/null +++ b/params_shard_253.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f4e4cb3a520625c4c946743f90411bd96f7624dc1d315bb210bce634564fe9af +size 566231040 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c03a57d049e1aac4b0fc79ca2b6dfdeff3513b9 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85714d2bc7a037b9fa5df98a27959a4d4b702eb6fb1b18a1e7562e07fcc3e399 +size 73400320 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..6e464ebc4d00c6ba9a156e28512981491ab4992c --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf82b6a32aa1dc0a5ec28d8fd7e02f67e6387a1053e61e02c8f938f2e4afc283 +size 52428800 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..07c5c28cbcae814c1efbead0b125935d24a68647 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:069477360ddb0052d028db5927fde8f7713b4c98b24937055a71d17565468967 +size 566231040 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..cafc39596542e7080d7f3fc015cfff87ccbbd8ea --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d46bc293b1e735c84a83e83e59274238b9bc37a894c3836a0f42dc9ad931e97 +size 566231040 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8b94da517c2eb02e032e55fa80d16b8b504d87d --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b34d83b622489eb024f78f38916205f9e69fceb1d44c985707a1e17e98ef75f +size 73400320 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 
0000000000000000000000000000000000000000..8a87e6b15151b792373c50f9824f3bbc369ea8e5 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38bb3b0ae6c8b1733a217b92039e3fef5264ca31781bcabe465874ee65c51f2f +size 566231040 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..c97de38c132c9cbe51d4bfc70a3cf7c1d724c39b --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62112a6fd5af36063d728a0f2d8b1253bef4867cb54700e3857e54d40c6c9ffc +size 52428800 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..4336d4321e557122b70a5631dd24312224ed37ef --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d03b8fd00bca7eb178486e8b14367dc0c466f96becf5d7ad5c19835af5bf5247 +size 283115520 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..def4d912ba42f3a100cabdbfdd8edf88f37b5909 --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdd3d7e1e3064b9fd23c05cd6f627a4e42ac0a001cde4338aaaee9cdb0bd7db4 +size 73400320 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..85da449efc589bd46d9968f30599115c8f4c33b1 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f21c6fe1b8e48e5d2224afbbcea336fb1caf2549dd54aa16c2f8f4dbe12eb2b +size 283115520 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..7bd1a0cc10e2f03af4b2e89691716c57601aa3f6 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:30abe90371f7837ae3cd59fcd45bcd52bbd58ef19a903dfc634e27997f38792e +size 283115520 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..d71112aa5817b3a97c3c6f21fc478bc175e10952 --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12d872034ef5d80f5a83193ee8fd001e02c47dfe8b8fd79485893a681b739054 +size 52428800 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..4c218a52e5c16af87143ec8af8e270f6cfc5509b --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f620c70a23b047db6d18f0efccb8bc66dbb9059f9db59e27f130ac9c2da611d +size 283115520 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..da382efde56df920b9e68bcf7719b4fd2f78877c --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f1b3a068daee7b7b4ba70bfb803d43944de0010193e70733740acbf09088e7 +size 52428800 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c7e48191be44399eb2739a25cdfdbd040ac2005 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a59448918f4a9cbddd77bf4ad7579db36092a1d5312a5943116087abb48b53da +size 283115520 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..0d0da905a54164b2323e7ec3a4854a177fd7bfe4 --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e9e1d78fc565f52505f733c0ded04ccace136a35475c2ec92eaf3841765d22 +size 566231040 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 
0000000000000000000000000000000000000000..6131f1a0b613db0dfaa1702a4c3fec8c9c95e7a1 --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:404fa3c07911b24ac097e7903213bf93e001d60cbd6f05178b4fad18bc5660c7 +size 283115520 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..a3e50819abe04ef065ae0c4920185e117f3d5211 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e8efec2673ceda380c11d0e809f1e4726de316f31408ce5e109cefcf13d30cb +size 52428800 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..e9c134d46c3882baa5615ab65bc59ec9f7da1764 --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7277099938d9d6b8c0256c9c1ab1edb2f5a8fc5fa8033b8cc7a1f85849e4b944 +size 52428800 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5979d3c30ebb190805b27872576dfb272b33df0 --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d131ed89c754994f389232c46545298b9ee95e8b4a49ff801df1b6fa0fce87b +size 566231040