/home/cfruan/.conda/envs/mlc-source-311/bin/python -m mlc_chat gen_config /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4 --quantization q4f16_1 --conv-template chatml --output /tmp/tmp2xt0mvux
/home/cfruan/.conda/envs/mlc-source-311/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float64'> type is zero.
  setattr(self, word, getattr(machar, word).flat[0])
/home/cfruan/.conda/envs/mlc-source-311/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for <class 'numpy.float64'> type is zero.
  return self._float_to_str(self.smallest_subnormal)
/home/cfruan/.conda/envs/mlc-source-311/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero.
  setattr(self, word, getattr(machar, word).flat[0])
/home/cfruan/.conda/envs/mlc-source-311/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero.
  return self._float_to_str(self.smallest_subnormal)
[2024-01-16 09:22:44] INFO auto_config.py:115: Found model configuration: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/config.json
[2024-01-16 09:22:44] INFO auto_config.py:153: Found model type: llama. Use `--model-type` to override.
[2024-01-16 09:22:44] INFO llama_model.py:51: context_window_size not found in config.json. Falling back to max_position_embeddings (2048)
[2024-01-16 09:22:44] INFO llama_model.py:71: prefill_chunk_size defaults to context_window_size (2048)
[2024-01-16 09:22:44] INFO gen_config.py:117: [config.json] Setting bos_token_id: 1
[2024-01-16 09:22:44] INFO gen_config.py:117: [config.json] Setting eos_token_id: 2
[2024-01-16 09:22:44] INFO gen_config.py:129: Found tokenizer config: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/tokenizer.model. Copying to /tmp/tmp2xt0mvux/tokenizer.model
[2024-01-16 09:22:45] INFO gen_config.py:129: Found tokenizer config: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/tokenizer.json. Copying to /tmp/tmp2xt0mvux/tokenizer.json
[2024-01-16 09:22:45] INFO gen_config.py:131: Not found tokenizer config: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/vocab.json
[2024-01-16 09:22:45] INFO gen_config.py:131: Not found tokenizer config: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/merges.txt
[2024-01-16 09:22:45] INFO gen_config.py:129: Found tokenizer config: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/added_tokens.json. Copying to /tmp/tmp2xt0mvux/added_tokens.json
[2024-01-16 09:22:45] INFO gen_config.py:129: Found tokenizer config: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/tokenizer_config.json. Copying to /tmp/tmp2xt0mvux/tokenizer_config.json
[2024-01-16 09:22:45] INFO gen_config.py:70: [System default] Setting pad_token_id: 0
[2024-01-16 09:22:45] INFO gen_config.py:70: [System default] Setting temperature: 0.7
[2024-01-16 09:22:45] INFO gen_config.py:70: [System default] Setting repetition_penalty: 1.0
[2024-01-16 09:22:45] INFO gen_config.py:70: [System default] Setting top_p: 0.95
[2024-01-16 09:22:45] INFO gen_config.py:70: [System default] Setting mean_gen_len: 128
[2024-01-16 09:22:45] INFO gen_config.py:70: [System default] Setting max_gen_len: 512
[2024-01-16 09:22:45] INFO gen_config.py:70: [System default] Setting shift_fill_factor: 0.3
[2024-01-16 09:22:45] INFO gen_config.py:159: Dumping configuration file to: /tmp/tmp2xt0mvux/mlc-chat-config.json
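The gen_config run above records the chat template plus the token ids and sampling defaults it just logged into mlc-chat-config.json in the output directory. A quick way to sanity-check the dumped file is the short sketch below; the key names (temperature, top_p, ...) are an assumption inferred from the "Setting ..." log lines, not a documented schema.

    # Hedged sketch: read back the config that gen_config dumped above.
    # The path is the temporary output directory from the log.
    import json

    with open("/tmp/tmp2xt0mvux/mlc-chat-config.json") as f:
        cfg = json.load(f)

    for key in ("temperature", "repetition_penalty", "top_p",
                "mean_gen_len", "max_gen_len", "shift_fill_factor"):
        # Expected per the log: 0.7, 1.0, 0.95, 128, 512, 0.3
        print(key, "=", cfg.get(key))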
/home/cfruan/.conda/envs/mlc-source-311/bin/python -m mlc_chat convert_weight /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4 --quantization q4f16_1 --source-format auto --output /tmp/tmp2xt0mvux
/home/cfruan/.conda/envs/mlc-source-311/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float64'> type is zero.
  setattr(self, word, getattr(machar, word).flat[0])
/home/cfruan/.conda/envs/mlc-source-311/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for <class 'numpy.float64'> type is zero.
  return self._float_to_str(self.smallest_subnormal)
/home/cfruan/.conda/envs/mlc-source-311/lib/python3.11/site-packages/numpy/core/getlimits.py:549: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero.
  setattr(self, word, getattr(machar, word).flat[0])
/home/cfruan/.conda/envs/mlc-source-311/lib/python3.11/site-packages/numpy/core/getlimits.py:89: UserWarning: The value of the smallest subnormal for <class 'numpy.float32'> type is zero.
  return self._float_to_str(self.smallest_subnormal)
[2024-01-16 09:22:45] INFO auto_config.py:115: Found model configuration: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/config.json
[2024-01-16 09:22:46] INFO auto_device.py:76: Found device: cuda:0
[2024-01-16 09:22:46] INFO auto_device.py:76: Found device: cuda:1
[2024-01-16 09:22:46] INFO auto_device.py:85: Not found device: rocm:0
[2024-01-16 09:22:47] INFO auto_device.py:85: Not found device: metal:0
[2024-01-16 09:22:47] INFO auto_device.py:76: Found device: vulkan:0
[2024-01-16 09:22:47] INFO auto_device.py:76: Found device: vulkan:1
[2024-01-16 09:22:47] INFO auto_device.py:76: Found device: vulkan:2
[2024-01-16 09:22:48] INFO auto_device.py:85: Not found device: opencl:0
[2024-01-16 09:22:48] INFO auto_device.py:33: Using device: cuda:0
[2024-01-16 09:22:48] INFO auto_weight.py:70: Finding weights in: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4
[2024-01-16 09:22:48] INFO auto_weight.py:129: Found source weight format: huggingface-torch. Source configuration: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/pytorch_model.bin
[2024-01-16 09:22:48] INFO auto_weight.py:143: Found source weight format: huggingface-safetensor. Source configuration: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/model.safetensors.index.json
[2024-01-16 09:22:48] INFO auto_weight.py:106: Using source weight configuration: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/pytorch_model.bin. Use `--source` to override.
[2024-01-16 09:22:48] INFO auto_weight.py:110: Using source weight format: huggingface-torch. Use `--source-format` to override.
[2024-01-16 09:22:48] INFO auto_config.py:153: Found model type: llama. Use `--model-type` to override.
[2024-01-16 09:22:48] INFO llama_model.py:51: context_window_size not found in config.json. Falling back to max_position_embeddings (2048)
[2024-01-16 09:22:48] INFO llama_model.py:71: prefill_chunk_size defaults to context_window_size (2048)
[2024-01-16 09:22:52] INFO huggingface_loader.py:169: Loading HF parameters from: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/pytorch_model.bin
Weight conversion with arguments:
  --config         /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/config.json
  --quantization   GroupQuantize(name='q4f16_1', kind='group-quant', group_size=32, quantize_dtype='int4', storage_dtype='uint32', model_dtype='float16', linear_weight_layout='NK', num_elem_per_storage=8, num_storage_per_group=4, max_int_value=7)
  --model-type     llama
  --device         cuda:0
  --source         /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/pytorch_model.bin
  --source-format  huggingface-torch
  --output         /tmp/tmp2xt0mvux
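The GroupQuantize arguments above fully determine the q_weight/q_scale shapes reported by the loader below: quantizing along axis 1, every 8 four-bit values are packed into one uint32 word and every group of 32 elements keeps one float16 scale. A minimal sketch of that arithmetic (not MLC-LLM's actual packing code) reproduces the logged shapes:

    # Minimal sketch of the q4f16_1 shape arithmetic, assuming axis=1
    # quantization as logged: num_elem_per_storage=8, group_size=32.
    def q4f16_1_shapes(rows, cols, elems_per_word=8, group_size=32):
        q_weight = (rows, -(-cols // elems_per_word))  # packed uint32 words (ceil division)
        q_scale = (rows, -(-cols // group_size))       # one float16 scale per group
        return q_weight, q_scale

    print(q4f16_1_shapes(32003, 2048))  # ((32003, 256), (32003, 64)) -> model.embed_tokens
    print(q4f16_1_shapes(2048, 5632))   # ((2048, 704), (2048, 176))  -> mlp.down_proj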
[2024-01-16 09:22:55] INFO group_quantization.py:227: Compiling quantize function for key: ((32003, 2048), float16, cuda, axis=1, output_transpose=False)
[2024-01-16 09:22:56] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.embed_tokens.q_weight", shape: (32003, 256), dtype: uint32
[2024-01-16 09:22:56] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.embed_tokens.q_scale", shape: (32003, 64), dtype: float16
[2024-01-16 09:22:56] INFO group_quantization.py:227: Compiling quantize function for key: ((2560, 2048), float16, cuda, axis=1, output_transpose=False)
[2024-01-16 09:22:56] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.0.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:22:56] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.0.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:22:56] INFO group_quantization.py:227: Compiling quantize function for key: ((2048, 2048), float16, cuda, axis=1, output_transpose=False)
[2024-01-16 09:22:56] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.0.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:22:56] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.0.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:22:57] INFO group_quantization.py:227: Compiling quantize function for key: ((11264, 2048), float16, cuda, axis=1, output_transpose=False)
[2024-01-16 09:22:57] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.0.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:22:57] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.0.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:22:57] INFO group_quantization.py:227: Compiling quantize function for key: ((2048, 5632), float16, cuda, axis=1, output_transpose=False)
[2024-01-16 09:22:57] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.0.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:22:57] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.0.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:22:57] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.0.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:57] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.0.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.1.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.1.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.1.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.1.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.1.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.1.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.1.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.1.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.1.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.1.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.2.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.2.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.2.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.2.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.2.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.2.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.2.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.2.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.2.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.2.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.3.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.3.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.3.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.3.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.3.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.3.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.3.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.3.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.3.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.3.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.4.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.4.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.4.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:22:58] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.4.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.4.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.4.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.4.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.4.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.4.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.4.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.5.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.5.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.5.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.5.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.5.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.5.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.5.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.5.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.5.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.5.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.6.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.6.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.6.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.6.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.6.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.6.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.6.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.6.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.6.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.6.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.7.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.7.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.7.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.7.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.7.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:22:59] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.7.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.7.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.7.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.7.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.7.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.8.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.8.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.8.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.8.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.8.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.8.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.8.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.8.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.8.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.8.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.9.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.9.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.9.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.9.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.9.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.9.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.9.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.9.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.9.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.9.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.10.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.10.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.10.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.10.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.10.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.10.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.10.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.10.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.10.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.10.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.11.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.11.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.11.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:23:00] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.11.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.11.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.11.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.11.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.11.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.11.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.11.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.12.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.12.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.12.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.12.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.12.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.12.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.12.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.12.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.12.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.12.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.13.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.13.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.13.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.13.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.13.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.13.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.13.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.13.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.13.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.13.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.14.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.14.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.14.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.14.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.14.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.14.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.14.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.14.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.14.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.14.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.15.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.15.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.15.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:23:01] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.15.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.15.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.15.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.15.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.15.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.15.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.15.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.16.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.16.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.16.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.16.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.16.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.16.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.16.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.16.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.16.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.16.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.17.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.17.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.17.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.17.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.17.mlp.gate_up_proj.q_weight", shape: (11264, 256), dtype: uint32
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.17.mlp.gate_up_proj.q_scale", shape: (11264, 64), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.17.mlp.down_proj.q_weight", shape: (2048, 704), dtype: uint32
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.17.mlp.down_proj.q_scale", shape: (2048, 176), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.17.input_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "model.layers.17.post_attention_layernorm.weight", shape: (2048,), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.18.self_attn.qkv_proj.q_weight", shape: (2560, 256), dtype: uint32
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.18.self_attn.qkv_proj.q_scale", shape: (2560, 64), dtype: float16
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.18.self_attn.o_proj.q_weight", shape: (2048, 256), dtype: uint32
[2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "model.layers.18.self_attn.o_proj.q_scale", shape: (2048, 64), dtype: float16
| 79%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 106/135 [00:08<00:01, 20.16it/s] [2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.18.mlp.gate_up_proj.q_weight[0m", shape: (11264, 256), dtype: uint32 | |
| 79%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 106/135 [00:08<00:01, 20.16it/s] [2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.18.mlp.gate_up_proj.q_scale[0m", shape: (11264, 64), dtype: float16 | |
| 79%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 106/135 [00:08<00:01, 20.16it/s] 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 112/135 [00:08<00:01, 21.16it/s] [2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.18.mlp.down_proj.q_weight[0m", shape: (2048, 704), dtype: uint32 | |
| 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 112/135 [00:08<00:01, 21.16it/s] [2024-01-16 09:23:02] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.18.mlp.down_proj.q_scale[0m", shape: (2048, 176), dtype: float16 | |
| 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 112/135 [00:08<00:01, 21.16it/s] [2024-01-16 09:23:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "[1mmodel.layers.18.input_layernorm.weight[0m", shape: (2048,), dtype: float16 | |
| 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 112/135 [00:08<00:01, 21.16it/s] [2024-01-16 09:23:02] INFO huggingface_loader.py:129: [Not quantized] Parameter: "[1mmodel.layers.18.post_attention_layernorm.weight[0m", shape: (2048,), dtype: float16 | |
| 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 112/135 [00:08<00:01, 21.16it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.19.self_attn.qkv_proj.q_weight[0m", shape: (2560, 256), dtype: uint32 | |
| 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 112/135 [00:08<00:01, 21.16it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.19.self_attn.qkv_proj.q_scale[0m", shape: (2560, 64), dtype: float16 | |
| 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 112/135 [00:08<00:01, 21.16it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.19.self_attn.o_proj.q_weight[0m", shape: (2048, 256), dtype: uint32 | |
| 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 112/135 [00:08<00:01, 21.16it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.19.self_attn.o_proj.q_scale[0m", shape: (2048, 64), dtype: float16 | |
| 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 112/135 [00:08<00:01, 21.16it/s] 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 117/135 [00:08<00:00, 25.13it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.19.mlp.gate_up_proj.q_weight[0m", shape: (11264, 256), dtype: uint32 | |
| 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 117/135 [00:08<00:00, 25.13it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.19.mlp.gate_up_proj.q_scale[0m", shape: (11264, 64), dtype: float16 | |
| 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 117/135 [00:08<00:00, 25.13it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.19.mlp.down_proj.q_weight[0m", shape: (2048, 704), dtype: uint32 | |
| 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 117/135 [00:08<00:00, 25.13it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.19.mlp.down_proj.q_scale[0m", shape: (2048, 176), dtype: float16 | |
| 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 117/135 [00:08<00:00, 25.13it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:129: [Not quantized] Parameter: "[1mmodel.layers.19.input_layernorm.weight[0m", shape: (2048,), dtype: float16 | |
| 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 117/135 [00:08<00:00, 25.13it/s] 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 120/135 [00:08<00:00, 21.73it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:129: [Not quantized] Parameter: "[1mmodel.layers.19.post_attention_layernorm.weight[0m", shape: (2048,), dtype: float16 | |
| 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 120/135 [00:08<00:00, 21.73it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.20.self_attn.qkv_proj.q_weight[0m", shape: (2560, 256), dtype: uint32 | |
| 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 120/135 [00:08<00:00, 21.73it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.20.self_attn.qkv_proj.q_scale[0m", shape: (2560, 64), dtype: float16 | |
| 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 120/135 [00:08<00:00, 21.73it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.20.self_attn.o_proj.q_weight[0m", shape: (2048, 256), dtype: uint32 | |
| 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 120/135 [00:08<00:00, 21.73it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.20.self_attn.o_proj.q_scale[0m", shape: (2048, 64), dtype: float16 | |
| 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 120/135 [00:08<00:00, 21.73it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.20.mlp.gate_up_proj.q_weight[0m", shape: (11264, 256), dtype: uint32 | |
| 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 120/135 [00:08<00:00, 21.73it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.20.mlp.gate_up_proj.q_scale[0m", shape: (11264, 64), dtype: float16 | |
| 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 120/135 [00:08<00:00, 21.73it/s] 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 124/135 [00:08<00:00, 20.94it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.20.mlp.down_proj.q_weight[0m", shape: (2048, 704), dtype: uint32 | |
| 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 124/135 [00:08<00:00, 20.94it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.20.mlp.down_proj.q_scale[0m", shape: (2048, 176), dtype: float16 | |
| 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 124/135 [00:08<00:00, 20.94it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:129: [Not quantized] Parameter: "[1mmodel.layers.20.input_layernorm.weight[0m", shape: (2048,), dtype: float16 | |
| 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 124/135 [00:08<00:00, 20.94it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:129: [Not quantized] Parameter: "[1mmodel.layers.20.post_attention_layernorm.weight[0m", shape: (2048,), dtype: float16 | |
| 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 124/135 [00:08<00:00, 20.94it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.21.self_attn.qkv_proj.q_weight[0m", shape: (2560, 256), dtype: uint32 | |
| 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 124/135 [00:08<00:00, 20.94it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.21.self_attn.qkv_proj.q_scale[0m", shape: (2560, 64), dtype: float16 | |
| 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 124/135 [00:08<00:00, 20.94it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.21.self_attn.o_proj.q_weight[0m", shape: (2048, 256), dtype: uint32 | |
| 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 124/135 [00:08<00:00, 20.94it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.21.self_attn.o_proj.q_scale[0m", shape: (2048, 64), dtype: float16 | |
| 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 124/135 [00:08<00:00, 20.94it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.21.mlp.gate_up_proj.q_weight[0m", shape: (11264, 256), dtype: uint32 | |
| 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 124/135 [00:09<00:00, 20.94it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.21.mlp.gate_up_proj.q_scale[0m", shape: (11264, 64), dtype: float16 | |
| 92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 124/135 [00:09<00:00, 20.94it/s] 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 130/135 [00:09<00:00, 21.74it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.21.mlp.down_proj.q_weight[0m", shape: (2048, 704), dtype: uint32 | |
| 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 130/135 [00:09<00:00, 21.74it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mmodel.layers.21.mlp.down_proj.q_scale[0m", shape: (2048, 176), dtype: float16 | |
| 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 130/135 [00:09<00:00, 21.74it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:129: [Not quantized] Parameter: "[1mmodel.layers.21.input_layernorm.weight[0m", shape: (2048,), dtype: float16 | |
| 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 130/135 [00:09<00:00, 21.74it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:129: [Not quantized] Parameter: "[1mmodel.layers.21.post_attention_layernorm.weight[0m", shape: (2048,), dtype: float16 | |
| 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 130/135 [00:09<00:00, 21.74it/s] [2024-01-16 09:23:03] INFO huggingface_loader.py:129: [Not quantized] Parameter: "[1mmodel.norm.weight[0m", shape: (2048,), dtype: float16 | |
| 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 130/135 [00:09<00:00, 21.74it/s] [2024-01-16 09:23:04] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mlm_head.q_weight[0m", shape: (32003, 256), dtype: uint32 | |
| 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 130/135 [00:09<00:00, 21.74it/s] [2024-01-16 09:23:04] INFO huggingface_loader.py:121: [Quantized] Parameter: "[1mlm_head.q_scale[0m", shape: (32003, 64), dtype: float16 | |
| 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 130/135 [00:09<00:00, 21.74it/s] 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 135/135 [00:09<00:00, 17.52it/s] 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 135/135 [00:09<00:00, 14.22it/s] | |
| [2024-01-16 09:23:04] INFO huggingface_loader.py:179: Unloading HF weight file: /ssd1/cfruan/models/TinyLlama-1.1B-Chat-v0.4/pytorch_model.bin | |
| [2024-01-16 09:23:04] INFO stats.py:71: Time usage: HF loading: 2.666 sec; Pre-quantization mapping: 6.411 sec; Quantization: 2.607 sec | |
| [2024-01-16 09:23:04] INFO stats.py:85: RAM usage: Peak RAM: 4.098 GB. Total bytes loaded from disk: 4.098 GB | |
| [2024-01-16 09:23:04] INFO convert_weight.py:119: Parameter size after quantization: 0.576 GB | |
| [2024-01-16 09:23:04] INFO convert_weight.py:124: Total parameters: 1,100,060,672 | |
| [2024-01-16 09:23:04] INFO convert_weight.py:125: Bits per parameter: 4.501 | |
| Start storing to cache /tmp/tmp2xt0mvux | |
| [0001/0225] saving model.embed_tokens.q_weight [0002/0225] saving model.embed_tokens.q_scale ... [0223/0225] saving model.norm.weight [0224/0225] saving lm_head.q_weight [0225/0225] saving lm_head.q_scale | |
| [2024-01-16 09:23:05] INFO convert_weight.py:141: Saved to directory: /tmp/tmp2xt0mvux | |
| All finished, 24 total shards committed, record saved to /tmp/tmp2xt0mvux/ndarray-cache.json | |
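The `[Quantized]` shapes reported above follow directly from the q4f16_1 layout: along the input dimension of each linear weight, eight 4-bit values are packed into one uint32 for `q_weight`, and one float16 scale is stored per group of 32 input elements for `q_scale` (32 is the group size implied by the logged shapes, e.g. 2048 / 64 = 32). The snippet below is a minimal sketch, not part of mlc_chat, that reproduces the logged shapes for TinyLlama-1.1B (hidden size 2048, intermediate size 5632, vocabulary 32003 in this checkpoint).

```python
# Sketch: derive expected q4f16_1 tensor shapes for a Linear weight of shape
# (out_features, in_features). Assumes 8 four-bit values packed per uint32 and
# a quantization group size of 32 along the input dimension.
ELEMS_PER_UINT32 = 32 // 4   # 8 nibbles per packed uint32
GROUP_SIZE = 32              # assumed group size for q4f16_1

def q4f16_1_shapes(out_features: int, in_features: int):
    q_weight = (out_features, in_features // ELEMS_PER_UINT32)  # uint32
    q_scale = (out_features, in_features // GROUP_SIZE)         # float16
    return q_weight, q_scale

print(q4f16_1_shapes(2048, 2048))    # o_proj    -> ((2048, 256), (2048, 64))
print(q4f16_1_shapes(2560, 2048))    # qkv_proj  -> ((2560, 256), (2560, 64))
print(q4f16_1_shapes(11264, 2048))   # gate_up   -> ((11264, 256), (11264, 64))
print(q4f16_1_shapes(2048, 5632))    # down_proj -> ((2048, 704), (2048, 176))
print(q4f16_1_shapes(32003, 2048))   # lm_head   -> ((32003, 256), (32003, 64))
```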
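The summary statistics are mutually consistent: 4-bit weights plus one 16-bit scale per group of 32 give 4 + 16/32 = 4.5 bits per quantized weight, and the small unquantized layernorm vectors push the average slightly above that. A quick cross-check, assuming the reported "GB" is GiB (1024^3 bytes) and that 0.576 is rounded:

```python
# Cross-check the reported quantization summary (values copied from the log above).
param_size_bytes = 0.576 * 1024**3   # "Parameter size after quantization: 0.576 GB"
total_params = 1_100_060_672         # "Total parameters: 1,100,060,672"
print(param_size_bytes * 8 / total_params)  # ~4.50, matching "Bits per parameter: 4.501"
```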