| { | |
| "measurement": { | |
| "model.layers.0": { | |
| "accuracy": 0.8820657730102539, | |
| "total_bits": 573724416, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.1": { | |
| "accuracy": 0.8954000473022461, | |
| "total_bits": 573724416, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.2": { | |
| "accuracy": 0.9489546418190002, | |
| "total_bits": 458124288, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.3": { | |
| "accuracy": 0.9551604390144348, | |
| "total_bits": 466380288, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.4": { | |
| "accuracy": 0.948026180267334, | |
| "total_bits": 458124288, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.5": { | |
| "accuracy": 0.938827395439148, | |
| "total_bits": 458124288, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.6": { | |
| "accuracy": 0.9337625503540039, | |
| "total_bits": 458124288, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.7": { | |
| "accuracy": 0.9263930320739746, | |
| "total_bits": 466380288, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.8": { | |
| "accuracy": 0.9275798797607422, | |
| "total_bits": 466380288, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.9": { | |
| "accuracy": 0.9196293354034424, | |
| "total_bits": 458124288, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.10": { | |
| "accuracy": 0.9175989627838135, | |
| "total_bits": 466380288, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.11": { | |
| "accuracy": 0.9152019023895264, | |
| "total_bits": 466380288, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.12": { | |
| "accuracy": 0.9095911979675293, | |
| "total_bits": 466380288, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.13": { | |
| "accuracy": 0.9048597812652588, | |
| "total_bits": 458124288, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.14": { | |
| "accuracy": 0.9164364337921143, | |
| "total_bits": 573724416, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.15": { | |
| "accuracy": 0.9038996696472168, | |
| "total_bits": 573724416, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.16": { | |
| "accuracy": 0.9254007339477539, | |
| "total_bits": 615009024, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.17": { | |
| "accuracy": 0.9273276329040527, | |
| "total_bits": 648037632, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.18": { | |
| "accuracy": 0.9346868991851807, | |
| "total_bits": 722356992, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.19": { | |
| "accuracy": 0.9401049613952637, | |
| "total_bits": 755385600, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.20": { | |
| "accuracy": 0.9692505598068237, | |
| "total_bits": 846216960, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.21": { | |
| "accuracy": 0.9710770845413208, | |
| "total_bits": 846216960, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.22": { | |
| "accuracy": 0.9737789630889893, | |
| "total_bits": 846216960, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.23": { | |
| "accuracy": 0.9729764461517334, | |
| "total_bits": 846216960, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.24": { | |
| "accuracy": 0.9737536907196045, | |
| "total_bits": 846216960, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.25": { | |
| "accuracy": 0.9738945960998535, | |
| "total_bits": 846216960, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.26": { | |
| "accuracy": 0.9777768850326538, | |
| "total_bits": 879245568, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.27": { | |
| "accuracy": 0.9767287969589233, | |
| "total_bits": 879245568, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.28": { | |
| "accuracy": 0.9730557203292847, | |
| "total_bits": 879245568, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.29": { | |
| "accuracy": 0.9716944694519043, | |
| "total_bits": 879245568, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.30": { | |
| "accuracy": 0.9692039489746094, | |
| "total_bits": 879245568, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.31": { | |
| "accuracy": 0.9576601982116699, | |
| "total_bits": 879245568, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| } | |
| } | |
| } |