| { | |
| "measurement": { | |
| "model.layers.0": { | |
| "accuracy": 0.7405672073364258, | |
| "total_bits": 120876576, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.1": { | |
| "accuracy": 0.8317708969116211, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.2": { | |
| "accuracy": 0.8587737083435059, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.3": { | |
| "accuracy": 0.8670854568481445, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.4": { | |
| "accuracy": 0.8640389442443848, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.5": { | |
| "accuracy": 0.8391742706298828, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.6": { | |
| "accuracy": 0.7521486282348633, | |
| "total_bits": 120876576, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.7": { | |
| "accuracy": 0.9208848476409912, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.8": { | |
| "accuracy": 0.9133262634277344, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.9": { | |
| "accuracy": 0.9102234840393066, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.10": { | |
| "accuracy": 0.9021768569946289, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.11": { | |
| "accuracy": 0.9006736278533936, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.12": { | |
| "accuracy": 0.8896198272705078, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.13": { | |
| "accuracy": 0.8880295753479004, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.14": { | |
| "accuracy": 0.8788719177246094, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.15": { | |
| "accuracy": 0.8800616264343262, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.16": { | |
| "accuracy": 0.8750381469726562, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.17": { | |
| "accuracy": 0.8723258972167969, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.18": { | |
| "accuracy": 0.8696107864379883, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.19": { | |
| "accuracy": 0.8691720962524414, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.20": { | |
| "accuracy": 0.8783330917358398, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.21": { | |
| "accuracy": 0.8743820190429688, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.22": { | |
| "accuracy": 0.8673934936523438, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.23": { | |
| "accuracy": 0.8243818283081055, | |
| "total_bits": 112619808, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| } | |
| } | |
| } |