Upload 115 files
- llm-q-scaling-law-master/.gitignore +7 -0
- llm-q-scaling-law-master/.gitmodules +3 -0
- llm-q-scaling-law-master/README.md +23 -0
- llm-q-scaling-law-master/configs/debug/integer.toml +19 -0
- llm-q-scaling-law-master/configs/debug/mxint_4bit-bypass.toml +36 -0
- llm-q-scaling-law-master/configs/debug/mxint_4bit-no_evaluate.toml +42 -0
- llm-q-scaling-law-master/configs/debug/mxint_4bit-tinyllama.toml +42 -0
- llm-q-scaling-law-master/configs/search/layerwise/custom-eval.toml +38 -0
- llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml +42 -0
- llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml +42 -0
- llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml +42 -0
- llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml +42 -0
- llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml +42 -0
- llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml +42 -0
- llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml +42 -0
- llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml +42 -0
- llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-1.toml +42 -0
- llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-2.toml +42 -0
- llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-4.toml +42 -0
- llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-8.toml +42 -0
- llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml +42 -0
- llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml +42 -0
- llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml +42 -0
- llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml +42 -0
- llm-q-scaling-law-master/eval_scripts/eval_harness_commands.txt +34 -0
- llm-q-scaling-law-master/eval_scripts/layerwise/evalscript_generator.py +141 -0
- llm-q-scaling-law-master/eval_scripts/layerwise/llama_layerwise_eval_commands.txt +36 -0
- llm-q-scaling-law-master/eval_scripts/layerwise/opt_layerwise_eval_commands.txt +72 -0
- llm-q-scaling-law-master/eval_scripts/layerwise/qwen1.5_layerwise_eval_commands.txt +72 -0
- llm-q-scaling-law-master/eval_scripts/matmult/evalscript_generator.py +142 -0
- llm-q-scaling-law-master/eval_scripts/matmult/llama_matmult_eval_commands.txt +28 -0
- llm-q-scaling-law-master/eval_scripts/matmult/opt_matmult_eval_commands.txt +56 -0
- llm-q-scaling-law-master/eval_scripts/matmult/qwen1.5_matmult_eval_commands.txt +56 -0
- llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-0.5B-best-trail.txt +0 -0
- llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-1.8B-best-trail.txt +0 -0
- llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-14B-best-trail.txt +7 -0
- llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-32B-best-trail.txt +8 -0
- llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-4B-best-trail.txt +0 -0
- llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-72B-best-trail.txt +8 -0
- llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-7B-best-trail.txt +0 -0
- llm-q-scaling-law-master/q_ratio_results/matmult/qwen1.5/qwen-32B-best-trail.txt +6 -0
- llm-q-scaling-law-master/requirements.txt +26 -0
- llm-q-scaling-law-master/run_scripts/env_command.sh +23 -0
- llm-q-scaling-law-master/run_scripts/layerwise/llama2_layerwise_run_commands.txt +24 -0
- llm-q-scaling-law-master/run_scripts/layerwise/llama_layerwise_run_commands.txt +32 -0
- llm-q-scaling-law-master/run_scripts/layerwise/opt_layerwise_run_commands.txt +64 -0
- llm-q-scaling-law-master/run_scripts/layerwise/qwen1.5_layerwise_run_commands.txt +64 -0
- llm-q-scaling-law-master/run_scripts/layerwise/runscript_generator.py +111 -0
- llm-q-scaling-law-master/run_scripts/matmult/llama2_matmult_run_commands.txt +18 -0
- llm-q-scaling-law-master/run_scripts/matmult/llama_matmult_run_commands.txt +24 -0
llm-q-scaling-law-master/.gitignore
ADDED
@@ -0,0 +1,7 @@
.vscode/
__pycache__/
checkpoints/
wandb/
ckpts/
ckpt/
results/
llm-q-scaling-law-master/.gitmodules
ADDED
@@ -0,0 +1,3 @@
[submodule "src/lm-evaluation-harness"]
    path = src/lm-evaluation-harness
    url = git@github.com:EleutherAI/lm-evaluation-harness.git
llm-q-scaling-law-master/README.md
ADDED
@@ -0,0 +1,23 @@
# LLM Quantisation Scaling Law

## Setup

A conda environment is recommended. To create and activate one, then install the dependencies, run:

```bash
conda create -n llm-mixed-q python=3.11 -y
conda activate llm-mixed-q
pip install -r requirements.txt
git submodule update --init --recursive
```

## Features

* Supported model architectures:
    + Qwen2
    + OPT
    + Llama

## Entry points

The configuration files for the model and search parameters are located in `./configs/`.
llm-q-scaling-law-master/configs/debug/integer.toml
ADDED
@@ -0,0 +1,19 @@
[quantization]
name = "integer"
data_in_width = 16
data_in_frac_width = 3
weight_width = 16
weight_frac_width = 3
bias_width = 16
bias_frac_width = 3

[setup]
# low to high precision ratio
ratio = 0.1
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity = "transformer_layer"
tasks = ['sst', 'mnli']
batch_size = 16
num_samples_per_trial = 1024
num_trials = 16
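As a minimal sketch of how a config like the one above can be read, assuming the project loads these files with Python 3.11's standard-library `tomllib` (the actual loader in `src/` is not shown in this diff):

```python
import tomllib  # standard library as of Python 3.11, the version pinned in the README

# Path is illustrative; any of the TOML configs in this commit parses the same way.
with open("configs/debug/integer.toml", "rb") as f:  # tomllib requires binary mode
    cfg = tomllib.load(f)

assert cfg["setup"]["granularity"] in ("transformer_layer", "matmult")
print(cfg["quantization"]["name"], cfg["setup"]["ratio"])  # integer 0.1
```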
llm-q-scaling-law-master/configs/debug/mxint_4bit-bypass.toml
ADDED
@@ -0,0 +1,36 @@
[quantization.linear.x]
name="bypass"
width=8
block_size=16
block_axis=-2
[quantization.linear.w]
name="bypass"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="bypass"
width=8
block_size=16
block_axis=-2

[quantization.matmul.w]
name="bypass"
width=8
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.9
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="transformer_layer"
# granularity="matmult"
tasks=['custom_alpaca']
batch_size=8
num_samples_per_trial=10
num_trials=3
device_map="auto-balanced"
random=true
llm-q-scaling-law-master/configs/debug/mxint_4bit-no_evaluate.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.9
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="transformer_layer"
# granularity="matmult"
tasks=['custom_alpaca']
batch_size=4
num_samples_per_trial=1000
num_trials=3
device_map="auto-balanced"
random=true

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=[]
batch_size=1
num_best_epochs=1
llm-q-scaling-law-master/configs/debug/mxint_4bit-tinyllama.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.9
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="transformer_layer"
# granularity="matmult"
tasks=['custom_alpaca']
batch_size=8
num_samples_per_trial=100
num_trials=3
device_map="auto-balanced"
random=true

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=8
num_best_epochs=1
llm-q-scaling-law-master/configs/search/layerwise/custom-eval.toml
ADDED
@@ -0,0 +1,38 @@
[quantization.linear.x] # this is E2M1
name="minifloat"
width=4
exponent_width=2
[quantization.linear.w]
name="minifloat"
width=4
exponent_width=2

[quantization.matmul.x]
name="minifloat"
width=4
exponent_width=2

[quantization.matmul.w]
name="minifloat"
width=4
exponent_width=2

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="transformer_layer"
# granularity="matmult"
tasks=['custom_pajama']
batch_size=8
num_samples_per_trial=1000
num_trials=4
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['scaling_law_easy', 'scaling_law_hard']
batch_size="auto:8"
num_best_epochs=3
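The `# this is E2M1` comment above refers to a 4-bit minifloat with 1 sign, 2 exponent, and 1 mantissa bit. A sketch of its representable values, assuming an IEEE-like layout with exponent bias 1, subnormals at exponent 0, and no inf/NaN codes (the MXFP4 convention; the repo's own minifloat decoder may differ):

```python
# E2M1 decode under the stated assumptions: 1 sign bit, 2 exponent bits,
# 1 mantissa bit, exponent bias 1, no inf/NaN encodings.
def e2m1_value(code: int) -> float:
    sign = -1.0 if code & 0b1000 else 1.0
    exp = (code >> 1) & 0b11
    mantissa = code & 0b1
    if exp == 0:  # subnormal: (mantissa / 2) * 2**(1 - bias)
        return sign * mantissa * 0.5
    return sign * (1.0 + mantissa / 2.0) * 2.0 ** (exp - 1)

values = sorted({e2m1_value(c) for c in range(16)})
print(values)  # 15 distinct values spanning -6.0 ... 6.0
```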
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="transformer_layer"
# granularity="matmult"
tasks=['custom_alpaca']
batch_size=1
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=1
num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="transformer_layer"
# granularity="matmult"
tasks=['custom_alpaca']
batch_size=2
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=2
num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="transformer_layer"
# granularity="matmult"
tasks=['custom_alpaca']
batch_size=4
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=4
num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="transformer_layer"
# granularity="matmult"
tasks=['custom_alpaca']
batch_size=8
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=8
num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="transformer_layer"
# granularity="matmult"
tasks=['custom_pajama']
batch_size=1
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=1
num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="transformer_layer"
# granularity="matmult"
tasks=['custom_pajama']
batch_size=2
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=2
num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="transformer_layer"
# granularity="matmult"
tasks=['custom_pajama']
batch_size=4
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=4
num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="transformer_layer"
# granularity="matmult"
tasks=['custom_pajama']
batch_size=8
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=8
num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-1.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="matmult"
tasks=['custom_alpaca']
batch_size=1
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=1
num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-2.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="matmult"
tasks=['custom_alpaca']
batch_size=2
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=1
num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-4.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="matmult"
tasks=['custom_alpaca']
batch_size=4
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=1
num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-8.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="matmult"
tasks=['custom_alpaca']
batch_size=8
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=1
num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="matmult"
tasks=['custom_pajama']
batch_size=1
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=1
num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="matmult"
tasks=['custom_pajama']
batch_size=2
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=2
num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="matmult"
tasks=['custom_pajama']
batch_size=4
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=4
num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml
ADDED
@@ -0,0 +1,42 @@
[quantization.linear.x]
name="mxint"
width=4
block_size=16
block_axis=-2
[quantization.linear.w]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.x]
name="mxint"
width=4
block_size=16
block_axis=-2

[quantization.matmul.w]
name="mxint"
width=4
block_size=16
block_axis=-1

[setup]
# low to high precision ratio
ratio=0.0
# at what granularity?
# select from ["transformer_layer", "matmult"]
granularity="matmult"
tasks=['custom_pajama']
batch_size=8
num_samples_per_trial=1000
num_trials=50
device_map="auto-balanced"
random=false

[evaluation]
# set to [] if not using any evaluation benchmarks
tasks=['mmlu']
batch_size=8
num_best_epochs=3
llm-q-scaling-law-master/eval_scripts/eval_harness_commands.txt
ADDED
@@ -0,0 +1,34 @@
accelerate launch -m lm_eval --model hf \
    --tasks mmlu \
    --model_args pretrained=Qwen/Qwen1.5-0.5B \
    --batch_size 8

accelerate launch -m lm_eval --model hf \
    --tasks mmlu \
    --model_args pretrained=Qwen/Qwen1.5-1.8B \
    --batch_size 8

accelerate launch -m lm_eval --model hf \
    --tasks mmlu \
    --model_args pretrained=Qwen/Qwen1.5-4B \
    --batch_size 8

accelerate launch -m lm_eval --model hf \
    --tasks mmlu \
    --model_args pretrained=Qwen/Qwen1.5-7B \
    --batch_size 4

lm_eval --model hf \
    --tasks mmlu \
    --model_args pretrained=Qwen/Qwen1.5-14B,parallelize=True \
    --batch_size 8

lm_eval --model hf \
    --tasks mmlu \
    --model_args pretrained=Qwen/Qwen1.5-32B,parallelize=True \
    --batch_size 4

lm_eval --model hf \
    --tasks mmlu \
    --model_args pretrained=Qwen/Qwen1.5-72B,parallelize=True \
    --batch_size 1
llm-q-scaling-law-master/eval_scripts/layerwise/evalscript_generator.py
ADDED
@@ -0,0 +1,141 @@
"""
This generates the eval scripts for the LLM-Q scaling law simulations.
"""

# the search command is
# python src/main.py --model_name {model_name} --search_config configs/search/mxint_4bit-tinyllama.toml --model_parallel --disable_wandb --save_dir ../ckpt/test
# the save dir is
# results/search/layerwise/{model_name}_{ratio}

granularity = ["transformer_layer"]


def load_best_trail_mapping(path):
    """Parse a *-best-trail.txt file into {ratio: comma-separated quantized-layer list}.

    Each line has the form "<ratio>: [<layer indices>]"; the brackets are removed so
    the value can be passed directly to --quantized_list.
    """
    mapping = {}
    try:
        with open(path, "r") as fi:
            for line in fi.readlines():
                q_ratio, q_list = line.split(":")
                q_list = q_list.strip()[1:-1]  # remove [] from the string
                mapping[q_ratio.strip()] = q_list
    except FileNotFoundError:
        pass
    return mapping


# opt template for evaluation

opt_model_sizes = ["opt-125m", "opt-350m", "opt-1.3b", "opt-2.7b", "opt-6.7b", "opt-13b", "opt-30b", "opt-66b"]
opt_batch_size = [8, 8, 8, 8, 4, 2, 1, 1]
opt_layer_wise_ratios = [0.0, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
opt_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
opt_layer_wise_search_dir_template = "eval/layerwise/{}_{}"

opt_layerwise_run_command_palette = 'python src/full_eval.py --model_arch opt --model_name {model_name} --ratio {q_ratio} --eval_config configs/search/layerwise/{search_config} --model_parallel --quantized_list "{quantized_list}"'

with open("opt_layerwise_eval_commands.txt", "w") as f:
    for i in range(len(opt_model_sizes)):
        best_trail_q_lists_mapping = load_best_trail_mapping(
            "../../q_ratio_results/layerwise/opt/opt-{}-best-trail.txt".format(opt_model_sizes[i].split("-")[1])
        )
        model_size = opt_model_sizes[i]
        model_batch_size = opt_batch_size[i]
        for ratio in opt_layer_wise_ratios:
            search_config = opt_layer_wise_search_config_template.format(model_batch_size)
            save_dir = opt_layer_wise_search_dir_template.format(model_size, ratio)
            q_list = best_trail_q_lists_mapping.get(str(ratio), "")
            run_command = opt_layerwise_run_command_palette.format(
                model_name=f"facebook/{model_size}", q_ratio=ratio,
                search_config=search_config, save_dir=save_dir, quantized_list=q_list,
            )
            f.write(run_command + "\n")
        f.write("\n")


# qwen template for evaluation

qwen15_model_sizes = ["Qwen1.5-0.5B", "Qwen1.5-1.8B", "Qwen1.5-4B", "Qwen1.5-7B", "Qwen1.5-14B", "Qwen1.5-32B", "Qwen1.5-72B", "Qwen1.5-110B"]
qwen15_batch_size = [8, 8, 4, 2, 2, 2, 1, 1]
qwen15_layer_wise_ratios = [0.0, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
qwen15_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
qwen15_layer_wise_search_dir_template = "eval/layerwise/{}_{}"

qwen15_layerwise_run_command_palette = 'python src/full_eval.py --model_arch qwen2 --model_name {model_name} --ratio {q_ratio} --eval_config configs/search/layerwise/{search_config} --model_parallel --quantized_list "{quantized_list}"'

with open("qwen1.5_layerwise_eval_commands.txt", "w") as f:
    for i in range(len(qwen15_model_sizes)):
        best_trail_q_lists_mapping = load_best_trail_mapping(
            "../../q_ratio_results/layerwise/qwen1.5/qwen-{}-best-trail.txt".format(qwen15_model_sizes[i].split("-")[1])
        )
        model_size = qwen15_model_sizes[i]
        model_batch_size = qwen15_batch_size[i]
        for ratio in qwen15_layer_wise_ratios:
            search_config = qwen15_layer_wise_search_config_template.format(model_batch_size)
            save_dir = qwen15_layer_wise_search_dir_template.format(model_size, ratio)
            q_list = best_trail_q_lists_mapping.get(str(ratio), "")
            run_command = qwen15_layerwise_run_command_palette.format(
                model_name=f"Qwen/{model_size}", q_ratio=ratio,
                search_config=search_config, save_dir=save_dir, quantized_list=q_list,
            )
            f.write(run_command + "\n")
        f.write("\n")


# llama template for evaluation

llama_model_sizes = ["llama-7b", "llama-13b", "llama-30b", "llama-65b"]
llama_batch_size = [2, 2, 1, 1]
llama_layer_wise_ratios = [0.0, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
llama_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
llama_layer_wise_search_dir_template = "eval/layerwise/{}_{}"

llama_layerwise_run_command_palette = 'python src/full_eval.py --model_arch llama --model_name {model_name} --ratio {q_ratio} --eval_config configs/search/layerwise/{search_config} --model_parallel --quantized_list "{quantized_list}"'

with open("llama_layerwise_eval_commands.txt", "w") as f:
    for i in range(len(llama_model_sizes)):
        best_trail_q_lists_mapping = load_best_trail_mapping(
            "../../q_ratio_results/layerwise/llama/llama-{}-best-trail.txt".format(llama_model_sizes[i].split("-")[1])
        )
        model_size = llama_model_sizes[i]
        model_batch_size = llama_batch_size[i]
        for ratio in llama_layer_wise_ratios:
            search_config = llama_layer_wise_search_config_template.format(model_batch_size)
            save_dir = llama_layer_wise_search_dir_template.format(model_size, ratio)
            q_list = best_trail_q_lists_mapping.get(str(ratio), "")
            run_command = llama_layerwise_run_command_palette.format(
                model_name=f"huggyllama/{model_size}", q_ratio=ratio,
                search_config=search_config, save_dir=save_dir, quantized_list=q_list,
            )
            f.write(run_command + "\n")
        f.write("\n")
llm-q-scaling-law-master/eval_scripts/layerwise/llama_layerwise_eval_commands.txt
ADDED
@@ -0,0 +1,36 @@
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
llm-q-scaling-law-master/eval_scripts/layerwise/opt_layerwise_eval_commands.txt
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
2 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
3 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
4 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
5 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
6 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
7 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
8 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
9 |
+
|
10 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
11 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
12 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
13 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
14 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
15 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
16 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
17 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
18 |
+
|
19 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
20 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
|
21 |
+
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
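Note: these command files are plain text, one full_eval.py invocation per line, with a blank line separating model groups. A minimal sequential runner for such a file is sketched below; it is a hypothetical convenience helper, not part of the repository, and assumes it is launched from the repo root so the relative src/ and configs/ paths resolve.

import shlex
import subprocess
import sys

def run_commands(path: str) -> None:
    with open(path) as f:
        for raw in f:
            cmd = raw.strip()
            if not cmd:
                continue  # blank separator between model groups
            print(f">>> {cmd}", flush=True)
            # each non-blank line is a complete `python src/full_eval.py ...` invocation
            result = subprocess.run(shlex.split(cmd))
            if result.returncode != 0:
                print(f"exit {result.returncode}: {cmd}", file=sys.stderr)

if __name__ == "__main__":
    run_commands(sys.argv[1])  # e.g. eval_scripts/layerwise/opt_layerwise_eval_commands.txt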
llm-q-scaling-law-master/eval_scripts/layerwise/qwen1.5_layerwise_eval_commands.txt
ADDED
@@ -0,0 +1,72 @@
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
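Note: where the commands above carry a non-empty --quantized_list, the value is one comma-separated 0/1 flag per decoder layer (40 entries for Qwen1.5-14B, 64 for 32B, 80 for 72B). A small sanity-check sketch follows; reading 1 as "this layer is quantized" is inferred from the listings (all-zero mask at ratio 0.0, mostly ones near ratio 0.95), not confirmed by the source.

def parse_quantized_list(s: str) -> list[int]:
    """Parse a --quantized_list value such as "1, 0, 1" into [1, 0, 1]."""
    return [int(tok) for tok in s.split(",")] if s.strip() else []

mask = parse_quantized_list("1, 1, 0, 0, 1, 0, 1, 0")  # toy 8-layer example
print(len(mask), sum(mask))  # layer count and (assumed) number of quantized layers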
llm-q-scaling-law-master/eval_scripts/matmult/evalscript_generator.py
ADDED
@@ -0,0 +1,142 @@
"""
This generates the run scripts for the LLM-Q scaling law simulations.
"""

import os
import argparse

# the search command is
# python src/main.py --model_name {model_name} --search_config configs/search/mxint_4bit-tinyllama.toml --model_parallel --disable_wandb --save_dir ../ckpt/test
# the save dir is
# results/search/layerwise/{model_name}_{ratio}

granularity = ["matmult"]

# opt template for searching

opt_model_sizes = ["opt-125m", "opt-350m", "opt-1.3b", "opt-2.7b", "opt-6.7b", "opt-13b", "opt-30b", "opt-66b"]
opt_batch_size = [8, 8, 8, 8, 4, 2, 1, 1]
opt_matmult_ratios = [0.0, 0.5, 0.9, 0.95, 0.975, 0.99]
opt_matmult_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
opt_matmult_search_dir_template = "eval/matmult/{}_{}"

opt_matmult_run_command_palette = "python src/full_eval.py --model_arch opt --model_name {model_name} --ratio {q_ratio} --eval_config configs/search/matmult/{search_config} --model_parallel --quantized_list \"{quantized_list}\""

with open("opt_matmult_eval_commands.txt", "w") as f:
    for i in range(len(opt_model_sizes)):
        # load the best-trial quantization masks found by the search, if available
        eval_best_trail_file = "../../q_ratio_results/matmult/opt/opt-{}-best-trail.txt".format(opt_model_sizes[i].split("-")[1])
        try:
            with open(eval_best_trail_file, "r") as fi:
                lines = fi.readlines()
                best_trail_q_lists_mapping = {}
                for line in lines:
                    # each line is "<ratio>: [<comma-separated 0/1 mask>]"
                    q_ratio, q_list = line.split(":")
                    q_list = q_list.strip()
                    # remove the surrounding [] from the string
                    q_list = q_list[1:-1]
                    q_ratio = q_ratio.strip()
                    best_trail_q_lists_mapping[q_ratio] = q_list
        except FileNotFoundError:
            best_trail_q_lists_mapping = {}

        model_size = opt_model_sizes[i]
        model_batch_size = opt_batch_size[i]
        for ratio in opt_matmult_ratios:
            search_config = opt_matmult_search_config_template.format(model_batch_size)
            # save_dir is computed but not referenced by the command template
            save_dir = opt_matmult_search_dir_template.format(model_size, ratio)
            q_list = best_trail_q_lists_mapping[str(ratio)] if str(ratio) in best_trail_q_lists_mapping else ""
            model_name = f"facebook/{model_size}"
            run_command = opt_matmult_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, save_dir=save_dir, quantized_list=q_list)
            f.write(run_command + "\n")
        f.write("\n")


# qwen template for searching

qwen15_model_sizes = ["Qwen1.5-0.5B", "Qwen1.5-1.8B", "Qwen1.5-4B", "Qwen1.5-7B", "Qwen1.5-14B", "Qwen1.5-32B", "Qwen1.5-72B", "Qwen1.5-110B"]
qwen15_batch_size = [8, 8, 4, 2, 2, 2, 1, 1]
qwen15_matmult_ratios = [0.0, 0.5, 0.9, 0.95, 0.975, 0.99]
qwen15_matmult_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
qwen15_matmult_search_dir_template = "eval/matmult/{}_{}"

qwen15_matmult_run_command_palette = "python src/full_eval.py --model_arch qwen2 --model_name {model_name} --ratio {q_ratio} --eval_config configs/search/matmult/{search_config} --model_parallel --quantized_list \"{quantized_list}\""

with open("qwen1.5_matmult_eval_commands.txt", "w") as f:
    for i in range(len(qwen15_model_sizes)):
        eval_best_trail_file = "../../q_ratio_results/matmult/qwen1.5/qwen-{}-best-trail.txt".format(qwen15_model_sizes[i].split("-")[1])
        try:
            with open(eval_best_trail_file, "r") as fi:
                lines = fi.readlines()
                best_trail_q_lists_mapping = {}
                for line in lines:
                    q_ratio, q_list = line.split(":")
                    q_list = q_list.strip()
                    q_list = q_list[1:-1]
                    q_ratio = q_ratio.strip()
                    best_trail_q_lists_mapping[q_ratio] = q_list
        except FileNotFoundError:
            best_trail_q_lists_mapping = {}

        model_size = qwen15_model_sizes[i]
        model_batch_size = qwen15_batch_size[i]
        for ratio in qwen15_matmult_ratios:
            search_config = qwen15_matmult_search_config_template.format(model_batch_size)
            save_dir = qwen15_matmult_search_dir_template.format(model_size, ratio)
            model_name = f"Qwen/{model_size}"
            q_list = best_trail_q_lists_mapping[str(ratio)] if str(ratio) in best_trail_q_lists_mapping else ""
            run_command = qwen15_matmult_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, save_dir=save_dir, quantized_list=q_list)
            f.write(run_command + "\n")
        f.write("\n")


# llama template for searching

llama_model_sizes = ["llama-7b", "llama-13b", "llama-30b", "llama-65b"]
llama_batch_size = [2, 2, 1, 1]
llama_matmult_ratios = [0.0, 0.5, 0.9, 0.95, 0.975, 0.99]
llama_matmult_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
llama_matmult_search_dir_template = "eval/matmult/{}_{}"

llama_matmult_run_command_palette = "python src/full_eval.py --model_arch llama --model_name {model_name} --ratio {q_ratio} --eval_config configs/search/matmult/{search_config} --model_parallel --quantized_list \"{quantized_list}\""

with open("llama_matmult_eval_commands.txt", "w") as f:
    for i in range(len(llama_model_sizes)):
        eval_best_trail_file = "../../q_ratio_results/matmult/llama/llama-{}-best-trail.txt".format(llama_model_sizes[i].split("-")[1])
        try:
            with open(eval_best_trail_file, "r") as fi:
                lines = fi.readlines()
                best_trail_q_lists_mapping = {}
                for line in lines:
                    q_ratio, q_list = line.split(":")
                    q_list = q_list.strip()
                    q_list = q_list[1:-1]
                    q_ratio = q_ratio.strip()
                    best_trail_q_lists_mapping[q_ratio] = q_list
        except FileNotFoundError:
            best_trail_q_lists_mapping = {}

        model_size = llama_model_sizes[i]
        model_batch_size = llama_batch_size[i]
        for ratio in llama_matmult_ratios:
            search_config = llama_matmult_search_config_template.format(model_batch_size)
            save_dir = llama_matmult_search_dir_template.format(model_size, ratio)
            model_name = f"huggyllama/{model_size}"
            q_list = best_trail_q_lists_mapping[str(ratio)] if str(ratio) in best_trail_q_lists_mapping else ""
            run_command = llama_matmult_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, save_dir=save_dir, quantized_list=q_list)
            f.write(run_command + "\n")
        f.write("\n")
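Note: the three parsing loops above expect one line per searched ratio in each best-trail file, of the form "<ratio>: [<comma-separated 0/1 mask>]". An illustrative round-trip (the ratio and mask values here are made up):

line = "0.9: [1, 0, 1, 1]"
q_ratio, q_list = line.split(":")
q_list = q_list.strip()[1:-1]  # drop the surrounding brackets
assert q_ratio.strip() == "0.9" and q_list == "1, 0, 1, 1"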
llm-q-scaling-law-master/eval_scripts/matmult/llama_matmult_eval_commands.txt
ADDED
@@ -0,0 +1,28 @@
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
llm-q-scaling-law-master/eval_scripts/matmult/opt_matmult_eval_commands.txt
ADDED
@@ -0,0 +1,56 @@
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
llm-q-scaling-law-master/eval_scripts/matmult/qwen1.5_matmult_eval_commands.txt
ADDED
@@ -0,0 +1,56 @@
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""

python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1"
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-0.5B-best-trail.txt
ADDED
File without changes
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-1.8B-best-trail.txt
ADDED
File without changes
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-14B-best-trail.txt
ADDED
@@ -0,0 +1,7 @@
0.5 : [1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0]
0.6 : [1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0]
0.7 : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0]
0.8 : [1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1]
0.9 : [1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1]
0.95 : [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1]
0.975 : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1]
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-32B-best-trail.txt
ADDED
@@ -0,0 +1,8 @@
0.0 : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
0.5 : [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
0.6 : [0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0]
0.7 : [1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0]
0.8 : [1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0]
0.9 : [0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1]
0.95 : [0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0]
0.975 :
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-4B-best-trail.txt
ADDED
File without changes
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-72B-best-trail.txt
ADDED
@@ -0,0 +1,8 @@
0.0 : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
0.5 : [0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0]
0.6 : [1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1]
0.7 : [0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0]
0.8 : [1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1]
0.9 : [0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
0.95 : [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
0.975 :
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-7B-best-trail.txt
ADDED
File without changes
llm-q-scaling-law-master/q_ratio_results/matmult/qwen1.5/qwen-32B-best-trail.txt
ADDED
@@ -0,0 +1,6 @@
0.0 : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
0.5 : [1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0]
0.9 : [1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0]
0.95 : [1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1]
0.975 : [1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
0.99 : [1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
llm-q-scaling-law-master/requirements.txt
ADDED
@@ -0,0 +1,26 @@
torch==2.3.1
transformers==4.42.3
datasets==2.20.0
nvitop
accelerate
joblib
optuna
wandb
toml

evaluate>=0.4.0
jsonlines
numexpr
peft>=0.2.0
pybind11>=2.6.2
pytablewriter
rouge-score>=0.0.4
sacrebleu>=1.5.0
scikit-learn>=0.24.1
sqlitedict
tqdm-multiprocess
zstandard
dill
word2number
more_itertools
sentencepiece
llm-q-scaling-law-master/run_scripts/env_command.sh
ADDED
@@ -0,0 +1,23 @@
export CUDA_VISIBLE_DEVICES=0
conda activate llm-mixed-q

export CUDA_VISIBLE_DEVICES=1
conda activate llm-mixed-q

export CUDA_VISIBLE_DEVICES=2
conda activate llm-mixed-q

export CUDA_VISIBLE_DEVICES=3
conda activate llm-mixed-q

export CUDA_VISIBLE_DEVICES=4
conda activate llm-mixed-q

export CUDA_VISIBLE_DEVICES=5
conda activate llm-mixed-q

export CUDA_VISIBLE_DEVICES=6
conda activate llm-mixed-q

export CUDA_VISIBLE_DEVICES=7
conda activate llm-mixed-q
llm-q-scaling-law-master/run_scripts/layerwise/llama2_layerwise_run_commands.txt
ADDED
@@ -0,0 +1,24 @@
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.5
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.6
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.7
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.8
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.9
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.95
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.975
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.5
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.6
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.7
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.8
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.9
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.95
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.975
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.5
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.6
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.7
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.8
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.9
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.95
python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.975
llm-q-scaling-law-master/run_scripts/layerwise/llama_layerwise_run_commands.txt
ADDED
@@ -0,0 +1,32 @@
python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.5
python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.6
python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.7
python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.8
python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.9
python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.95
python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.975
python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.5
python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.6
python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.7
python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.8
python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.9
python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.95
python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.975
python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.5
python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.6
python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.7
python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.8
python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.9
python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.95
python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.975
python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.5
python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.6
python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.7
python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.8
python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.9
python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.95
python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.975
llm-q-scaling-law-master/run_scripts/layerwise/opt_layerwise_run_commands.txt
ADDED
@@ -0,0 +1,64 @@
python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.5
python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.6
python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.7
python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.8
python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.9
python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.95
python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.975
python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.5
python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.6
python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.7
python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.8
python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.9
python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.95
python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.975
python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.5
python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.6
python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.7
python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.8
python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.9
python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.95
python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.975
python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.5
python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.6
python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.7
python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.8
python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.9
python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.95
python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.975
python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.5
python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.6
python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.7
python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.8
python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.9
python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.95
python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.975
python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.5
python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.6
python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.7
python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.8
python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.9
python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.95
python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.975
python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.5
python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.6
python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.7
python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.8
|
53 |
+
python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.9
|
54 |
+
python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.95
|
55 |
+
python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.975
|
56 |
+
|
57 |
+
python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.5
|
58 |
+
python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.6
|
59 |
+
python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.7
|
60 |
+
python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.8
|
61 |
+
python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.9
|
62 |
+
python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.95
|
63 |
+
python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.975
|
64 |
+
|
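Each *_run_commands.txt file above is a flat list of independent jobs, one q_ratio per line, with blank lines separating model-size groups. A minimal sequential runner for such a file, as a sketch only (this helper is hypothetical and not part of the repository):

import shlex
import subprocess
import sys

def run_command_file(path: str) -> None:
    # Execute each non-empty line of a *_run_commands.txt file as a shell command.
    with open(path) as f:
        for line in f:
            cmd = line.strip()
            if not cmd:
                continue  # blank lines only separate model-size groups
            print(f">>> {cmd}")
            # Each search is independent, so one failure should not abort the sweep.
            result = subprocess.run(shlex.split(cmd))
            if result.returncode != 0:
                print(f"!!! exited with {result.returncode}: {cmd}", file=sys.stderr)

if __name__ == "__main__":
    run_command_file(sys.argv[1])

For example: python run_commands.py run_scripts/layerwise/opt_layerwise_run_commands.txt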
llm-q-scaling-law-master/run_scripts/layerwise/qwen1.5_layerwise_run_commands.txt
ADDED
@@ -0,0 +1,64 @@
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.5
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.6
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.7
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.8
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.9
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.95
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.975
+
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.5
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.6
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.7
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.8
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.9
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.95
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.975
+
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.5
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.6
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.7
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.8
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.9
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.95
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.975
+
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.5
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.6
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.7
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.8
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.9
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.95
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.975
+
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.5
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.6
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.7
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.8
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.9
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.95
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.975
+
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.5
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.6
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.7
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.8
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.9
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.95
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.975
+
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.5
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.6
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.7
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.8
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.9
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.95
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.975
+
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.5
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.6
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.7
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.8
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.9
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.95
+python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.975
+
llm-q-scaling-law-master/run_scripts/layerwise/runscript_generator.py
ADDED
@@ -0,0 +1,109 @@
+"""
+This generates the run scripts for the LLM-Q scaling law simulations.
+
+"""
+
+import os
+import argparse
+
+# the search command is
+# python src/main.py --model_name {model_name} --search_config configs/search/mxint_4bit-tinyllama.toml --model_parallel --disable_wandb --save_dir ../ckpt/test
+# the save dir is
+# results/search/layerwise/{model_name}_{ratio}
+
+granularity = ["transformer_layer"]
+
+# opt template for searching
+
+opt_model_sizes = ["opt-125m", "opt-350m", "opt-1.3b", "opt-2.7b", "opt-6.7b", "opt-13b", "opt-30b", "opt-66b"]
+opt_batch_size = [8, 8, 8, 8, 4, 2, 1, 1]  # per-model batch size, selects the matching search config
+opt_layer_wise_ratios = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
+opt_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+opt_layer_wise_search_dir_template = "search/layerwise/{}_{}"
+
+opt_layerwise_run_command_palette = "python src/main.py --model_arch opt --model_name {model_name} --q_ratio {q_ratio} --search_config configs/search/layerwise/{search_config} --model_parallel --wandb_name {wandb_name} --wandb_group opt-layer-search --save_dir results/{save_dir}"
+
+with open("opt_layerwise_run_commands.txt", "w") as f:
+    for i in range(len(opt_model_sizes)):
+        model_size = opt_model_sizes[i]
+        model_batch_size = opt_batch_size[i]
+        for ratio in opt_layer_wise_ratios:
+            search_config = opt_layer_wise_search_config_template.format(model_batch_size)
+            save_dir = opt_layer_wise_search_dir_template.format(model_size, ratio)
+            model_name = f"facebook/{model_size}"
+            wandb_name = f"{model_size}_{ratio}"
+            run_command = opt_layerwise_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, wandb_name=wandb_name, save_dir=save_dir)
+            # print(run_command)
+            f.write(run_command + "\n")
+        f.write("\n")
+
+
+# qwen template for searching
+
+qwen15_model_sizes = ["Qwen1.5-0.5B", "Qwen1.5-1.8B", "Qwen1.5-4B", "Qwen1.5-7B", "Qwen1.5-14B", "Qwen1.5-32B", "Qwen1.5-72B", "Qwen1.5-110B"]
+qwen15_batch_size = [8, 8, 4, 2, 2, 2, 1, 1]
+qwen15_layer_wise_ratios = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
+qwen15_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+qwen15_layer_wise_search_dir_template = "search/layerwise/{}_{}"
+
+qwen15_layerwise_run_command_palette = "python src/main.py --model_arch qwen1.5 --model_name {model_name} --q_ratio {q_ratio} --search_config configs/search/layerwise/{search_config} --model_parallel --wandb_name {wandb_name} --wandb_group qwen15-layer-search --save_dir results/{save_dir}"
+
+with open("qwen1.5_layerwise_run_commands.txt", "w") as f:
+    for i in range(len(qwen15_model_sizes)):
+        model_size = qwen15_model_sizes[i]
+        model_batch_size = qwen15_batch_size[i]
+        for ratio in qwen15_layer_wise_ratios:
+            search_config = qwen15_layer_wise_search_config_template.format(model_batch_size)
+            save_dir = qwen15_layer_wise_search_dir_template.format(model_size, ratio)
+            model_name = f"Qwen/{model_size}"
+            wandb_name = f"{model_size}_{ratio}"
+            run_command = qwen15_layerwise_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, wandb_name=wandb_name, save_dir=save_dir)
+            # print(run_command)
+            f.write(run_command + "\n")
+        f.write("\n")
+
+# llama template for searching
+
+llama_model_sizes = ["llama-7b", "llama-13b", "llama-30b", "llama-65b"]
+llama_batch_size = [2, 2, 1, 1]
+llama_layer_wise_ratios = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
+llama_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+llama_layer_wise_search_dir_template = "search/layerwise/{}_{}"
+
+llama_layerwise_run_command_palette = "python src/main.py --model_arch llama --model_name {model_name} --q_ratio {q_ratio} --search_config configs/search/layerwise/{search_config} --model_parallel --wandb_name {wandb_name} --wandb_group llama-layer-search --save_dir results/{save_dir}"
+
+with open("llama_layerwise_run_commands.txt", "w") as f:
+    for i in range(len(llama_model_sizes)):
+        model_size = llama_model_sizes[i]
+        model_batch_size = llama_batch_size[i]
+        for ratio in llama_layer_wise_ratios:
+            search_config = llama_layer_wise_search_config_template.format(model_batch_size)
+            save_dir = llama_layer_wise_search_dir_template.format(model_size, ratio)
+            model_name = f"huggyllama/{model_size}"
+            wandb_name = f"{model_size}_{ratio}"
+            run_command = llama_layerwise_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, wandb_name=wandb_name, save_dir=save_dir)
+            # print(run_command)
+            f.write(run_command + "\n")
+        f.write("\n")
+
+llama2_model_sizes = ["Llama-2-7b", "Llama-2-13b", "Llama-2-70b"]
+llama2_batch_size = [2, 2, 1]
+llama2_layer_wise_ratios = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
+llama2_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+llama2_layer_wise_search_dir_template = "search/layerwise/{}_{}"
+
+llama2_layerwise_run_command_palette = "python src/main.py --model_arch llama2 --model_name {model_name} --q_ratio {q_ratio} --search_config configs/search/layerwise/{search_config} --model_parallel --wandb_name {wandb_name} --wandb_group llama-layer-search --save_dir results/{save_dir}"
+
+with open("llama2_layerwise_run_commands.txt", "w") as f:
+    for i in range(len(llama2_model_sizes)):
+        model_size = llama2_model_sizes[i]
+        model_batch_size = llama2_batch_size[i]
+        for ratio in llama2_layer_wise_ratios:
+            search_config = llama2_layer_wise_search_config_template.format(model_batch_size)
+            save_dir = llama2_layer_wise_search_dir_template.format(model_size, ratio)
+            model_name = f"meta-llama/{model_size}-chat-hf"  # use the instruction-tuned (chat) checkpoints
+            wandb_name = f"{model_size}_{ratio}"
+            run_command = llama2_layerwise_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, wandb_name=wandb_name, save_dir=save_dir)
+            # print(run_command)
+            f.write(run_command + "\n")
+        f.write("\n")
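The four blocks above differ only in the architecture flag, Hugging Face namespace, wandb group, and model/batch-size lists, so the generator could be collapsed into a single helper. A sketch under that assumption (the function name and the name_suffix parameter, used for the Llama-2 -chat-hf checkpoints, are hypothetical):

def write_run_commands(out_path, arch, hub_prefix, wandb_group,
                       model_sizes, batch_sizes, name_suffix="",
                       ratios=(0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975)):
    """Write one search command per (model, ratio) pair; a blank line separates model-size groups."""
    palette = (
        "python src/main.py --model_arch {arch} --model_name {model_name} "
        "--q_ratio {q_ratio} --search_config configs/search/layerwise/"
        "mxint_4bit-pajama-random-50-bs-{bs}.toml --model_parallel "
        "--wandb_name {name} --wandb_group {group} "
        "--save_dir results/search/layerwise/{name}"
    )
    with open(out_path, "w") as f:
        for size, bs in zip(model_sizes, batch_sizes):
            for ratio in ratios:
                name = f"{size}_{ratio}"
                f.write(palette.format(arch=arch,
                                       model_name=f"{hub_prefix}/{size}{name_suffix}",
                                       q_ratio=ratio, bs=bs,
                                       name=name, group=wandb_group) + "\n")
            f.write("\n")

# e.g. write_run_commands("opt_layerwise_run_commands.txt", "opt", "facebook",
#                         "opt-layer-search", opt_model_sizes, opt_batch_size)

Note that the script writes its output into the current working directory, so it is presumably meant to be invoked from run_scripts/layerwise/.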
llm-q-scaling-law-master/run_scripts/matmult/llama2_matmult_run_commands.txt
ADDED
@@ -0,0 +1,18 @@
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.5 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-7b_0.5
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.9 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-7b_0.9
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.95 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-7b_0.95
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.975 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-7b_0.975
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.99 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-7b_0.99
+
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.5 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-13b_0.5
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.9 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-13b_0.9
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.95 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-13b_0.95
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.975 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-13b_0.975
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.99 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-13b_0.99
+
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.5 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-70b_0.5
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.9 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-70b_0.9
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.95 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-70b_0.95
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.975 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-70b_0.975
+python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.99 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-70b_0.99
+
llm-q-scaling-law-master/run_scripts/matmult/llama_matmult_run_commands.txt
ADDED
@@ -0,0 +1,24 @@
+python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.5 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-7b_0.5
+python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.9 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-7b_0.9
+python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.95 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-7b_0.95
+python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.975 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-7b_0.975
+python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.99 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-7b_0.99
+
+python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.5 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-13b_0.5
+python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.9 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-13b_0.9
+python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.95 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-13b_0.95
+python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.975 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-13b_0.975
+python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.99 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-13b_0.99
+
+python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.5 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-30b_0.5
+python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.9 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-30b_0.9
+python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.95 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-30b_0.95
+python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.975 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-30b_0.975
+python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.99 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-30b_0.99
+
+python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.5 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-65b_0.5
+python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.9 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-65b_0.9
+python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.95 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-65b_0.95
+python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.975 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-65b_0.975
+python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.99 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-65b_0.99
+
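Note that the matmult sweeps above use a sparser ratio grid (0.5, 0.9, 0.95, 0.975, 0.99) than the layerwise sweeps (seven steps from 0.5 to 0.975). A quick, hypothetical sanity check for the grid used in any command file:

import re
import sys

def ratio_grid(path):
    # Collect the distinct --q_ratio values that appear in a command file.
    with open(path) as f:
        return sorted({float(m.group(1)) for m in re.finditer(r"--q_ratio (\S+)", f.read())})

print(ratio_grid(sys.argv[1]))  # e.g. [0.5, 0.9, 0.95, 0.975, 0.99]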