rookiemango committed
Commit f2d0179 · verified · 1 Parent(s): 58beb49

Upload 115 files

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. llm-q-scaling-law-master/.gitignore +7 -0
  2. llm-q-scaling-law-master/.gitmodules +3 -0
  3. llm-q-scaling-law-master/README.md +23 -0
  4. llm-q-scaling-law-master/configs/debug/integer.toml +19 -0
  5. llm-q-scaling-law-master/configs/debug/mxint_4bit-bypass.toml +36 -0
  6. llm-q-scaling-law-master/configs/debug/mxint_4bit-no_evaluate.toml +42 -0
  7. llm-q-scaling-law-master/configs/debug/mxint_4bit-tinyllama.toml +42 -0
  8. llm-q-scaling-law-master/configs/search/layerwise/custom-eval.toml +38 -0
  9. llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml +42 -0
  10. llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml +42 -0
  11. llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml +42 -0
  12. llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml +42 -0
  13. llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml +42 -0
  14. llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml +42 -0
  15. llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml +42 -0
  16. llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml +42 -0
  17. llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-1.toml +42 -0
  18. llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-2.toml +42 -0
  19. llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-4.toml +42 -0
  20. llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-8.toml +42 -0
  21. llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml +42 -0
  22. llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml +42 -0
  23. llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml +42 -0
  24. llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml +42 -0
  25. llm-q-scaling-law-master/eval_scripts/eval_harness_commands.txt +34 -0
  26. llm-q-scaling-law-master/eval_scripts/layerwise/evalscript_generator.py +141 -0
  27. llm-q-scaling-law-master/eval_scripts/layerwise/llama_layerwise_eval_commands.txt +36 -0
  28. llm-q-scaling-law-master/eval_scripts/layerwise/opt_layerwise_eval_commands.txt +72 -0
  29. llm-q-scaling-law-master/eval_scripts/layerwise/qwen1.5_layerwise_eval_commands.txt +72 -0
  30. llm-q-scaling-law-master/eval_scripts/matmult/evalscript_generator.py +142 -0
  31. llm-q-scaling-law-master/eval_scripts/matmult/llama_matmult_eval_commands.txt +28 -0
  32. llm-q-scaling-law-master/eval_scripts/matmult/opt_matmult_eval_commands.txt +56 -0
  33. llm-q-scaling-law-master/eval_scripts/matmult/qwen1.5_matmult_eval_commands.txt +56 -0
  34. llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-0.5B-best-trail.txt +0 -0
  35. llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-1.8B-best-trail.txt +0 -0
  36. llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-14B-best-trail.txt +7 -0
  37. llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-32B-best-trail.txt +8 -0
  38. llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-4B-best-trail.txt +0 -0
  39. llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-72B-best-trail.txt +8 -0
  40. llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-7B-best-trail.txt +0 -0
  41. llm-q-scaling-law-master/q_ratio_results/matmult/qwen1.5/qwen-32B-best-trail.txt +6 -0
  42. llm-q-scaling-law-master/requirements.txt +26 -0
  43. llm-q-scaling-law-master/run_scripts/env_command.sh +23 -0
  44. llm-q-scaling-law-master/run_scripts/layerwise/llama2_layerwise_run_commands.txt +24 -0
  45. llm-q-scaling-law-master/run_scripts/layerwise/llama_layerwise_run_commands.txt +32 -0
  46. llm-q-scaling-law-master/run_scripts/layerwise/opt_layerwise_run_commands.txt +64 -0
  47. llm-q-scaling-law-master/run_scripts/layerwise/qwen1.5_layerwise_run_commands.txt +64 -0
  48. llm-q-scaling-law-master/run_scripts/layerwise/runscript_generator.py +111 -0
  49. llm-q-scaling-law-master/run_scripts/matmult/llama2_matmult_run_commands.txt +18 -0
  50. llm-q-scaling-law-master/run_scripts/matmult/llama_matmult_run_commands.txt +24 -0
llm-q-scaling-law-master/.gitignore ADDED
@@ -0,0 +1,7 @@
+ .vscode/
+ __pycache__/
+ checkpoints/
+ wandb/
+ ckpts/
+ ckpt/
+ results/
llm-q-scaling-law-master/.gitmodules ADDED
@@ -0,0 +1,3 @@
+ [submodule "src/lm-evaluation-harness"]
+ path = src/lm-evaluation-harness
+ url = git@github.com:EleutherAI/lm-evaluation-harness.git
llm-q-scaling-law-master/README.md ADDED
@@ -0,0 +1,23 @@
+ # LLM Quantisation Scaling Law
+
+ ## Setup
+
+ A conda environment is recommended. To create one and install the dependencies, run:
+
+ ```bash
+ conda create -n llm-mixed-q python=3.11 -y
+ pip install -r requirements.txt
+ git submodule update --init --recursive
+ ```
+
+ ## Features
+
+ * Supported model architectures:
+   + Qwen2
+   + OPT
+   + Llama
+
+
+ ## Entry points
+
+ The configuration files for model and search parameters are located in `./configs/`.
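As a quick orientation (not part of the commit itself): the search configs below are plain TOML, so once the environment is activated (`conda activate llm-mixed-q`, which the snippet above presumably assumes before `pip install`) they can be inspected with Python 3.11's standard-library parser:

```python
# Minimal sketch (illustrative, not part of the repo): inspect a search
# config with the Python 3.11 standard-library TOML parser.
import tomllib

with open("configs/debug/mxint_4bit-tinyllama.toml", "rb") as f:
    cfg = tomllib.load(f)

print(cfg["setup"]["granularity"])                   # "transformer_layer"
print(cfg["quantization"]["linear"]["w"]["width"])   # 4
```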
llm-q-scaling-law-master/configs/debug/integer.toml ADDED
@@ -0,0 +1,19 @@
+ [quantization]
+ name = "integer"
+ data_in_width = 16
+ data_in_frac_width = 3
+ weight_width = 16
+ weight_frac_width = 3
+ bias_width = 16
+ bias_frac_width = 3
+
+ [setup]
+ # low to high precision ratio
+ ratio = 0.1
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity = "transformer_layer"
+ tasks = ['sst', 'mnli']
+ batch_size = 16
+ num_samples_per_trial = 1024
+ num_trials = 16
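For reference, this `integer` scheme reads as fixed-point: `*_width` is the total bit width and `*_frac_width` the number of fractional bits, so 16/3 quantises values to multiples of 1/8. A minimal sketch of that rounding, assuming round-to-nearest with saturation (not the repo's implementation):

```python
# Hedged fixed-point ("integer") quantisation matching the config above:
# `width` total bits, `frac_width` fractional bits.
def integer_quantize(x: float, width: int = 16, frac_width: int = 3) -> float:
    scale = 2 ** frac_width
    lo, hi = -(2 ** (width - 1)), 2 ** (width - 1) - 1
    q = max(lo, min(hi, round(x * scale)))  # round-to-nearest, then saturate
    return q / scale

print(integer_quantize(3.14159))  # 3.125, the nearest multiple of 1/8
```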
llm-q-scaling-law-master/configs/debug/mxint_4bit-bypass.toml ADDED
@@ -0,0 +1,36 @@
+ [quantization.linear.x]
+ name="bypass"
+ width=8
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="bypass"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="bypass"
+ width=8
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="bypass"
+ width=8
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.9
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="transformer_layer"
+ # granularity="matmult"
+ tasks=['custom_alpaca']
+ batch_size=8
+ num_samples_per_trial=10
+ num_trials=3
+ device_map="auto-balanced"
+ random=true
llm-q-scaling-law-master/configs/debug/mxint_4bit-no_evaluate.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.9
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="transformer_layer"
+ # granularity="matmult"
+ tasks=['custom_alpaca']
+ batch_size=4
+ num_samples_per_trial=1000
+ num_trials=3
+ device_map="auto-balanced"
+ random=true
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=[]
+ batch_size=1
+ num_best_epochs=1
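`mxint` here is a block (MX-style) integer format: each group of `block_size=16` values along `block_axis` shares one power-of-two scale, and each value keeps a `width=4`-bit signed mantissa. A hedged NumPy sketch of such a quantiser, assuming a power-of-two shared scale taken from the block maximum and a blocked dimension that divides evenly (this mirrors the config fields, not the repo's kernel):

```python
import numpy as np

def mxint_quantize(x: np.ndarray, width: int = 4,
                   block_size: int = 16, axis: int = -2) -> np.ndarray:
    # Move the blocked axis last and group into blocks of `block_size`.
    x = np.moveaxis(np.asarray(x, dtype=np.float64), axis, -1)
    shape = x.shape
    assert shape[-1] % block_size == 0, "blocked dim must divide evenly"
    blocks = x.reshape(-1, block_size)
    # One shared power-of-two scale per block, from the block maximum.
    max_abs = np.abs(blocks).max(axis=1, keepdims=True)
    exp = np.floor(np.log2(np.maximum(max_abs, 2.0 ** -126)))
    scale = 2.0 ** (exp - (width - 2))
    # Round mantissas to `width`-bit signed integers and dequantise.
    qmax = 2 ** (width - 1) - 1
    q = np.clip(np.round(blocks / scale), -qmax - 1, qmax)
    return np.moveaxis((q * scale).reshape(shape), -1, axis)
```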
llm-q-scaling-law-master/configs/debug/mxint_4bit-tinyllama.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.9
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="transformer_layer"
+ # granularity="matmult"
+ tasks=['custom_alpaca']
+ batch_size=8
+ num_samples_per_trial=100
+ num_trials=3
+ device_map="auto-balanced"
+ random=true
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=8
+ num_best_epochs=1
llm-q-scaling-law-master/configs/search/layerwise/custom-eval.toml ADDED
@@ -0,0 +1,38 @@
+ [quantization.linear.x] # this is E2M1
+ name="minifloat"
+ width=4
+ exponent_width=2
+ [quantization.linear.w]
+ name="minifloat"
+ width=4
+ exponent_width=2
+
+ [quantization.matmul.x]
+ name="minifloat"
+ width=4
+ exponent_width=2
+
+ [quantization.matmul.w]
+ name="minifloat"
+ width=4
+ exponent_width=2
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="transformer_layer"
+ # granularity="matmult"
+ tasks=['custom_pajama']
+ batch_size=8
+ num_samples_per_trial=1000
+ num_trials=4
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['scaling_law_easy', 'scaling_law_hard']
+ batch_size="auto:8"
+ num_best_epochs=3
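The comment above pins this minifloat down as E2M1: with `width=4` and `exponent_width=2`, that leaves one sign bit, two exponent bits, and one mantissa bit. A sketch enumerating the representable values, assuming an exponent bias of 1 and a subnormal at exponent 0 (the usual E2M1 convention; the repo's exact semantics may differ):

```python
# Hedged enumeration of E2M1 minifloat values (1 sign + 2 exponent + 1 mantissa).
def e2m1_values() -> list[float]:
    vals, bias = set(), 1
    for sign in (1, -1):
        for e in range(4):           # 2-bit exponent field
            for m in range(2):       # 1-bit mantissa field
                if e == 0:           # subnormal: no implicit leading 1
                    v = (m / 2) * 2 ** (1 - bias)
                else:                # normal: implicit leading 1
                    v = (1 + m / 2) * 2 ** (e - bias)
                vals.add(sign * v)
    return sorted(vals)

print(e2m1_values())
# [-6.0, -4.0, -3.0, -2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0]
```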
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="transformer_layer"
+ # granularity="matmult"
+ tasks=['custom_alpaca']
+ batch_size=1
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=1
+ num_best_epochs=3
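The `[setup]` block drives the search: `ratio` is described as the low-to-high precision ratio, `granularity` picks the unit that is flipped between precisions, and `num_trials` trials are run (`random=true` would sample configurations at random). A minimal sketch of one trial's sampling step, assuming `ratio` means the fraction of transformer layers put in low precision (a hypothetical helper for illustration, not the repo's search loop):

```python
import random

def sample_quantized_layers(num_layers: int, ratio: float,
                            seed: int | None = None) -> list[int]:
    # Pick round(ratio * num_layers) layer indices to run in low precision.
    rng = random.Random(seed)
    k = round(num_layers * ratio)
    return sorted(rng.sample(range(num_layers), k))

print(sample_quantized_layers(24, 0.5, seed=0))  # e.g. 12 of 24 layer indices
```

Lists of this shape are what the `--quantized_list` argument in the eval commands further below appears to carry.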
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="transformer_layer"
+ # granularity="matmult"
+ tasks=['custom_alpaca']
+ batch_size=2
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=2
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="transformer_layer"
+ # granularity="matmult"
+ tasks=['custom_alpaca']
+ batch_size=4
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=4
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="transformer_layer"
+ # granularity="matmult"
+ tasks=['custom_alpaca']
+ batch_size=8
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=8
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="transformer_layer"
+ # granularity="matmult"
+ tasks=['custom_pajama']
+ batch_size=1
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=1
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="transformer_layer"
+ # granularity="matmult"
+ tasks=['custom_pajama']
+ batch_size=2
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=2
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="transformer_layer"
+ # granularity="matmult"
+ tasks=['custom_pajama']
+ batch_size=4
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=4
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="transformer_layer"
+ # granularity="matmult"
+ tasks=['custom_pajama']
+ batch_size=8
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=8
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-1.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="matmult"
+ # granularity="matmult"
+ tasks=['custom_alpaca']
+ batch_size=1
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=1
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-2.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="matmult"
+ # granularity="matmult"
+ tasks=['custom_alpaca']
+ batch_size=2
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=1
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-4.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="matmult"
+ # granularity="matmult"
+ tasks=['custom_alpaca']
+ batch_size=4
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=1
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-alpaca-random-50-bs-8.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="matmult"
+ # granularity="matmult"
+ tasks=['custom_alpaca']
+ batch_size=8
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=1
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="matmult"
+ # granularity="matmult"
+ tasks=['custom_pajama']
+ batch_size=1
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=1
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="matmult"
+ # granularity="matmult"
+ tasks=['custom_pajama']
+ batch_size=2
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=2
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="matmult"
+ # granularity="matmult"
+ tasks=['custom_pajama']
+ batch_size=4
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=4
+ num_best_epochs=3
llm-q-scaling-law-master/configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml ADDED
@@ -0,0 +1,42 @@
+ [quantization.linear.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+ [quantization.linear.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.x]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-2
+
+ [quantization.matmul.w]
+ name="mxint"
+ width=4
+ block_size=16
+ block_axis=-1
+
+ [setup]
+ # low to high precision ratio
+ ratio=0.0
+ # at what granularity?
+ # select from ["transformer_layer", "matmult"]
+ granularity="matmult"
+ # granularity="matmult"
+ tasks=['custom_pajama']
+ batch_size=8
+ num_samples_per_trial=1000
+ num_trials=50
+ device_map="auto-balanced"
+ random=false
+
+ [evaluation]
+ # set to [] if not using any evaluation benchmarks
+ tasks=['mmlu']
+ batch_size=8
+ num_best_epochs=3
llm-q-scaling-law-master/eval_scripts/eval_harness_commands.txt ADDED
@@ -0,0 +1,34 @@
+ accelerate launch -m lm_eval --model hf \
+     --tasks mmlu \
+     --model_args pretrained=Qwen/Qwen1.5-0.5B \
+     --batch_size 8
+
+ accelerate launch -m lm_eval --model hf \
+     --tasks mmlu \
+     --model_args pretrained=Qwen/Qwen1.5-1.8B \
+     --batch_size 8
+
+ accelerate launch -m lm_eval --model hf \
+     --tasks mmlu \
+     --model_args pretrained=Qwen/Qwen1.5-4B \
+     --batch_size 8
+
+ accelerate launch -m lm_eval --model hf \
+     --tasks mmlu \
+     --model_args pretrained=Qwen/Qwen1.5-7B \
+     --batch_size 4
+
+ lm_eval --model hf \
+     --tasks mmlu \
+     --model_args pretrained=Qwen/Qwen1.5-14B,parallelize=True \
+     --batch_size 8
+
+ lm_eval --model hf \
+     --tasks mmlu \
+     --model_args pretrained=Qwen/Qwen1.5-32B,parallelize=True \
+     --batch_size 4
+
+ lm_eval --model hf \
+     --tasks mmlu \
+     --model_args pretrained=Qwen/Qwen1.5-72B,parallelize=True \
+     --batch_size 1
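The pattern in these commands: models up to 7B run data-parallel under `accelerate launch`, while 14B and larger drop `accelerate` and instead shard weights across GPUs with `parallelize=True`, with the batch size shrinking as the model grows. A hedged helper that reproduces that pattern (illustrative only, not part of the repo):

```python
def mmlu_command(model: str, batch_size: int, shard: bool) -> str:
    # Weight-sharded single process for large models, accelerate otherwise.
    if shard:
        return (f"lm_eval --model hf --tasks mmlu "
                f"--model_args pretrained={model},parallelize=True "
                f"--batch_size {batch_size}")
    return (f"accelerate launch -m lm_eval --model hf --tasks mmlu "
            f"--model_args pretrained={model} --batch_size {batch_size}")

print(mmlu_command("Qwen/Qwen1.5-14B", 8, shard=True))
```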
llm-q-scaling-law-master/eval_scripts/layerwise/evalscript_generator.py ADDED
@@ -0,0 +1,141 @@
+ """
+ This generates the eval scripts for the LLM-Q scaling law runs.
+
+ """
+
+ import os
+ import argparse
+
+ # the search command is
+ # python src/main.py --model_name {model_name} --search_config configs/search/mxint_4bit-tinyllama.toml --model_parallel --disable_wandb --save_dir ../ckpt/test
+ # the save dir is
+ # results/search/layerwise/{model_name}_{ratio}
+
+ granularity = ["transformer_layer"]
+
+ # opt template for searching
+
+ opt_model_sizes = ["opt-125m", "opt-350m", "opt-1.3b", "opt-2.7b", "opt-6.7b", "opt-13b", "opt-30b", "opt-66b"]
+ opt_batch_size = [8, 8, 8, 8, 4, 2, 1, 1]
+ opt_layer_wise_ratios = [0.0, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
+ opt_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+ opt_layer_wise_search_dir_template = "eval/layerwise/{}_{}"
+
+ opt_layerwise_run_command_palette = "python src/full_eval.py --model_arch opt --model_name {model_name} --ratio {q_ratio} --eval_config configs/search/layerwise/{search_config} --model_parallel --quantized_list \"{quantized_list}\""
+
+ with open("opt_layerwise_eval_commands.txt", "w") as f:
+     for i in range(len(opt_model_sizes)):
+
+         eval_best_trail_file = "../../q_ratio_results/layerwise/opt/opt-{}-best-trail.txt".format(opt_model_sizes[i].split("-")[1])
+         try:
+             with open(eval_best_trail_file, "r") as fi:
+                 # read each line of the form "{ratio}: [{quantized layer indices}]"
+                 lines = fi.readlines()
+                 best_trail_q_lists_mapping = {}
+                 for line in lines:
+                     q_ratio, q_list = line.split(":")
+                     q_list = q_list.strip()
+                     # remove [] from the string
+                     q_list = q_list[1:-1]
+                     q_ratio = q_ratio.strip()
+                     best_trail_q_lists_mapping[q_ratio] = q_list
+         except FileNotFoundError:
+             best_trail_q_lists_mapping = {}
+
+
+         model_size = opt_model_sizes[i]
+         model_batch_size = opt_batch_size[i]
+         for ratio in opt_layer_wise_ratios:
+             search_config = opt_layer_wise_search_config_template.format(model_batch_size)
+             save_dir = opt_layer_wise_search_dir_template.format(model_size, ratio)
+             q_list = best_trail_q_lists_mapping[str(ratio)] if str(ratio) in best_trail_q_lists_mapping else ""
+             model_name = f"facebook/{model_size}"
+             run_command = opt_layerwise_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, save_dir=save_dir, quantized_list=q_list)
+             # print(run_command)
+             f.write(run_command + "\n")
+         f.write("\n")
+
+
+ # qwen template for searching
+
+ qwen15_model_sizes = ["Qwen1.5-0.5B", "Qwen1.5-1.8B", "Qwen1.5-4B", "Qwen1.5-7B", "Qwen1.5-14B", "Qwen1.5-32B", "Qwen1.5-72B", "Qwen1.5-110B"]
+ qwen15_batch_size = [8, 8, 4, 2, 2, 2, 1, 1]
+ qwen15_layer_wise_ratios = [0.0, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
+ qwen15_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+ qwen15_layer_wise_search_dir_template = "eval/layerwise/{}_{}"
+
+ qwen15_layerwise_run_command_palette = "python src/full_eval.py --model_arch qwen2 --model_name {model_name} --ratio {q_ratio} --eval_config configs/search/layerwise/{search_config} --model_parallel --quantized_list \"{quantized_list}\""
+
+ with open("qwen1.5_layerwise_eval_commands.txt", "w") as f:
+     for i in range(len(qwen15_model_sizes)):
+
+         eval_best_trail_file = "../../q_ratio_results/layerwise/qwen1.5/qwen-{}-best-trail.txt".format(qwen15_model_sizes[i].split("-")[1])
+         try:
+             with open(eval_best_trail_file, "r") as fi:
+                 # read each line
+                 lines = fi.readlines()
+                 best_trail_q_lists_mapping = {}
+                 for line in lines:
+                     q_ratio, q_list = line.split(":")
+                     q_list = q_list.strip()
+                     q_list = q_list[1:-1]
+                     q_ratio = q_ratio.strip()
+                     best_trail_q_lists_mapping[q_ratio] = q_list
+         except FileNotFoundError:
+             best_trail_q_lists_mapping = {}
+
+         # print(best_trail_q_lists_mapping)
+
+         model_size = qwen15_model_sizes[i]
+         model_batch_size = qwen15_batch_size[i]
+         for ratio in qwen15_layer_wise_ratios:
+             search_config = qwen15_layer_wise_search_config_template.format(model_batch_size)
+             save_dir = qwen15_layer_wise_search_dir_template.format(model_size, ratio)
+             model_name = f"Qwen/{model_size}"
+             q_list = best_trail_q_lists_mapping[str(ratio)] if str(ratio) in best_trail_q_lists_mapping else ""
+             run_command = qwen15_layerwise_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, save_dir=save_dir, quantized_list=q_list)
+             # print(run_command)
+             f.write(run_command + "\n")
+         f.write("\n")
+
+ # llama template for searching
+
+
+
+ llama_model_sizes = ["llama-7b", "llama-13b", "llama-30b", "llama-65b"]
+ llama_batch_size = [2, 2, 1, 1]
+ llama_layer_wise_ratios = [0.0, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
+ llama_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+ llama_layer_wise_search_dir_template = "eval/layerwise/{}_{}"
+
+ llama_layerwise_run_command_palette = "python src/full_eval.py --model_arch llama --model_name {model_name} --ratio {q_ratio} --eval_config configs/search/layerwise/{search_config} --model_parallel --quantized_list \"{quantized_list}\""
+
+ with open("llama_layerwise_eval_commands.txt", "w") as f:
+     for i in range(len(llama_model_sizes)):
+
+         eval_best_trail_file = "../../q_ratio_results/layerwise/llama/llama-{}-best-trail.txt".format(llama_model_sizes[i].split("-")[1])
+         try:
+             with open(eval_best_trail_file, "r") as fi:
+                 # read each line
+                 lines = fi.readlines()
+                 best_trail_q_lists_mapping = {}
+                 for line in lines:
+                     q_ratio, q_list = line.split(":")
+                     q_list = q_list.strip()
+                     q_list = q_list[1:-1]
+                     q_ratio = q_ratio.strip()
+                     best_trail_q_lists_mapping[q_ratio] = q_list
+         except FileNotFoundError:
+             best_trail_q_lists_mapping = {}
+
+         model_size = llama_model_sizes[i]
+         model_batch_size = llama_batch_size[i]
+         for ratio in llama_layer_wise_ratios:
+             search_config = llama_layer_wise_search_config_template.format(model_batch_size)
+             save_dir = llama_layer_wise_search_dir_template.format(model_size, ratio)
+             model_name = f"huggyllama/{model_size}"
+             q_list = best_trail_q_lists_mapping[str(ratio)] if str(ratio) in best_trail_q_lists_mapping else ""
+             run_command = llama_layerwise_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, save_dir=save_dir, quantized_list=q_list)
+             # print(run_command)
+             f.write(run_command + "\n")
+         f.write("\n")
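For context, the script above expects each `*-best-trail.txt` line to look like `ratio: [comma-separated layer indices]`. A hedged round-trip of that parse, with hypothetical file content:

```python
# Mirrors the inline parsing in evalscript_generator.py.
line = "0.5: [0, 2, 5, 7, 9, 11]"      # hypothetical best-trail line
q_ratio, q_list = line.split(":")
q_list = q_list.strip()[1:-1]          # drop the surrounding brackets
print(q_ratio.strip(), "->", q_list)   # 0.5 -> 0, 2, 5, 7, 9, 11
```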
llm-q-scaling-law-master/eval_scripts/layerwise/llama_layerwise_eval_commands.txt ADDED
@@ -0,0 +1,36 @@
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+
llm-q-scaling-law-master/eval_scripts/layerwise/opt_layerwise_eval_commands.txt ADDED
@@ -0,0 +1,72 @@
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+
llm-q-scaling-law-master/eval_scripts/layerwise/qwen1.5_layerwise_eval_commands.txt ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-8.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-4.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1"
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list "0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-2.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list "0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.0 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.5 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.6 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.7 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.8 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.9 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.95 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.975 --eval_config configs/search/layerwise/mxint_4bit-alpaca-random-50-bs-1.toml --model_parallel --quantized_list ""
+
llm-q-scaling-law-master/eval_scripts/matmult/evalscript_generator.py ADDED
@@ -0,0 +1,142 @@
+ """
+ This generates the runscripts (full-eval commands) for the LLM-Q scaling law
+ simulations at matmult granularity.
+ """
+
+ # the underlying search command was:
+ # python src/main.py --model_name {model_name} --search_config configs/search/mxint_4bit-tinyllama.toml --model_parallel --disable_wandb --save_dir ../ckpt/test
+ # the eval save dir follows:
+ # eval/matmult/{model_name}_{ratio}  (computed below, but not part of the emitted command)
+
+ granularity = ["matmult"]
+
+ # opt template for eval
+
+ opt_model_sizes = ["opt-125m", "opt-350m", "opt-1.3b", "opt-2.7b", "opt-6.7b", "opt-13b", "opt-30b", "opt-66b"]
+ opt_batch_size = [8, 8, 8, 8, 4, 2, 1, 1]
+ opt_matmult_ratios = [0.0, 0.5, 0.9, 0.95, 0.975, 0.99]
+ opt_matmult_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+ opt_matmult_search_dir_template = "eval/matmult/{}_{}"
+
+ opt_matmult_run_command_palette = "python src/full_eval.py --model_arch opt --model_name {model_name} --ratio {q_ratio} --eval_config configs/search/matmult/{search_config} --model_parallel --quantized_list \"{quantized_list}\""
+
+ with open("opt_matmult_eval_commands.txt", "w") as f:
+     for i in range(len(opt_model_sizes)):
+         # load the best-trial quantization lists found by the search, if present
+         # (the result files are named "best-trail" in this repo)
+         eval_best_trail_file = "../../q_ratio_results/matmult/opt/opt-{}-best-trail.txt".format(opt_model_sizes[i].split("-")[1])
+         try:
+             with open(eval_best_trail_file, "r") as fi:
+                 # each line reads: "<q_ratio> : [<flag>, <flag>, ...]"
+                 lines = fi.readlines()
+                 best_trail_q_lists_mapping = {}
+                 for line in lines:
+                     q_ratio, q_list = line.split(":")
+                     q_list = q_list.strip()
+                     q_list = q_list[1:-1]  # remove the surrounding [] from the string
+                     q_ratio = q_ratio.strip()
+                     best_trail_q_lists_mapping[q_ratio] = q_list
+         except FileNotFoundError:
+             best_trail_q_lists_mapping = {}
+
+         model_size = opt_model_sizes[i]
+         model_batch_size = opt_batch_size[i]
+         for ratio in opt_matmult_ratios:
+             search_config = opt_matmult_search_config_template.format(model_batch_size)
+             save_dir = opt_matmult_search_dir_template.format(model_size, ratio)
+             q_list = best_trail_q_lists_mapping.get(str(ratio), "")
+             model_name = f"facebook/{model_size}"
+             run_command = opt_matmult_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, save_dir=save_dir, quantized_list=q_list)
+             f.write(run_command + "\n")
+         f.write("\n")
+
+ # qwen template for eval
+
+ qwen15_model_sizes = ["Qwen1.5-0.5B", "Qwen1.5-1.8B", "Qwen1.5-4B", "Qwen1.5-7B", "Qwen1.5-14B", "Qwen1.5-32B", "Qwen1.5-72B", "Qwen1.5-110B"]
+ qwen15_batch_size = [8, 8, 4, 2, 2, 2, 1, 1]
+ qwen15_matmult_ratios = [0.0, 0.5, 0.9, 0.95, 0.975, 0.99]
+ qwen15_matmult_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+ qwen15_matmult_search_dir_template = "eval/matmult/{}_{}"
+
+ qwen15_matmult_run_command_palette = "python src/full_eval.py --model_arch qwen2 --model_name {model_name} --ratio {q_ratio} --eval_config configs/search/matmult/{search_config} --model_parallel --quantized_list \"{quantized_list}\""
+
+ with open("qwen1.5_matmult_eval_commands.txt", "w") as f:
+     for i in range(len(qwen15_model_sizes)):
+         eval_best_trail_file = "../../q_ratio_results/matmult/qwen1.5/qwen-{}-best-trail.txt".format(qwen15_model_sizes[i].split("-")[1])
+         try:
+             with open(eval_best_trail_file, "r") as fi:
+                 lines = fi.readlines()
+                 best_trail_q_lists_mapping = {}
+                 for line in lines:
+                     q_ratio, q_list = line.split(":")
+                     q_list = q_list.strip()
+                     q_list = q_list[1:-1]
+                     q_ratio = q_ratio.strip()
+                     best_trail_q_lists_mapping[q_ratio] = q_list
+         except FileNotFoundError:
+             best_trail_q_lists_mapping = {}
+
+         model_size = qwen15_model_sizes[i]
+         model_batch_size = qwen15_batch_size[i]
+         for ratio in qwen15_matmult_ratios:
+             search_config = qwen15_matmult_search_config_template.format(model_batch_size)
+             save_dir = qwen15_matmult_search_dir_template.format(model_size, ratio)
+             model_name = f"Qwen/{model_size}"
+             q_list = best_trail_q_lists_mapping.get(str(ratio), "")
+             run_command = qwen15_matmult_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, save_dir=save_dir, quantized_list=q_list)
+             f.write(run_command + "\n")
+         f.write("\n")
+
+ # llama template for eval
+
+ llama_model_sizes = ["llama-7b", "llama-13b", "llama-30b", "llama-65b"]
+ llama_batch_size = [2, 2, 1, 1]
+ llama_matmult_ratios = [0.0, 0.5, 0.9, 0.95, 0.975, 0.99]
+ llama_matmult_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+ llama_matmult_search_dir_template = "eval/matmult/{}_{}"
+
+ llama_matmult_run_command_palette = "python src/full_eval.py --model_arch llama --model_name {model_name} --ratio {q_ratio} --eval_config configs/search/matmult/{search_config} --model_parallel --quantized_list \"{quantized_list}\""
+
+ with open("llama_matmult_eval_commands.txt", "w") as f:
+     for i in range(len(llama_model_sizes)):
+         eval_best_trail_file = "../../q_ratio_results/matmult/llama/llama-{}-best-trail.txt".format(llama_model_sizes[i].split("-")[1])
+         try:
+             with open(eval_best_trail_file, "r") as fi:
+                 lines = fi.readlines()
+                 best_trail_q_lists_mapping = {}
+                 for line in lines:
+                     q_ratio, q_list = line.split(":")
+                     q_list = q_list.strip()
+                     q_list = q_list[1:-1]
+                     q_ratio = q_ratio.strip()
+                     best_trail_q_lists_mapping[q_ratio] = q_list
+         except FileNotFoundError:
+             best_trail_q_lists_mapping = {}
+
+         model_size = llama_model_sizes[i]
+         model_batch_size = llama_batch_size[i]
+         for ratio in llama_matmult_ratios:
+             search_config = llama_matmult_search_config_template.format(model_batch_size)
+             save_dir = llama_matmult_search_dir_template.format(model_size, ratio)
+             model_name = f"huggyllama/{model_size}"
+             q_list = best_trail_q_lists_mapping.get(str(ratio), "")
+             run_command = llama_matmult_run_command_palette.format(model_name=model_name, q_ratio=ratio, search_config=search_config, save_dir=save_dir, quantized_list=q_list)
+             f.write(run_command + "\n")
+         f.write("\n")
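Note: the --quantized_list strings passed to full_eval.py above are the bracket-stripped bodies of the best-trail files under q_ratio_results/ (one line per ratio, e.g. "0.5 : [1, 1, 0, ...]", with one 0/1 flag per quantized unit). As a minimal sketch of how such a string decodes back into per-unit flags (parse_flags is a hypothetical helper for illustration, not part of this repo):

    # Hypothetical helper (illustration only, not in this repo): decode a
    # --quantized_list string such as "1, 0, 1" into one boolean per unit.
    def parse_flags(quantized_list: str) -> list[bool]:
        s = quantized_list.strip()
        if not s:  # an empty string means no pre-searched list is supplied
            return []
        return [tok.strip() == "1" for tok in s.split(",")]

    assert parse_flags("1, 0, 1") == [True, False, True]
    assert parse_flags("") == []

Running evalscript_generator.py from eval_scripts/matmult/ regenerates the three command files that follow.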
llm-q-scaling-law-master/eval_scripts/matmult/llama_matmult_eval_commands.txt ADDED
@@ -0,0 +1,28 @@
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-7b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-13b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-30b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch llama --model_name huggyllama/llama-65b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+
llm-q-scaling-law-master/eval_scripts/matmult/opt_matmult_eval_commands.txt ADDED
@@ -0,0 +1,56 @@
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-125m --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-350m --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-1.3b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-2.7b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-6.7b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-13b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-30b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch opt --model_name facebook/opt-66b --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+
llm-q-scaling-law-master/eval_scripts/matmult/qwen1.5_matmult_eval_commands.txt ADDED
@@ -0,0 +1,56 @@
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-0.5B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-1.8B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-4B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-7B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-14B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list "0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1"
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-32B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --quantized_list "1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1"
42
+
43
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
44
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
45
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
46
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
47
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-72B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.0 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.5 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.9 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.95 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.975 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+ python src/full_eval.py --model_arch qwen2 --model_name Qwen/Qwen1.5-110B --ratio 0.99 --eval_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --quantized_list ""
+
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-0.5B-best-trail.txt ADDED
File without changes
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-1.8B-best-trail.txt ADDED
File without changes
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-14B-best-trail.txt ADDED
@@ -0,0 +1,7 @@
+ 0.5 : [1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0]
+ 0.6 : [1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0]
+ 0.7 : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0]
+ 0.8 : [1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1]
+ 0.9 : [1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1]
+ 0.95 : [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1]
+ 0.975 : [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1]
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-32B-best-trail.txt ADDED
@@ -0,0 +1,8 @@
+ 0.0 : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ 0.5 : [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
+ 0.6 : [0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0]
+ 0.7 : [1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0]
+ 0.8 : [1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0]
+ 0.9 : [0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1]
+ 0.95 : [0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0]
+ 0.975 :
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-4B-best-trail.txt ADDED
File without changes
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-72B-best-trail.txt ADDED
@@ -0,0 +1,8 @@
+ 0.0 : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ 0.5 : [0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0]
+ 0.6 : [1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1]
+ 0.7 : [0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0]
+ 0.8 : [1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1]
+ 0.9 : [0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
+ 0.95 : [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+ 0.975 :
llm-q-scaling-law-master/q_ratio_results/layerwise/qwen1.5/qwen-7B-best-trail.txt ADDED
File without changes
llm-q-scaling-law-master/q_ratio_results/matmult/qwen1.5/qwen-32B-best-trail.txt ADDED
@@ -0,0 +1,6 @@
+ 0.0 : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ 0.5 : [1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0]
+ 0.9 : [1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0]
+ 0.95 : [1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1]
+ 0.975 : [1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+ 0.99 : [1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
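Note: each "ratio : [...]" row in these best-trail files records the best per-unit quantization bitmask found by the search for that target q_ratio, and these bitmasks are what src/full_eval.py consumes through --quantized_list (compare the 0.975 and 0.99 rows of this file with the Qwen1.5-32B eval commands earlier in this diff). A minimal Python sketch of turning one such row into an eval command; the helper below is illustrative only and not part of the uploaded repo:

# Hypothetical helper (not in the repo): convert one "ratio : [mask]" row
# from a *-best-trail.txt file into a src/full_eval.py invocation.
def best_trail_row_to_command(row,
                              model_arch="qwen2",
                              model_name="Qwen/Qwen1.5-32B",
                              eval_config="configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml"):
    ratio, _, mask = row.partition(":")           # e.g. "0.99 : [1, 1, 0, ...]"
    mask = mask.strip().lstrip("[").rstrip("]")   # keep "1, 1, 0, ..." verbatim
    return ("python src/full_eval.py"
            f" --model_arch {model_arch} --model_name {model_name}"
            f" --ratio {ratio.strip()} --eval_config {eval_config}"
            f" --model_parallel --quantized_list \"{mask}\"")

# Example:
print(best_trail_row_to_command("0.5 : [1, 1, 0, 0, 1]"))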
llm-q-scaling-law-master/requirements.txt ADDED
@@ -0,0 +1,26 @@
+ torch==2.3.1
+ transformers==4.42.3
+ datasets==2.20.0
+ nvitop
+ accelerate
+ joblib
+ optuna
+ wandb
+ toml
+
+ evaluate>=0.4.0
+ jsonlines
+ numexpr
+ peft>=0.2.0
+ pybind11>=2.6.2
+ pytablewriter
+ rouge-score>=0.0.4
+ sacrebleu>=1.5.0
+ scikit-learn>=0.24.1
+ sqlitedict
+ tqdm-multiprocess
+ zstandard
+ dill
+ word2number
+ more_itertools
+ sentencepiece
llm-q-scaling-law-master/run_scripts/env_command.sh ADDED
@@ -0,0 +1,23 @@
+ export CUDA_VISIBLE_DEVICES=0
+ conda activate llm-mixed-q
+
+ export CUDA_VISIBLE_DEVICES=1
+ conda activate llm-mixed-q
+
+ export CUDA_VISIBLE_DEVICES=2
+ conda activate llm-mixed-q
+
+ export CUDA_VISIBLE_DEVICES=3
+ conda activate llm-mixed-q
+
+ export CUDA_VISIBLE_DEVICES=4
+ conda activate llm-mixed-q
+
+ export CUDA_VISIBLE_DEVICES=5
+ conda activate llm-mixed-q
+
+ export CUDA_VISIBLE_DEVICES=6
+ conda activate llm-mixed-q
+
+ export CUDA_VISIBLE_DEVICES=7
+ conda activate llm-mixed-q
llm-q-scaling-law-master/run_scripts/layerwise/llama2_layerwise_run_commands.txt ADDED
@@ -0,0 +1,24 @@
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.5
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.6
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.7
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.8
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.9
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.95
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-7b_0.975
+
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.5
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.6
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.7
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.8
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.9
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.95
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-13b_0.975
+
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.5
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.6
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.7
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.8
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.9
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.95
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/Llama-2-70b_0.975
+
llm-q-scaling-law-master/run_scripts/layerwise/llama_layerwise_run_commands.txt ADDED
@@ -0,0 +1,32 @@
+ python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.5
+ python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.6
+ python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.7
+ python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.8
+ python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.9
+ python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.95
+ python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-7b_0.975
+
+ python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.5
+ python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.6
+ python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.7
+ python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.8
+ python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.9
+ python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.95
+ python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-13b_0.975
+
+ python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.5
+ python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.6
+ python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.7
+ python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.8
+ python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.9
+ python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.95
+ python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-30b_0.975
+
+ python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.5 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.5
+ python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.6 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.6
+ python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.7 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.7
+ python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.8 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.8
+ python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.9 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.9
+ python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.95 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.95
+ python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.975 --wandb_group llama-layer-search --save_dir results/search/layerwise/llama-65b_0.975
+
llm-q-scaling-law-master/run_scripts/layerwise/opt_layerwise_run_commands.txt ADDED
@@ -0,0 +1,64 @@
+ python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.5
+ python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.6
+ python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.7
+ python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.8
+ python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.9
+ python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.95
+ python src/main.py --model_arch opt --model_name facebook/opt-125m --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-125m_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-125m_0.975
+
+ python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.5
+ python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.6
+ python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.7
+ python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.8
+ python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.9
+ python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.95
+ python src/main.py --model_arch opt --model_name facebook/opt-350m --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-350m_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-350m_0.975
+
+ python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.5
+ python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.6
+ python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.7
+ python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.8
+ python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.9
+ python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.95
+ python src/main.py --model_arch opt --model_name facebook/opt-1.3b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-1.3b_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-1.3b_0.975
+
+ python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.5
+ python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.6
+ python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.7
+ python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.8
+ python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.9
+ python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.95
+ python src/main.py --model_arch opt --model_name facebook/opt-2.7b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name opt-2.7b_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-2.7b_0.975
+
+ python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.5
+ python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.6
+ python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.7
+ python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.8
+ python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.9
+ python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.95
+ python src/main.py --model_arch opt --model_name facebook/opt-6.7b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name opt-6.7b_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-6.7b_0.975
+
+ python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.5
+ python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.6
+ python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.7
+ python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.8
+ python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.9
+ python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.95
+ python src/main.py --model_arch opt --model_name facebook/opt-13b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name opt-13b_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-13b_0.975
+
+ python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.5
+ python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.6
+ python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.7
+ python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.8
+ python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.9
+ python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.95
+ python src/main.py --model_arch opt --model_name facebook/opt-30b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-30b_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-30b_0.975
+
+ python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.5 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.5
+ python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.6 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.6
+ python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.7 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.7
+ python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.8 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.8
+ python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.9 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.9
+ python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.95 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.95
+ python src/main.py --model_arch opt --model_name facebook/opt-66b --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name opt-66b_0.975 --wandb_group opt-layer-search --save_dir results/search/layerwise/opt-66b_0.975
+
llm-q-scaling-law-master/run_scripts/layerwise/qwen1.5_layerwise_run_commands.txt ADDED
@@ -0,0 +1,64 @@
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.5
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.6
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.7
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.8
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.9
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.95
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-0.5B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-0.5B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-0.5B_0.975
+
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.5
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.6
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.7
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.8
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.9
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.95
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-1.8B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-8.toml --model_parallel --wandb_name Qwen1.5-1.8B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-1.8B_0.975
+
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.5
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.6
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.7
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.8
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.9
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.95
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-4B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-4.toml --model_parallel --wandb_name Qwen1.5-4B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-4B_0.975
+
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.5
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.6
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.7
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.8
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.9
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.95
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-7B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-7B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-7B_0.975
+
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.5
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.6
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.7
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.8
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.9
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.95
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-14B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-14B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-14B_0.975
40
+
41
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.5
42
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.6
43
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.7
44
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.8
45
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.9
46
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.95
47
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-32B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Qwen1.5-32B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-32B_0.975
48
+
49
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.5
50
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.6
51
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.7
52
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.8
53
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.9
54
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.95
55
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-72B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-72B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-72B_0.975
56
+
57
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.5 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.5 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.5
58
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.6 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.6 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.6
59
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.7 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.7 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.7
60
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.8 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.8 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.8
61
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.9 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.9 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.9
62
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.95 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.95 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.95
63
+ python src/main.py --model_arch qwen1.5 --model_name Qwen/Qwen1.5-110B --q_ratio 0.975 --search_config configs/search/layerwise/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Qwen1.5-110B_0.975 --wandb_group qwen15-layer-search --save_dir results/search/layerwise/Qwen1.5-110B_0.975
64
+
llm-q-scaling-law-master/run_scripts/layerwise/runscript_generator.py ADDED
@@ -0,0 +1,94 @@
+ """
+ Generates the run scripts for the LLM-Q scaling-law layerwise search experiments.
+ """
+
+ # The search command has the form:
+ # python src/main.py --model_name {model_name} --search_config configs/search/mxint_4bit-tinyllama.toml --model_parallel --disable_wandb --save_dir ../ckpt/test
+ # Results are saved under:
+ # results/search/layerwise/{model_name}_{ratio}
+
+ granularity = ["transformer_layer"]  # search granularity targeted by these run scripts
+
+ # OPT layerwise search commands
+
+ opt_model_sizes = ["opt-125m", "opt-350m", "opt-1.3b", "opt-2.7b", "opt-6.7b", "opt-13b", "opt-30b", "opt-66b"]
+ opt_batch_size = [8, 8, 8, 8, 4, 2, 1, 1]
+ opt_layer_wise_ratios = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
+ opt_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+ opt_layer_wise_search_dir_template = "search/layerwise/{}_{}"
+
+ opt_layerwise_run_command_template = "python src/main.py --model_arch opt --model_name {model_name} --q_ratio {q_ratio} --search_config configs/search/layerwise/{search_config} --model_parallel --wandb_name {wandb_name} --wandb_group opt-layer-search --save_dir results/{save_dir}"
+
+ with open("opt_layerwise_run_commands.txt", "w") as f:
+     for model_size, model_batch_size in zip(opt_model_sizes, opt_batch_size):
+         for ratio in opt_layer_wise_ratios:
+             search_config = opt_layer_wise_search_config_template.format(model_batch_size)
+             save_dir = opt_layer_wise_search_dir_template.format(model_size, ratio)
+             model_name = f"facebook/{model_size}"
+             wandb_name = f"{model_size}_{ratio}"
+             run_command = opt_layerwise_run_command_template.format(model_name=model_name, q_ratio=ratio, search_config=search_config, wandb_name=wandb_name, save_dir=save_dir)
+             f.write(run_command + "\n")
+         f.write("\n")
+
+ # Qwen1.5 layerwise search commands
+
+ qwen15_model_sizes = ["Qwen1.5-0.5B", "Qwen1.5-1.8B", "Qwen1.5-4B", "Qwen1.5-7B", "Qwen1.5-14B", "Qwen1.5-32B", "Qwen1.5-72B", "Qwen1.5-110B"]
+ qwen15_batch_size = [8, 8, 4, 2, 2, 2, 1, 1]
+ qwen15_layer_wise_ratios = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
+ qwen15_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+ qwen15_layer_wise_search_dir_template = "search/layerwise/{}_{}"
+
+ qwen15_layerwise_run_command_template = "python src/main.py --model_arch qwen1.5 --model_name {model_name} --q_ratio {q_ratio} --search_config configs/search/layerwise/{search_config} --model_parallel --wandb_name {wandb_name} --wandb_group qwen15-layer-search --save_dir results/{save_dir}"
+
+ with open("qwen1.5_layerwise_run_commands.txt", "w") as f:
+     for model_size, model_batch_size in zip(qwen15_model_sizes, qwen15_batch_size):
+         for ratio in qwen15_layer_wise_ratios:
+             search_config = qwen15_layer_wise_search_config_template.format(model_batch_size)
+             save_dir = qwen15_layer_wise_search_dir_template.format(model_size, ratio)
+             model_name = f"Qwen/{model_size}"
+             wandb_name = f"{model_size}_{ratio}"
+             run_command = qwen15_layerwise_run_command_template.format(model_name=model_name, q_ratio=ratio, search_config=search_config, wandb_name=wandb_name, save_dir=save_dir)
+             f.write(run_command + "\n")
+         f.write("\n")
+
+ # LLaMA layerwise search commands
+
+ llama_model_sizes = ["llama-7b", "llama-13b", "llama-30b", "llama-65b"]
+ llama_batch_size = [2, 2, 1, 1]
+ llama_layer_wise_ratios = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
+ llama_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+ llama_layer_wise_search_dir_template = "search/layerwise/{}_{}"
+
+ llama_layerwise_run_command_template = "python src/main.py --model_arch llama --model_name {model_name} --q_ratio {q_ratio} --search_config configs/search/layerwise/{search_config} --model_parallel --wandb_name {wandb_name} --wandb_group llama-layer-search --save_dir results/{save_dir}"
+
+ with open("llama_layerwise_run_commands.txt", "w") as f:
+     for model_size, model_batch_size in zip(llama_model_sizes, llama_batch_size):
+         for ratio in llama_layer_wise_ratios:
+             search_config = llama_layer_wise_search_config_template.format(model_batch_size)
+             save_dir = llama_layer_wise_search_dir_template.format(model_size, ratio)
+             model_name = f"huggyllama/{model_size}"
+             wandb_name = f"{model_size}_{ratio}"
+             run_command = llama_layerwise_run_command_template.format(model_name=model_name, q_ratio=ratio, search_config=search_config, wandb_name=wandb_name, save_dir=save_dir)
+             f.write(run_command + "\n")
+         f.write("\n")
+
+ # Llama-2 layerwise search commands
+
+ llama2_model_sizes = ["Llama-2-7b", "Llama-2-13b", "Llama-2-70b"]
+ llama2_batch_size = [2, 2, 1]
+ llama2_layer_wise_ratios = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.975]
+ llama2_layer_wise_search_config_template = "mxint_4bit-pajama-random-50-bs-{}.toml"
+ llama2_layer_wise_search_dir_template = "search/layerwise/{}_{}"
+
+ llama2_layerwise_run_command_template = "python src/main.py --model_arch llama2 --model_name {model_name} --q_ratio {q_ratio} --search_config configs/search/layerwise/{search_config} --model_parallel --wandb_name {wandb_name} --wandb_group llama-layer-search --save_dir results/{save_dir}"
+
+ with open("llama2_layerwise_run_commands.txt", "w") as f:
+     for model_size, model_batch_size in zip(llama2_model_sizes, llama2_batch_size):
+         for ratio in llama2_layer_wise_ratios:
+             search_config = llama2_layer_wise_search_config_template.format(model_batch_size)
+             save_dir = llama2_layer_wise_search_dir_template.format(model_size, ratio)
+             model_name = f"meta-llama/{model_size}-chat-hf"  # use the instruction-tuned chat checkpoints
+             wandb_name = f"{model_size}_{ratio}"
+             run_command = llama2_layerwise_run_command_template.format(model_name=model_name, q_ratio=ratio, search_config=search_config, wandb_name=wandb_name, save_dir=save_dir)
+             f.write(run_command + "\n")
+         f.write("\n")
llm-q-scaling-law-master/run_scripts/matmult/llama2_matmult_run_commands.txt ADDED
@@ -0,0 +1,18 @@
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.5 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-7b_0.5
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.9 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-7b_0.9
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.95 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-7b_0.95
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.975 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-7b_0.975
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-7b-chat-hf --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-7b_0.99 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-7b_0.99
+
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.5 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-13b_0.5
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.9 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-13b_0.9
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.95 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-13b_0.95
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.975 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-13b_0.975
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-13b-chat-hf --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name Llama-2-13b_0.99 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-13b_0.99
+
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.5 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-70b_0.5
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.9 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-70b_0.9
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.95 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-70b_0.95
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.975 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-70b_0.975
+ python src/main.py --model_arch llama2 --model_name meta-llama/Llama-2-70b-chat-hf --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name Llama-2-70b_0.99 --wandb_group llama-layer-search --save_dir results/search/matmult/Llama-2-70b_0.99
+
llm-q-scaling-law-master/run_scripts/matmult/llama_matmult_run_commands.txt ADDED
@@ -0,0 +1,24 @@
+ python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.5 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-7b_0.5
+ python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.9 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-7b_0.9
+ python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.95 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-7b_0.95
+ python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.975 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-7b_0.975
+ python src/main.py --model_arch llama --model_name huggyllama/llama-7b --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-7b_0.99 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-7b_0.99
+
+ python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.5 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-13b_0.5
+ python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.9 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-13b_0.9
+ python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.95 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-13b_0.95
+ python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.975 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-13b_0.975
+ python src/main.py --model_arch llama --model_name huggyllama/llama-13b --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-2.toml --model_parallel --wandb_name llama-13b_0.99 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-13b_0.99
+
+ python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.5 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-30b_0.5
+ python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.9 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-30b_0.9
+ python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.95 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-30b_0.95
+ python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.975 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-30b_0.975
+ python src/main.py --model_arch llama --model_name huggyllama/llama-30b --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-30b_0.99 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-30b_0.99
+
+ python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.5 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.5 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-65b_0.5
+ python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.9 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.9 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-65b_0.9
+ python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.95 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.95 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-65b_0.95
+ python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.975 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.975 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-65b_0.975
+ python src/main.py --model_arch llama --model_name huggyllama/llama-65b --q_ratio 0.99 --search_config configs/search/matmult/mxint_4bit-pajama-random-50-bs-1.toml --model_parallel --wandb_name llama-65b_0.99 --wandb_group llama-matmult-search --save_dir results/search/matmult/llama-65b_0.99
+
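Note that the matmult command files above sweep a sparser q_ratio grid (0.5, 0.9, 0.95, 0.975, 0.99) than the layerwise scripts (0.5 through 0.975 in seven steps). A quick sanity check of the run counts implied by the two llama matmult files, with the grids read directly from the commands above:

# Run counts implied by the matmult command files (values copied from the lists above).
matmult_ratios = [0.5, 0.9, 0.95, 0.975, 0.99]
llama_sizes = ["llama-7b", "llama-13b", "llama-30b", "llama-65b"]
llama2_sizes = ["Llama-2-7b", "Llama-2-13b", "Llama-2-70b"]

print(len(llama_sizes) * len(matmult_ratios))   # 20 llama matmult searches
print(len(llama2_sizes) * len(matmult_ratios))  # 15 Llama-2 matmult searches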