SichangHe committed
Commit 40d43a5 · verified · 1 Parent(s): 3d68bf7

Code to generate.

Files changed (1): README.md +34 -3
README.md CHANGED
@@ -1,3 +1,34 @@
- ---
- license: apache-2.0
- ---
+ ---
+ license: apache-2.0
+ ---
+
+ Generated from:
+
+ ```python
+ import torch
+ from llmcompressor import oneshot
+ from llmcompressor.modifiers.quantization import QuantizationModifier
+ from transformers import (
+     AutoModelForCausalLM,  # type:ignore[reportPrivateImportUsage]
+     AutoTokenizer,  # type:ignore[reportPrivateImportUsage]
+ )
+
+ MODEL_ID = "tiiuae/falcon-7b-instruct"
+
+ # Copied from <https://github.com/vllm-project/llm-compressor/blob/9d8a46418f517dd6399e2e9c179805247a7be584/examples/quantization_w8a8_fp8/README.md>
+ model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+
+ # Configure the simple PTQ quantization.
+ recipe = QuantizationModifier(
+     targets="Linear", scheme="FP8_DYNAMIC", ignore=["lm_head"]
+ )
+
+ # Apply the quantization algorithm.
+ oneshot(model=model, recipe=recipe)  # type:ignore[arg-type]
+
+ # Save the model.
+ SAVE_DIR = f"data/models/{MODEL_ID.split('/')[-1]}-FP8-Dynamic"
+ model.save_pretrained(SAVE_DIR)
+ tokenizer.save_pretrained(SAVE_DIR)
+ ```