uyiosa committed (verified)
Commit cf11603 · 1 Parent(s): 8c69649

Update README.md

Files changed (1): README.md (+33, -1)
README.md CHANGED
@@ -15,4 +15,36 @@ tags:
  - llama
  - llama-3
  - pytorch
- ---
+ ---
+
+ Model is quantized to FP8 using llmcompressor.
+
+ ```python
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from llmcompressor.transformers import oneshot
+ from llmcompressor.modifiers.quantization import QuantizationModifier
+
+ # Define the model ID for the model you want to quantize
+ MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"
+
+ # Load the model and tokenizer
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_ID, device_map="auto", torch_dtype="auto"
+ )
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+
+ # Configure the quantization recipe: dynamic FP8 on Linear layers, keep lm_head in full precision
+ recipe = QuantizationModifier(targets="Linear", scheme="FP8_DYNAMIC", ignore=["lm_head"])
+
+ # Apply the quantization algorithm
+ oneshot(model=model, recipe=recipe)
+
+ # Define the directory to save the quantized model
+ SAVE_DIR = MODEL_ID.split("/")[1] + "-FP8-Dynamic"
+
+ # Save the quantized model and tokenizer
+ model.save_pretrained(SAVE_DIR)
+ tokenizer.save_pretrained(SAVE_DIR)
+
+ print(f"Quantized model saved to {SAVE_DIR}")
+ ```
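
Because the FP8_DYNAMIC scheme quantizes weights statically and activations dynamically at runtime, no calibration dataset is passed to oneshot. A minimal sketch of running the resulting checkpoint, assuming vLLM is installed and the output directory follows the SAVE_DIR convention from the snippet above:

```python
from vllm import LLM, SamplingParams

# Directory produced by the quantization script above (assumed name, not from the commit)
MODEL_PATH = "Llama-3.2-1B-Instruct-FP8-Dynamic"

# vLLM reads the quantization config that llmcompressor stored in the checkpoint
llm = LLM(model=MODEL_PATH)

sampling_params = SamplingParams(temperature=0.7, max_tokens=64)

# Generate a completion with the quantized model
outputs = llm.generate(["What does FP8 quantization change?"], sampling_params)
print(outputs[0].outputs[0].text)
```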