Update README.md
As correctly pointed out in the [community discussion](https://huggingface.co/neuralmagic/Meta-Llama-3.1-70B-Instruct-quantized.w8a8/discussions/2), the recipe shown in the README contradicts the quantization parameters recorded in the model's `recipe.yaml`. The correct version appears to be the one demonstrated in the [llm-compressor GitHub README](https://github.com/vllm-project/llm-compressor?tab=readme-ov-file#apply-quantization); the proposed change updates the README accordingly.
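For anyone who wants to verify the discrepancy themselves, the serialized recipe that shipped with the checkpoint can be inspected directly. A minimal sketch using `huggingface_hub` (the repo id is the model this README belongs to):

```python
from huggingface_hub import hf_hub_download

# Fetch the recipe that was serialized alongside the checkpoint and
# compare it with the snippet shown in the README.
recipe_path = hf_hub_download(
    repo_id="neuralmagic/Meta-Llama-3.1-70B-Instruct-quantized.w8a8",
    filename="recipe.yaml",
)
print(open(recipe_path).read())
```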
README.md CHANGED

```diff
@@ -107,12 +107,10 @@ ds = load_dataset("neuralmagic/LLM_compression_calibration", split="train")
 ds = ds.shuffle().select(range(num_samples))
 ds = ds.map(preprocess_fn)
 
-recipe = GPTQModifier(
-    targets="Linear",
-    scheme="W8A8",
-    ignore=["lm_head"],
-    dampening_frac=0.1,
-)
+recipe = [
+    SmoothQuantModifier(smoothing_strength=0.7),
+    GPTQModifier(scheme="W8A8", targets="Linear", ignore=["lm_head"]),
+]
 
 model = SparseAutoModelForCausalLM.from_pretrained(
     model_id,
```
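For context, here is roughly how the corrected recipe fits into the README's full flow. This is a sketch, not the exact README text: the `preprocess_fn` body, the calibration settings (`num_samples`, `max_seq_length`), and the assumption that the calibration dataset stores conversations in a `messages` column are all illustrative, and the import paths follow the llm-compressor README linked above.

```python
from datasets import load_dataset
from transformers import AutoTokenizer
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot

model_id = "meta-llama/Meta-Llama-3.1-70B-Instruct"
num_samples, max_seq_length = 512, 2048  # illustrative calibration settings

tokenizer = AutoTokenizer.from_pretrained(model_id)

def preprocess_fn(example):
    # Stand-in for the README's preprocess_fn: render each calibration
    # sample to a single "text" field via the chat template (assumption:
    # conversations live under a "messages" column).
    return {"text": tokenizer.apply_chat_template(example["messages"], tokenize=False)}

ds = load_dataset("neuralmagic/LLM_compression_calibration", split="train")
ds = ds.shuffle().select(range(num_samples))
ds = ds.map(preprocess_fn)

# SmoothQuant first migrates activation outliers into the weights, then
# GPTQ quantizes weights and activations to INT8 (W8A8), skipping lm_head.
recipe = [
    SmoothQuantModifier(smoothing_strength=0.7),
    GPTQModifier(scheme="W8A8", targets="Linear", ignore=["lm_head"]),
]

model = SparseAutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
)
oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=max_seq_length,
    num_calibration_samples=num_samples,
)
```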