jerryzh168 committed on
Commit
88f3761
·
verified ·
1 Parent(s): 183a2ac

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -1
README.md CHANGED
@@ -236,7 +236,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig
236
 
237
  # use "Qwen/Qwen3-8B" or "pytorch/Qwen3-8B-INT4"
238
  model_id = "pytorch/Qwen3-8B-INT4"
239
- quantized_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
240
  tokenizer = AutoTokenizer.from_pretrained(model_id)
241
 
242
  torch.cuda.reset_peak_memory_stats()
 
236
 
237
  # use "Qwen/Qwen3-8B" or "pytorch/Qwen3-8B-INT4"
238
  model_id = "pytorch/Qwen3-8B-INT4"
239
+ quantized_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda:0", torch_dtype=torch.bfloat16)
240
  tokenizer = AutoTokenizer.from_pretrained(model_id)
241
 
242
  torch.cuda.reset_peak_memory_stats()