jerryzh168 committed on
Commit
c17b1dc
·
verified ·
1 Parent(s): d080fca

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -1
README.md CHANGED
@@ -261,7 +261,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig
261
 
262
  # use "microsoft/Phi-4-mini-instruct" or "pytorch/Phi-4-mini-instruct-FP8"
263
  model_id = "pytorch/Phi-4-mini-instruct-FP8"
264
- quantized_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
265
  tokenizer = AutoTokenizer.from_pretrained(model_id)
266
 
267
  torch.cuda.reset_peak_memory_stats()
 
261
 
262
  # use "microsoft/Phi-4-mini-instruct" or "pytorch/Phi-4-mini-instruct-FP8"
263
  model_id = "pytorch/Phi-4-mini-instruct-FP8"
264
+ quantized_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda:0", torch_dtype=torch.bfloat16)
265
  tokenizer = AutoTokenizer.from_pretrained(model_id)
266
 
267
  torch.cuda.reset_peak_memory_stats()