Imran1 committed on
Commit
88fa6b0
·
verified ·
1 Parent(s): 36cfeeb

Update inference.py

Browse files
Files changed (1) hide show
  1. inference.py +1 -1
inference.py CHANGED
@@ -40,7 +40,7 @@ def model_fn(model_dir, context=None):
40
  model = LLM(
41
  model=model_dir,
42
  trust_remote_code=True,
43
- tensor_parallel_size=2, # Use 2 GPUs for parallelism
44
  gpu_memory_utilization=0.9 # Optimal GPU usage
45
  )
46
  return model
 
40
  model = LLM(
41
  model=model_dir,
42
  trust_remote_code=True,
43
+ tensor_parallel_size=8, # Use 8 GPUs for parallelism
44
  gpu_memory_utilization=0.9 # Optimal GPU usage
45
  )
46
  return model