Update inference.py
Browse files- inference.py +1 -1
inference.py
CHANGED
@@ -40,7 +40,7 @@ def model_fn(model_dir, context=None):
 40     model = LLM(
 41         model=model_dir,
 42         trust_remote_code=True,
-43         tensor_parallel_size=            (original value truncated in this capture — verify against the commit)
+43         tensor_parallel_size=8, # Use 8 GPUs for parallelism
 44         gpu_memory_utilization=0.9 # Optimal GPU usage
 45     )
 46     return model