from llama_cpp import Llama

# Path to the downloaded GGUF file (update this to match your folder and file)
model_path = "models/oss_20b_gguf/gpt-oss-20b-Q2_K_L.gguf"
# Load the model
llm = Llama(
    model_path=model_path,
    n_ctx=2048,   # Context window size in tokens
    n_threads=8   # Adjust to match your CPU core count
)
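# Optional (assumption: llama-cpp-python was installed with GPU support):
# passing n_gpu_layers=-1 to Llama() offloads all layers to the GPU, e.g.
# llm = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=-1)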
# Generate text
prompt = "Who is Iron Man?"
output = llm(
    prompt,
    max_tokens=200,   # Upper bound on generated tokens
    temperature=0.7,  # Sampling temperature; lower is more deterministic
    stop=["</s>"]     # Optional stop string; generation also ends at the model's EOS token
)
print(output["choices"][0]["text"])
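# A minimal chat-style sketch reusing the same loaded model (assumption: the
# GGUF ships a chat template that llama.cpp can apply automatically):
chat = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Who is Iron Man?"}],
    max_tokens=200,
    temperature=0.7
)
print(chat["choices"][0]["message"]["content"])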