from llama_cpp import Llama

# Path to the downloaded GGUF file (update this to match your folder and file)
model_path = "models/oss_20b_gguf/gpt-oss-20b-Q2_K_L.gguf"

# Load the model
llm = Llama(
    model_path=model_path,
    n_ctx=2048,   # Context window size in tokens
    n_threads=8   # Adjust based on your CPU cores
)

# Generate text
prompt = "Who is Iron Man?"
output = llm(
    prompt,
    max_tokens=200,    # Maximum number of tokens to generate
    temperature=0.7,   # Sampling temperature; lower is more deterministic
    stop=["</s>"]      # Stop generation at the end-of-sequence marker
)

print(output["choices"][0]["text"])
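
# A minimal streaming variant (a sketch, assuming the same `llm` object and
# `prompt` from above). Passing stream=True makes the call return an iterator
# of partial completions, so tokens can be printed as they are generated
# instead of waiting for the full response.
for chunk in llm(
    prompt,
    max_tokens=200,
    temperature=0.7,
    stream=True
):
    print(chunk["choices"][0]["text"], end="", flush=True)
print()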