"""Flask chat server backed by a local llama.cpp GGUF model.

Serves a single-page chat UI ("/") and a "/chat" JSON endpoint that builds
an adaptive prompt from the conversation history plus the new user message,
then completes it with the local model.
"""

from flask import Flask, jsonify, render_template, request
from llama_cpp import Llama

app = Flask(__name__)

# Path to the local GGUF model weights -- update this path for your setup.
MODEL_PATH = "models/oss_20b_gguf/gpt-oss-20b-Q2_K_L.gguf"

# Load the model once at import time; initialization is expensive.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_threads=8,  # adjust based on your CPU core count
)

# Keyword groups used to pick a per-request output-token budget.
# Long-form keywords are checked BEFORE code keywords (original precedence).
_LONG_FORM_KEYWORDS = ("story", "letter", "essay")
_CODE_KEYWORDS = ("code", "program", "script", "python", "java", "html", "c++")


def build_prompt(history, user_text):
    """Build the full completion prompt: system rules, prior turns, new message.

    Args:
        history: list of {"user": ..., "assistant": ...} dicts, oldest first.
            Missing keys are tolerated and rendered as empty strings.
        user_text: the new user message to be answered.

    Returns:
        A single prompt string ending with "Assistant:" for the model to complete.
    """
    system_prompt = (
        "You are a helpful and adaptive assistant. Follow these rules strictly:\n"
        "- If the user asks a simple or factual question, give a short, precise answer.\n"
        "- If the user requests a story, essay, or letter, provide a longer, well-structured response.\n"
        "- If the user asks for programming help or code, provide correct, complete, well-formatted code.\n"
        "- Always keep answers clear, neat, and structured; use points when helpful.\n"
        "- Output code inside proper Markdown code blocks with language tags for syntax highlighting.\n"
    )
    # "".join avoids quadratic += concatenation; .get() tolerates partial turns.
    turns = [
        f"User: {turn.get('user', '')}\nAssistant: {turn.get('assistant', '')}\n"
        for turn in history
    ]
    return system_prompt + "\n\n" + "".join(turns) + f"User: {user_text}\nAssistant:"


def _max_tokens_for(message):
    """Return the output-token budget implied by keywords in *message*.

    Case-insensitive substring match; long-form requests get 800 tokens,
    code requests 1000, everything else a short 200-token budget.
    """
    lowered = message.lower()  # hoisted: computed once, not per keyword group
    if any(word in lowered for word in _LONG_FORM_KEYWORDS):
        return 800
    if any(word in lowered for word in _CODE_KEYWORDS):
        return 1000
    return 200


@app.route("/")
def index():
    """Serve the chat UI page."""
    return render_template("index.html")


@app.route("/chat", methods=["POST"])
def chat():
    """Handle one chat turn: build the prompt, run the model, return JSON.

    Expects a JSON body {"message": str, "history": [{"user", "assistant"}, ...]}.
    Returns {"response": str}. Malformed or missing fields degrade to empty
    strings rather than raising.
    """
    # silent=True: a non-JSON body yields None instead of aborting with 400.
    data = request.get_json(silent=True) or {}
    # Bug fix: .get("message") may be None (key absent or JSON null), which
    # previously crashed the .lower() keyword checks with AttributeError.
    user_message = data.get("message") or ""
    history = data.get("history", [])

    prompt = build_prompt(history, user_message)
    max_out = _max_tokens_for(user_message)

    resp = llm(
        prompt,
        max_tokens=max_out,
        temperature=0.7,
        stop=["\nUser:", "\nAssistant:"],
    )
    text = resp["choices"][0]["text"].strip()

    # Fenced code blocks are wrapped with a copy button client-side (JS).
    return jsonify({"response": text})


if __name__ == "__main__":
    # NOTE(review): debug=True enables the interactive Werkzeug debugger,
    # which allows arbitrary code execution -- do not combine it with
    # host="0.0.0.0" outside a trusted development network.
    app.run(host="0.0.0.0", port=5000, debug=True)