import matplotlib.pyplot as plt
import numpy as np

# Evaluation criteria, in the order they are laid out along the x-axis.
criteria = [
    "Code Quality",
    "Security Features",
    "Feature Completeness",
    "Gas Optimization",
    "Error Handling",
    "Documentation",
    "Contract Structure",
    "Token Integration",
    "Event Implementation",
    "Success Rate",
]

# Denominator used for the "Success Rate" entry (results marked True, out of 60).
_TOTAL_RUNS = 60


def _row(*scores, passed):
    """Return one model's values: the given scores followed by the success rate.

    The success rate is ``passed / 60 * 10`` so that it sits on the same
    0-10 scale as the other scores.
    """
    return [*scores, passed / _TOTAL_RUNS * 10]


# Per-model values, aligned index-for-index with `criteria`: nine scores
# out of 10 followed by the scaled success rate.
# Rows that are commented out are models currently left out of the chart.
models = {
    "Solidity LLM": _row(9, 9, 9, 9, 8, 9, 9, 9, 9, passed=55),
    "GPT-4.5-preview": _row(9, 9, 8, 7, 8, 8, 8, 9, 8, passed=37),
    "GPT-4o-mini": _row(4, 3, 4, 4, 5, 4, 5, 3, 5, passed=9),
    # "gpt-4.o-preview": _row(8, 8, 8, 6, 6, 5, 7, 8, 6, passed=25),
    # "gpt-4o": _row(3, 3, 4, 4, 4, 4, 5, 2, 4, passed=30),
    "gpt-4.1": _row(8, 8, 8, 6, 6, 7, 7, 8, 6, passed=19),
    # "gpt-4.1-mini": _row(5, 5, 5, 5, 6, 7, 6, 3, 5, passed=21),
    # "gpt-4.1-nano": _row(9, 9, 7, 6, 7, 8, 8, 7, 7, passed=37),
    # "GPT-o3": _row(3, 2, 4, 3, 4, 3, 4, 5, 4, passed=5),
    # "llama-4-scout": _row(2, 2, 3, 2, 3, 2, 3, 4, 3, passed=3),
    "llama-4-maverick": _row(4, 3, 5, 4, 5, 6, 6, 6, 5, passed=8),
}

# X-axis positions: one integer slot per criterion; each model's bar is
# shifted inside that slot by a multiple of the bar width.
x = np.arange(len(criteria))
width = 0.08

# Plotting: one bar series per model, in dict order.
fig, ax = plt.subplots(figsize=(20, 8))

for i, (model, values) in enumerate(models.items()):
    ax.bar(x + i * width, values, width, label=model)

# Labels and formatting
ax.set_ylabel('Score (Out of 10)')
ax.set_title('Comparison of LLMs on Solidity Smart Contract Generation')
# Bars are centre-aligned on x + i * width, so the middle of a group of
# n bars lies at x + (n - 1) * width / 2 (not n * width / 2, which would
# push every label half a bar width to the right).
ax.set_xticks(x + width * (len(models) - 1) / 2)
ax.set_xticklabels(criteria, rotation=45, ha="right")
ax.set_ylim(0, 10)
# Place the legend just outside the right edge of the axes.
ax.legend(loc='upper left', bbox_to_anchor=(1, 1))

plt.tight_layout()
# Save before showing: with interactive backends show() blocks until the
# window is closed, after which the figure may no longer be available and
# the saved image can come out blank.
fig.savefig('model_comparison_new.png', dpi=500)
plt.show()