"""Grouped bar chart comparing LLMs on Solidity smart contract generation.

Each model gets one bar per criterion. The chart is written to
``model_comparison_new.png`` and then displayed.
"""

import matplotlib.pyplot as plt
import numpy as np

# Labels for the x-axis, one per group of bars.
criteria = [
    "Code Quality",
    "Security Features",
    "Feature Completeness",
    "Gas Optimization",
    "Error Handling",
    "Documentation",
    "Contract Structure",
    "Token Integration",
    "Event Implementation",
    "Success Rate",
]

# Per-model data, one value per entry in `criteria`:
#   * the first 9 values are scores out of 10;
#   * the last value is the success rate (successes out of 60 runs),
#     rescaled to 0-10 so it shares the y-axis with the scores.
# Commented-out rows are models currently excluded from the chart;
# uncomment a row to include it.
models = {
    "Solidity LLM": [9, 9, 9, 9, 8, 9, 9, 9, 9, 55 / 60 * 10],
    "GPT-4.5-preview": [9, 9, 8, 7, 8, 8, 8, 9, 8, 37 / 60 * 10],
    "GPT-4o-mini": [4, 3, 4, 4, 5, 4, 5, 3, 5, 9 / 60 * 10],
    # "gpt-4.o-preview": [8, 8, 8, 6, 6, 5, 7, 8, 6, 25/60*10],
    # "gpt-4o": [3, 3, 4, 4, 4, 4, 5, 2, 4, 30/60*10],
    "gpt-4.1": [8, 8, 8, 6, 6, 7, 7, 8, 6, 19 / 60 * 10],
    # "gpt-4.1-mini": [5, 5, 5, 5, 6, 7, 6, 3, 5, 21/60*10],
    # "gpt-4.1-nano": [9, 9, 7, 6, 7, 8, 8, 7, 7, 37/60*10],
    # "GPT-o3": [3, 2, 4, 3, 4, 3, 4, 5, 4, 5/60*10],
    # "llama-4-scout": [2, 2, 3, 2, 3, 2, 3, 4, 3, 3/60*10],
    "llama-4-maverick": [4, 3, 5, 4, 5, 6, 6, 6, 5, 8 / 60 * 10],
}

# X-axis positions: one integer slot per criterion; each model's bar is
# shifted right by one bar width within the slot.
x = np.arange(len(criteria))
width = 0.08

# Plotting
fig, ax = plt.subplots(figsize=(20, 8))
for i, (model, values) in enumerate(models.items()):
    ax.bar(x + i * width, values, width, label=model)

# Labels and formatting
ax.set_ylabel('Score (Out of 10)')
ax.set_title('Comparison of LLMs on Solidity Smart Contract Generation')
# Bars are centre-aligned at x + i*width for i in [0, n-1], so the middle
# of each group is x + width*(n-1)/2 (not width*n/2, which would shift
# every label half a bar to the right).
ax.set_xticks(x + width * (len(models) - 1) / 2)
ax.set_xticklabels(criteria, rotation=45, ha="right")
ax.set_ylim(0, 10)
ax.legend(loc='upper left', bbox_to_anchor=(1, 1))

plt.tight_layout()

# Save BEFORE showing: with an interactive backend, show() blocks until the
# window is closed and the figure is then torn down, so a later savefig
# would write an empty image.
fig.savefig('model_comparison_new.png', dpi=500)
plt.show()