Update README.md
Browse files
README.md
CHANGED
|
@@ -170,8 +170,6 @@ evalplus.evaluate \
|
|
| 170 |
|
| 171 |
### Accuracy
|
| 172 |
|
| 173 |
-
#### OpenLLM Leaderboard V1 evaluation scores
|
| 174 |
-
|
| 175 |
<table>
|
| 176 |
<thead>
|
| 177 |
<tr>
|
|
@@ -183,7 +181,7 @@ evalplus.evaluate \
|
|
| 183 |
</tr>
|
| 184 |
</thead>
|
| 185 |
<tbody>
|
| 186 |
-
<!-- OpenLLM Leaderboard V1 -->
|
| 187 |
<tr>
|
| 188 |
<td rowspan="7"><b>OpenLLM Leaderboard V1</b></td>
|
| 189 |
<td>ARC-Challenge (Acc-Norm, 25-shot)</td>
|
|
@@ -229,7 +227,7 @@ evalplus.evaluate \
|
|
| 229 |
</tr>
|
| 230 |
<!-- OpenLLM Leaderboard V2 -->
|
| 231 |
<tr>
|
| 232 |
-
<td rowspan="7"><b>OpenLLM Leaderboard V2</b></td>
|
| 233 |
<td>IFEval (Inst Level Strict Acc, 0-shot)</td>
|
| 234 |
<td>67.99</td>
|
| 235 |
<td>66.79</td>
|
|
@@ -273,7 +271,7 @@ evalplus.evaluate \
|
|
| 273 |
</tr>
|
| 274 |
<!-- HumanEval -->
|
| 275 |
<tr>
|
| 276 |
-
<td rowspan="2"><b>
|
| 277 |
<td>HumanEval Pass@1</td>
|
| 278 |
<td>53.40</td>
|
| 279 |
<td>54.90</td>
|
|
|
|
| 170 |
|
| 171 |
### Accuracy
|
| 172 |
|
|
|
|
|
|
|
| 173 |
<table>
|
| 174 |
<thead>
|
| 175 |
<tr>
|
|
|
|
| 181 |
</tr>
|
| 182 |
</thead>
|
| 183 |
<tbody>
|
| 184 |
+
<!-- OpenLLM V1 -->
|
| 185 |
<tr>
|
| 186 |
<td rowspan="7"><b>OpenLLM Leaderboard V1</b></td>
|
| 187 |
<td>ARC-Challenge (Acc-Norm, 25-shot)</td>
|
|
|
|
| 227 |
</tr>
|
| 228 |
<!-- OpenLLM Leaderboard V2 -->
|
| 229 |
<tr>
|
| 230 |
+
<td rowspan="7"><b>OpenLLM V2</b></td>
|
| 231 |
<td>IFEval (Inst Level Strict Acc, 0-shot)</td>
|
| 232 |
<td>67.99</td>
|
| 233 |
<td>66.79</td>
|
|
|
|
| 271 |
</tr>
|
| 272 |
<!-- HumanEval -->
|
| 273 |
<tr>
|
| 274 |
+
<td rowspan="2"><b>Coding</b></td>
|
| 275 |
<td>HumanEval Pass@1</td>
|
| 276 |
<td>53.40</td>
|
| 277 |
<td>54.90</td>
|