Update README.md
Browse files
README.md
CHANGED
@@ -170,8 +170,6 @@ evalplus.evaluate \
|
|
170 |
|
171 |
### Accuracy
|
172 |
|
173 |
-
#### OpenLLM Leaderboard V1 evaluation scores
|
174 |
-
|
175 |
<table>
|
176 |
<thead>
|
177 |
<tr>
|
@@ -183,7 +181,7 @@ evalplus.evaluate \
|
|
183 |
</tr>
|
184 |
</thead>
|
185 |
<tbody>
|
186 |
-
<!-- OpenLLM
|
187 |
<tr>
|
188 |
<td rowspan="7"><b>OpenLLM Leaderboard V1</b></td>
|
189 |
<td>ARC-Challenge (Acc-Norm, 25-shot)</td>
|
@@ -229,7 +227,7 @@ evalplus.evaluate \
|
|
229 |
</tr>
|
230 |
<!-- OpenLLM Leaderboard V2 -->
|
231 |
<tr>
|
232 |
-
<td rowspan="7"><b>OpenLLM
|
233 |
<td>IFEval (Inst Level Strict Acc, 0-shot)</td>
|
234 |
<td>67.99</td>
|
235 |
<td>66.79</td>
|
@@ -273,7 +271,7 @@ evalplus.evaluate \
|
|
273 |
</tr>
|
274 |
<!-- HumanEval -->
|
275 |
<tr>
|
276 |
-
<td rowspan="2"><b>
|
277 |
<td>HumanEval Pass@1</td>
|
278 |
<td>53.40</td>
|
279 |
<td>54.90</td>
|
|
|
170 |
|
171 |
### Accuracy
|
172 |
|
|
|
|
|
173 |
<table>
|
174 |
<thead>
|
175 |
<tr>
|
|
|
181 |
</tr>
|
182 |
</thead>
|
183 |
<tbody>
|
184 |
+
<!-- OpenLLM Leaderboard V1 -->
|
185 |
<tr>
|
186 |
<td rowspan="7"><b>OpenLLM Leaderboard V1</b></td>
|
187 |
<td>ARC-Challenge (Acc-Norm, 25-shot)</td>
|
|
|
227 |
</tr>
|
228 |
<!-- OpenLLM Leaderboard V2 -->
|
229 |
<tr>
|
230 |
+
<td rowspan="7"><b>OpenLLM Leaderboard V2</b></td>
|
231 |
<td>IFEval (Inst Level Strict Acc, 0-shot)</td>
|
232 |
<td>67.99</td>
|
233 |
<td>66.79</td>
|
|
|
271 |
</tr>
|
272 |
<!-- HumanEval -->
|
273 |
<tr>
|
274 |
+
<td rowspan="2"><b>Coding</b></td>
|
275 |
<td>HumanEval Pass@1</td>
|
276 |
<td>53.40</td>
|
277 |
<td>54.90</td>
|