Update README.md
README.md CHANGED
@@ -21,7 +21,7 @@
 
 <tr><td align="left" colspan="11"><b>w/o LLM</b></td></tr>
 <tr>
-<td align="left"><b>Conformer-Yue⭐</b></td><td align="center">130</td><td align="center"><b>16.57</b></td><td align="center">7.82</td><td align="center">7.72</td><td align="center">11.42</td><td align="center">5.73</td><td align="center">5.73</td><td align="center">8.97</td><td align="center"><ins>5.05</ins></td><td align="center">8.89</td>
+<td align="left"><b>U2pp-Conformer-Yue⭐</b></td><td align="center">130</td><td align="center"><b>16.57</b></td><td align="center">7.82</td><td align="center">7.72</td><td align="center">11.42</td><td align="center">5.73</td><td align="center">5.73</td><td align="center">8.97</td><td align="center"><ins>5.05</ins></td><td align="center">8.89</td>
 </tr>
 <tr>
 <td align="left">Paraformer</td><td align="center">220</td><td align="center">83.22</td><td align="center">51.97</td><td align="center">70.16</td><td align="center">68.49</td><td align="center">47.67</td><td align="center">79.31</td><td align="center">69.32</td><td align="center">73.64</td><td align="center">89.00</td>
@@ -29,7 +29,7 @@
 <tr>
 <td align="left">SenseVoice-small</td><td align="center">234</td><td align="center">21.08</td><td align="center"><ins>6.52</ins></td><td align="center">8.05</td><td align="center"><b>7.34</b></td><td align="center">6.34</td><td align="center">5.74</td><td align="center"><ins>6.65</ins></td><td align="center">6.69</td><td align="center">9.95</td>
 <tr>
-<td align="left"><b>SenseVoice-
+<td align="left"><b>SenseVoice-small-Yue⭐</b></td><td align="center">234</td><td align="center">19.19</td><td align="center">6.71</td><td align="center">6.87</td><td align="center">8.68</td><td align="center"><ins>5.43</ins></td><td align="center">5.24</td><td align="center">6.93</td><td align="center">5.23</td><td align="center">8.63</td>
 </tr>
 </tr>
 <tr>
@@ -42,7 +42,7 @@
 <td align="left">Whisper-medium</td><td align="center">769</td><td align="center">75.50</td><td align="center">68.69</td><td align="center">59.44</td><td align="center">62.50</td><td align="center">62.31</td><td align="center">64.41</td><td align="center">80.41</td><td align="center">80.82</td><td align="center">50.96</td>
 </tr>
 <tr>
-<td align="left"><b>Whisper-
+<td align="left"><b>Whisper-medium-Yue⭐</b></td><td align="center">769</td><td align="center">18.69</td><td align="center">6.86</td><td align="center"><ins>6.86</ins></td><td align="center">11.03</td><td align="center">5.49</td><td align="center"><ins>4.70</ins></td><td align="center">8.51</td><td align="center"><ins>5.05</ins></td><td align="center"><ins>8.05</ins></td>
 </tr>
 
 <tr>
@@ -64,6 +64,48 @@
 <td align="left">FireRedASR-LLM-L</td><td align="center">8300</td><td align="center">73.70</td><td align="center">18.72</td><td align="center">43.93</td><td align="center">43.33</td><td align="center">34.53</td><td align="center">48.05</td><td align="center">49.99</td><td align="center">49.87</td><td align="center">45.92</td>
 </tr>
 <tr>
-<td align="left"><b>Conformer-LLM-Yue⭐</b></td><td align="center">4200</td><td align="center"><ins>17.22</ins></td><td align="center"><b>6.21</b></td><td align="center"><b>6.23</b></td><td align="center">9.52</td><td align="center"><b>4.35</b></td><td align="center"><b>4.57</b></td><td align="center">6.98</td><td align="center"><b>4.73</b></td><td align="center"><b>7.91</b></td>
+<td align="left"><b>U2pp-Conformer-LLM-Yue⭐</b></td><td align="center">4200</td><td align="center"><ins>17.22</ins></td><td align="center"><b>6.21</b></td><td align="center"><b>6.23</b></td><td align="center">9.52</td><td align="center"><b>4.35</b></td><td align="center"><b>4.57</b></td><td align="center">6.98</td><td align="center"><b>4.73</b></td><td align="center"><b>7.91</b></td>
 </tr>
-</table>
+</table>
+
+## ASR Inference
+### U2pp_Conformer_Yue
+```
+dir=u2pp_conformer_yue
+decode_checkpoint=$dir/u2pp_conformer_yue.pt
+test_set=path/to/test_set
+test_result_dir=path/to/test_result_dir
+
+python wenet/bin/recognize.py \
+    --gpu 0 \
+    --modes attention_rescoring \
+    --config $dir/train.yaml \
+    --test_data $test_set/data.list \
+    --checkpoint $decode_checkpoint \
+    --beam_size 10 \
+    --batch_size 32 \
+    --ctc_weight 0.5 \
+    --result_dir $test_result_dir \
+    --decoding_chunk_size -1
+```
+### Whisper_Medium_Yue
+```
+dir=whisper_medium_yue
+decode_checkpoint=$dir/whisper_medium_yue.pt
+test_set=path/to/test_set
+test_result_dir=path/to/test_result_dir
+
+python wenet/bin/recognize.py \
+    --gpu 0 \
+    --modes attention \
+    --config $dir/train.yaml \
+    --test_data $test_set/data.list \
+    --checkpoint $decode_checkpoint \
+    --beam_size 10 \
+    --batch_size 32 \
+    --blank_penalty 0.0 \
+    --ctc_weight 0.0 \
+    --reverse_weight 0.0 \
+    --result_dir $test_result_dir \
+    --decoding_chunk_size -1
+```
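
The two recipes added above follow WeNet's standard decoding flow. As a minimal sketch under that assumption (the file layout and scoring step below are illustrative, not taken from this commit): `--test_data` points at a raw-format `data.list` with one JSON object per line (`key`, `wav`, `txt`), and `recognize.py` writes one hypothesis file per decoding mode under `$test_result_dir/<mode>/text`, which can then be scored with WeNet's stock `tools/compute-wer.py`.

```
# Minimal sketch, assuming stock WeNet conventions; names and paths here are
# illustrative and may differ in this repo.
test_set=path/to/test_set
test_result_dir=path/to/test_result_dir

# Raw-format data.list: one JSON object per line with key / wav / txt fields.
cat > $test_set/data.list <<'EOF'
{"key": "utt001", "wav": "/abs/path/to/utt001.wav", "txt": "今日天氣點呀"}
EOF

# recognize.py writes one hypothesis file per decoding mode, e.g.
# $test_result_dir/attention_rescoring/text. Score it at character level
# against a reference file of "<key> <transcript>" lines.
python tools/compute-wer.py --char=1 --v=1 \
    $test_set/text $test_result_dir/attention_rescoring/text \
    > $test_result_dir/attention_rescoring/cer
```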