Update README.md
README.md CHANGED
@@ -21,7 +21,7 @@

 <tr><td align="left" colspan="11"><b>w/o LLM</b></td></tr>
 <tr>
-<td align="left"><b>Conformer-Yue⭐</b></td><td align="center">130</td><td align="center"><b>16.57</b></td><td align="center">7.82</td><td align="center">7.72</td><td align="center">11.42</td><td align="center">5.73</td><td align="center">5.73</td><td align="center">8.97</td><td align="center"><ins>5.05</ins></td><td align="center">8.89</td>
+<td align="left"><b>U2pp-Conformer-Yue⭐</b></td><td align="center">130</td><td align="center"><b>16.57</b></td><td align="center">7.82</td><td align="center">7.72</td><td align="center">11.42</td><td align="center">5.73</td><td align="center">5.73</td><td align="center">8.97</td><td align="center"><ins>5.05</ins></td><td align="center">8.89</td>
 </tr>
 <tr>
 <td align="left">Paraformer</td><td align="center">220</td><td align="center">83.22</td><td align="center">51.97</td><td align="center">70.16</td><td align="center">68.49</td><td align="center">47.67</td><td align="center">79.31</td><td align="center">69.32</td><td align="center">73.64</td><td align="center">89.00</td>
@@ -29,7 +29,7 @@
 <tr>
 <td align="left">SenseVoice-small</td><td align="center">234</td><td align="center">21.08</td><td align="center"><ins>6.52</ins></td><td align="center">8.05</td><td align="center"><b>7.34</b></td><td align="center">6.34</td><td align="center">5.74</td><td align="center"><ins>6.65</ins></td><td align="center">6.69</td><td align="center">9.95</td>
 <tr>
-<td align="left"><b>SenseVoice-
+<td align="left"><b>SenseVoice-small-Yue⭐</b></td><td align="center">234</td><td align="center">19.19</td><td align="center">6.71</td><td align="center">6.87</td><td align="center">8.68</td><td align="center"><ins>5.43</ins></td><td align="center">5.24</td><td align="center">6.93</td><td align="center">5.23</td><td align="center">8.63</td>
 </tr>
 </tr>
 <tr>
@@ -42,7 +42,7 @@
 <td align="left">Whisper-medium</td><td align="center">769</td><td align="center">75.50</td><td align="center">68.69</td><td align="center">59.44</td><td align="center">62.50</td><td align="center">62.31</td><td align="center">64.41</td><td align="center">80.41</td><td align="center">80.82</td><td align="center">50.96</td>
 </tr>
 <tr>
-<td align="left"><b>Whisper-
+<td align="left"><b>Whisper-medium-Yue⭐</b></td><td align="center">769</td><td align="center">18.69</td><td align="center">6.86</td><td align="center"><ins>6.86</ins></td><td align="center">11.03</td><td align="center">5.49</td><td align="center"><ins>4.70</ins></td><td align="center">8.51</td><td align="center"><ins>5.05</ins></td><td align="center"><ins>8.05</ins></td>
 </tr>

 <tr>
@@ -64,6 +64,48 @@
 <td align="left">FireRedASR-LLM-L</td><td align="center">8300</td><td align="center">73.70</td><td align="center">18.72</td><td align="center">43.93</td><td align="center">43.33</td><td align="center">34.53</td><td align="center">48.05</td><td align="center">49.99</td><td align="center">49.87</td><td align="center">45.92</td>
 </tr>
 <tr>
-<td align="left"><b>Conformer-LLM-Yue⭐</b></td><td align="center">4200</td><td align="center"><ins>17.22</ins></td><td align="center"><b>6.21</b></td><td align="center"><b>6.23</b></td><td align="center">9.52</td><td align="center"><b>4.35</b></td><td align="center"><b>4.57</b></td><td align="center">6.98</td><td align="center"><b>4.73</b></td><td align="center"><b>7.91</b></td>
+<td align="left"><b>U2pp-Conformer-LLM-Yue⭐</b></td><td align="center">4200</td><td align="center"><ins>17.22</ins></td><td align="center"><b>6.21</b></td><td align="center"><b>6.23</b></td><td align="center">9.52</td><td align="center"><b>4.35</b></td><td align="center"><b>4.57</b></td><td align="center">6.98</td><td align="center"><b>4.73</b></td><td align="center"><b>7.91</b></td>
 </tr>
-</table>
+</table>
+
+## ASR Inference
+### U2pp_Conformer_Yue
+```
+dir=u2pp_conformer_yue
+decode_checkpoint=$dir/u2pp_conformer_yue.pt
+test_set=path/to/test_set
+test_result_dir=path/to/test_result_dir
+
+python wenet/bin/recognize.py \
+    --gpu 0 \
+    --modes attention_rescoring \
+    --config $dir/train.yaml \
+    --test_data $test_set/data.list \
+    --checkpoint $decode_checkpoint \
+    --beam_size 10 \
+    --batch_size 32 \
+    --ctc_weight 0.5 \
+    --result_dir $test_result_dir \
+    --decoding_chunk_size -1
+```
+### Whisper_Medium_Yue
+```
+dir=whisper_medium_yue
+decode_checkpoint=$dir/whisper_medium_yue.pt
+test_set=path/to/test_set
+test_result_dir=path/to/test_result_dir
+
+python wenet/bin/recognize.py \
+    --gpu 0 \
+    --modes attention \
+    --config $dir/train.yaml \
+    --test_data $test_set/data.list \
+    --checkpoint $decode_checkpoint \
+    --beam_size 10 \
+    --batch_size 32 \
+    --blank_penalty 0.0 \
+    --ctc_weight 0.0 \
+    --reverse_weight 0.0 \
+    --result_dir $test_result_dir \
+    --decoding_chunk_size -1
+```
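Both `recognize.py` commands above read their test utterances from `$test_set/data.list`. For context, here is a minimal sketch of what that file typically looks like in WeNet's raw data format, one JSON object per line with `key`, `wav`, and `txt` fields; the utterance keys, wav paths, and Cantonese transcripts below are invented placeholders, not files shipped with this repository.

```
# Hypothetical $test_set/data.list in WeNet's "raw" format.
# Keys, wav paths, and transcripts are made-up placeholders.
cat > path/to/test_set/data.list <<'EOF'
{"key": "yue_utt_0001", "wav": "/data/yue_test/yue_utt_0001.wav", "txt": "今日天氣幾好"}
{"key": "yue_utt_0002", "wav": "/data/yue_test/yue_utt_0002.wav", "txt": "唔該晒"}
EOF
```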
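Once decoding finishes, the hypotheses under `$test_result_dir` can be scored against reference transcripts. The sketch below uses WeNet's bundled `tools/compute-wer.py` and assumes the usual recipe layout, where `recognize.py` writes one `text` file per decoding mode (for example `$test_result_dir/attention_rescoring/text`) and a Kaldi-style reference file `$test_set/text` (lines of `utt_id transcript`) is available; adjust the paths if your setup differs.

```
# Sketch: character error rate for the attention_rescoring hypotheses.
# Assumes $test_set/text holds references and that recognize.py wrote
# $test_result_dir/attention_rescoring/text (standard WeNet recipe layout).
python tools/compute-wer.py --char=1 --v=1 \
    $test_set/text $test_result_dir/attention_rescoring/text \
    > $test_result_dir/attention_rescoring/wer
```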
|