vladislavbro commited on
Commit
9872cc6
·
verified ·
1 Parent(s): 798e4a2

Delete tokenizer.json

Browse files
Files changed (1) hide show
  1. tokenizer.json +0 -1435
tokenizer.json DELETED
@@ -1,1435 +0,0 @@
1
- {
2
- "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
5
- "added_tokens": [
6
- {
7
- "id": 0,
8
- "special": true,
9
- "content": "[STOP]",
10
- "single_word": false,
11
- "lstrip": false,
12
- "rstrip": false,
13
- "normalized": false
14
- },
15
- {
16
- "id": 1,
17
- "special": true,
18
- "content": "[UNK]",
19
- "single_word": false,
20
- "lstrip": false,
21
- "rstrip": false,
22
- "normalized": false
23
- },
24
- {
25
- "id": 2,
26
- "special": true,
27
- "content": "[SPACE]",
28
- "single_word": false,
29
- "lstrip": false,
30
- "rstrip": false,
31
- "normalized": false
32
- },
33
- {
34
- "id": 255,
35
- "special": true,
36
- "content": "[START]",
37
- "single_word": false,
38
- "lstrip": false,
39
- "rstrip": false,
40
- "normalized": false
41
- },
42
- {
43
- "id": 604,
44
- "content": "[UH]",
45
- "single_word": false,
46
- "lstrip": false,
47
- "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
- },
51
- {
52
- "id": 605,
53
- "content": "[UM]",
54
- "single_word": false,
55
- "lstrip": false,
56
- "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
- },
60
- {
61
- "id": 606,
62
- "content": "[giggle]",
63
- "single_word": false,
64
- "lstrip": false,
65
- "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
- },
69
- {
70
- "id": 607,
71
- "content": "[laughter]",
72
- "single_word": false,
73
- "lstrip": false,
74
- "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
- },
78
- {
79
- "id": 608,
80
- "content": "[guffaw]",
81
- "single_word": false,
82
- "lstrip": false,
83
- "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
- },
87
- {
88
- "id": 609,
89
- "content": "[inhale]",
90
- "single_word": false,
91
- "lstrip": false,
92
- "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
- },
96
- {
97
- "id": 610,
98
- "content": "[exhale]",
99
- "single_word": false,
100
- "lstrip": false,
101
- "rstrip": false,
102
- "normalized": false,
103
- "special": true
104
- },
105
- {
106
- "id": 611,
107
- "content": "[sigh]",
108
- "single_word": false,
109
- "lstrip": false,
110
- "rstrip": false,
111
- "normalized": false,
112
- "special": true
113
- },
114
- {
115
- "id": 612,
116
- "content": "[cry]",
117
- "single_word": false,
118
- "lstrip": false,
119
- "rstrip": false,
120
- "normalized": false,
121
- "special": true
122
- },
123
- {
124
- "id": 613,
125
- "content": "[bark]",
126
- "single_word": false,
127
- "lstrip": false,
128
- "rstrip": false,
129
- "normalized": false,
130
- "special": true
131
- },
132
- {
133
- "id": 614,
134
- "content": "[howl]",
135
- "single_word": false,
136
- "lstrip": false,
137
- "rstrip": false,
138
- "normalized": false,
139
- "special": true
140
- },
141
- {
142
- "id": 615,
143
- "content": "[meow]",
144
- "single_word": false,
145
- "lstrip": false,
146
- "rstrip": false,
147
- "normalized": false,
148
- "special": true
149
- },
150
- {
151
- "id": 616,
152
- "content": "[singing]",
153
- "single_word": false,
154
- "lstrip": false,
155
- "rstrip": false,
156
- "normalized": false,
157
- "special": true
158
- },
159
- {
160
- "id": 617,
161
- "content": "[music]",
162
- "single_word": false,
163
- "lstrip": false,
164
- "rstrip": false,
165
- "normalized": false,
166
- "special": true
167
- },
168
- {
169
- "id": 618,
170
- "content": "[whistle]",
171
- "single_word": false,
172
- "lstrip": false,
173
- "rstrip": false,
174
- "normalized": false,
175
- "special": true
176
- },
177
- {
178
- "id": 619,
179
- "content": "[humming]",
180
- "single_word": false,
181
- "lstrip": false,
182
- "rstrip": false,
183
- "normalized": false,
184
- "special": true
185
- },
186
- {
187
- "id": 620,
188
- "content": "[gasp]",
189
- "single_word": false,
190
- "lstrip": false,
191
- "rstrip": false,
192
- "normalized": false,
193
- "special": true
194
- },
195
- {
196
- "id": 621,
197
- "content": "[groan]",
198
- "single_word": false,
199
- "lstrip": false,
200
- "rstrip": false,
201
- "normalized": false,
202
- "special": true
203
- },
204
- {
205
- "id": 622,
206
- "content": "[whisper]",
207
- "single_word": false,
208
- "lstrip": false,
209
- "rstrip": false,
210
- "normalized": false,
211
- "special": true
212
- },
213
- {
214
- "id": 623,
215
- "content": "[mumble]",
216
- "single_word": false,
217
- "lstrip": false,
218
- "rstrip": false,
219
- "normalized": false,
220
- "special": true
221
- },
222
- {
223
- "id": 624,
224
- "content": "[sniff]",
225
- "single_word": false,
226
- "lstrip": false,
227
- "rstrip": false,
228
- "normalized": false,
229
- "special": true
230
- },
231
- {
232
- "id": 625,
233
- "content": "[sneeze]",
234
- "single_word": false,
235
- "lstrip": false,
236
- "rstrip": false,
237
- "normalized": false,
238
- "special": true
239
- },
240
- {
241
- "id": 626,
242
- "content": "[cough]",
243
- "single_word": false,
244
- "lstrip": false,
245
- "rstrip": false,
246
- "normalized": false,
247
- "special": true
248
- },
249
- {
250
- "id": 627,
251
- "content": "[snore]",
252
- "single_word": false,
253
- "lstrip": false,
254
- "rstrip": false,
255
- "normalized": false,
256
- "special": true
257
- },
258
- {
259
- "id": 628,
260
- "content": "[chew]",
261
- "single_word": false,
262
- "lstrip": false,
263
- "rstrip": false,
264
- "normalized": false,
265
- "special": true
266
- },
267
- {
268
- "id": 629,
269
- "content": "[sip]",
270
- "single_word": false,
271
- "lstrip": false,
272
- "rstrip": false,
273
- "normalized": false,
274
- "special": true
275
- },
276
- {
277
- "id": 630,
278
- "content": "[clear_throat]",
279
- "single_word": false,
280
- "lstrip": false,
281
- "rstrip": false,
282
- "normalized": false,
283
- "special": true
284
- },
285
- {
286
- "id": 631,
287
- "content": "[kiss]",
288
- "single_word": false,
289
- "lstrip": false,
290
- "rstrip": false,
291
- "normalized": false,
292
- "special": true
293
- },
294
- {
295
- "id": 632,
296
- "content": "[shhh]",
297
- "single_word": false,
298
- "lstrip": false,
299
- "rstrip": false,
300
- "normalized": false,
301
- "special": true
302
- },
303
- {
304
- "id": 633,
305
- "content": "[gibberish]",
306
- "single_word": false,
307
- "lstrip": false,
308
- "rstrip": false,
309
- "normalized": false,
310
- "special": true
311
- },
312
- {
313
- "id": 634,
314
- "content": "[fr]",
315
- "single_word": false,
316
- "lstrip": false,
317
- "rstrip": false,
318
- "normalized": false,
319
- "special": true
320
- },
321
- {
322
- "id": 635,
323
- "content": "[es]",
324
- "single_word": false,
325
- "lstrip": false,
326
- "rstrip": false,
327
- "normalized": false,
328
- "special": true
329
- },
330
- {
331
- "id": 636,
332
- "content": "[de]",
333
- "single_word": false,
334
- "lstrip": false,
335
- "rstrip": false,
336
- "normalized": false,
337
- "special": true
338
- },
339
- {
340
- "id": 637,
341
- "content": "[it]",
342
- "single_word": false,
343
- "lstrip": false,
344
- "rstrip": false,
345
- "normalized": false,
346
- "special": true
347
- },
348
- {
349
- "id": 638,
350
- "content": "[ipa]",
351
- "single_word": false,
352
- "lstrip": false,
353
- "rstrip": false,
354
- "normalized": false,
355
- "special": true
356
- },
357
- {
358
- "id": 639,
359
- "content": "[end_of_label]",
360
- "single_word": false,
361
- "lstrip": false,
362
- "rstrip": false,
363
- "normalized": false,
364
- "special": true
365
- },
366
- {
367
- "id": 695,
368
- "content": "[PLACEHOLDER55]",
369
- "single_word": false,
370
- "lstrip": false,
371
- "rstrip": false,
372
- "normalized": false,
373
- "special": true
374
- },
375
- {
376
- "id": 696,
377
- "content": "[PLACEHOLDER56]",
378
- "single_word": false,
379
- "lstrip": false,
380
- "rstrip": false,
381
- "normalized": false,
382
- "special": true
383
- },
384
- {
385
- "id": 697,
386
- "content": "[PLACEHOLDER57]",
387
- "single_word": false,
388
- "lstrip": false,
389
- "rstrip": false,
390
- "normalized": false,
391
- "special": true
392
- },
393
- {
394
- "id": 698,
395
- "content": "[PLACEHOLDER58]",
396
- "single_word": false,
397
- "lstrip": false,
398
- "rstrip": false,
399
- "normalized": false,
400
- "special": true
401
- },
402
- {
403
- "id": 699,
404
- "content": "[PLACEHOLDER59]",
405
- "single_word": false,
406
- "lstrip": false,
407
- "rstrip": false,
408
- "normalized": false,
409
- "special": true
410
- },
411
- {
412
- "id": 700,
413
- "content": "[PLACEHOLDER60]",
414
- "single_word": false,
415
- "lstrip": false,
416
- "rstrip": false,
417
- "normalized": false,
418
- "special": true
419
- },
420
- {
421
- "id": 701,
422
- "content": "[PLACEHOLDER61]",
423
- "single_word": false,
424
- "lstrip": false,
425
- "rstrip": false,
426
- "normalized": false,
427
- "special": true
428
- },
429
- {
430
- "id": 702,
431
- "content": "[PLACEHOLDER62]",
432
- "single_word": false,
433
- "lstrip": false,
434
- "rstrip": false,
435
- "normalized": false,
436
- "special": true
437
- },
438
- {
439
- "id": 703,
440
- "content": "[PLACEHOLDER63]",
441
- "single_word": false,
442
- "lstrip": false,
443
- "rstrip": false,
444
- "normalized": false,
445
- "special": true
446
- }
447
- ],
448
- "normalizer": null,
449
- "pre_tokenizer": {
450
- "type": "Whitespace"
451
- },
452
- "post_processor": null,
453
- "decoder": null,
454
- "model": {
455
- "type": "BPE",
456
- "dropout": null,
457
- "unk_token": "[UNK]",
458
- "continuing_subword_prefix": null,
459
- "end_of_word_suffix": null,
460
- "fuse_unk": false,
461
- "vocab": {
462
- "[STOP]": 0,
463
- "[UNK]": 1,
464
- "[SPACE]": 2,
465
- "!": 3,
466
- "'": 4,
467
- "(": 5,
468
- ")": 6,
469
- ",": 7,
470
- "-": 8,
471
- ".": 9,
472
- "/": 10,
473
- ":": 11,
474
- ";": 12,
475
- "?": 13,
476
- "a": 14,
477
- "b": 15,
478
- "c": 16,
479
- "d": 17,
480
- "e": 18,
481
- "f": 19,
482
- "g": 20,
483
- "h": 21,
484
- "i": 22,
485
- "j": 23,
486
- "k": 24,
487
- "l": 25,
488
- "m": 26,
489
- "n": 27,
490
- "o": 28,
491
- "p": 29,
492
- "q": 30,
493
- "r": 31,
494
- "s": 32,
495
- "t": 33,
496
- "u": 34,
497
- "v": 35,
498
- "w": 36,
499
- "x": 37,
500
- "y": 38,
501
- "z": 39,
502
- "th": 40,
503
- "in": 41,
504
- "the": 42,
505
- "an": 43,
506
- "er": 44,
507
- "ou": 45,
508
- "re": 46,
509
- "on": 47,
510
- "at": 48,
511
- "ed": 49,
512
- "en": 50,
513
- "to": 51,
514
- "ing": 52,
515
- "and": 53,
516
- "is": 54,
517
- "as": 55,
518
- "al": 56,
519
- "or": 57,
520
- "of": 58,
521
- "ar": 59,
522
- "it": 60,
523
- "es": 61,
524
- "he": 62,
525
- "st": 63,
526
- "le": 64,
527
- "om": 65,
528
- "se": 66,
529
- "be": 67,
530
- "ad": 68,
531
- "ow": 69,
532
- "ly": 70,
533
- "ch": 71,
534
- "wh": 72,
535
- "that": 73,
536
- "you": 74,
537
- "li": 75,
538
- "ve": 76,
539
- "ac": 77,
540
- "ti": 78,
541
- "ld": 79,
542
- "me": 80,
543
- "was": 81,
544
- "gh": 82,
545
- "id": 83,
546
- "ll": 84,
547
- "wi": 85,
548
- "ent": 86,
549
- "for": 87,
550
- "ay": 88,
551
- "ro": 89,
552
- "ver": 90,
553
- "ic": 91,
554
- "her": 92,
555
- "ke": 93,
556
- "his": 94,
557
- "no": 95,
558
- "ut": 96,
559
- "un": 97,
560
- "ir": 98,
561
- "lo": 99,
562
- "we": 100,
563
- "ri": 101,
564
- "ha": 102,
565
- "with": 103,
566
- "ght": 104,
567
- "out": 105,
568
- "im": 106,
569
- "ion": 107,
570
- "all": 108,
571
- "ab": 109,
572
- "one": 110,
573
- "ne": 111,
574
- "ge": 112,
575
- "ould": 113,
576
- "ter": 114,
577
- "mo": 115,
578
- "had": 116,
579
- "ce": 117,
580
- "she": 118,
581
- "go": 119,
582
- "sh": 120,
583
- "ur": 121,
584
- "am": 122,
585
- "so": 123,
586
- "pe": 124,
587
- "my": 125,
588
- "de": 126,
589
- "are": 127,
590
- "but": 128,
591
- "ome": 129,
592
- "fr": 130,
593
- "ther": 131,
594
- "fe": 132,
595
- "su": 133,
596
- "do": 134,
597
- "con": 135,
598
- "te": 136,
599
- "ain": 137,
600
- "ere": 138,
601
- "po": 139,
602
- "if": 140,
603
- "they": 141,
604
- "us": 142,
605
- "ag": 143,
606
- "tr": 144,
607
- "now": 145,
608
- "oun": 146,
609
- "this": 147,
610
- "have": 148,
611
- "not": 149,
612
- "sa": 150,
613
- "il": 151,
614
- "up": 152,
615
- "thing": 153,
616
- "from": 154,
617
- "ap": 155,
618
- "him": 156,
619
- "ack": 157,
620
- "ation": 158,
621
- "ant": 159,
622
- "our": 160,
623
- "op": 161,
624
- "like": 162,
625
- "ust": 163,
626
- "ess": 164,
627
- "bo": 165,
628
- "ok": 166,
629
- "ul": 167,
630
- "ind": 168,
631
- "ex": 169,
632
- "com": 170,
633
- "some": 171,
634
- "there": 172,
635
- "ers": 173,
636
- "co": 174,
637
- "res": 175,
638
- "man": 176,
639
- "ard": 177,
640
- "pl": 178,
641
- "wor": 179,
642
- "way": 180,
643
- "tion": 181,
644
- "fo": 182,
645
- "ca": 183,
646
- "were": 184,
647
- "by": 185,
648
- "ate": 186,
649
- "pro": 187,
650
- "ted": 188,
651
- "ound": 189,
652
- "own": 190,
653
- "would": 191,
654
- "ts": 192,
655
- "what": 193,
656
- "qu": 194,
657
- "ally": 195,
658
- "ight": 196,
659
- "ck": 197,
660
- "gr": 198,
661
- "when": 199,
662
- "ven": 200,
663
- "can": 201,
664
- "ough": 202,
665
- "ine": 203,
666
- "end": 204,
667
- "per": 205,
668
- "ous": 206,
669
- "od": 207,
670
- "ide": 208,
671
- "know": 209,
672
- "ty": 210,
673
- "very": 211,
674
- "si": 212,
675
- "ak": 213,
676
- "who": 214,
677
- "about": 215,
678
- "ill": 216,
679
- "them": 217,
680
- "est": 218,
681
- "red": 219,
682
- "ye": 220,
683
- "could": 221,
684
- "ong": 222,
685
- "your": 223,
686
- "their": 224,
687
- "em": 225,
688
- "just": 226,
689
- "other": 227,
690
- "into": 228,
691
- "any": 229,
692
- "whi": 230,
693
- "um": 231,
694
- "tw": 232,
695
- "ast": 233,
696
- "der": 234,
697
- "did": 235,
698
- "ie": 236,
699
- "been": 237,
700
- "ace": 238,
701
- "ink": 239,
702
- "ity": 240,
703
- "back": 241,
704
- "ting": 242,
705
- "br": 243,
706
- "more": 244,
707
- "ake": 245,
708
- "pp": 246,
709
- "then": 247,
710
- "sp": 248,
711
- "el": 249,
712
- "use": 250,
713
- "bl": 251,
714
- "said": 252,
715
- "over": 253,
716
- "get": 254,
717
- "[START]": 255,
718
- "\"": 256,
719
- "#": 257,
720
- "$": 258,
721
- "%": 259,
722
- "&": 260,
723
- "*": 261,
724
- "+": 262,
725
- "0": 263,
726
- "1": 264,
727
- "2": 265,
728
- "3": 266,
729
- "4": 267,
730
- "5": 268,
731
- "6": 269,
732
- "7": 270,
733
- "8": 271,
734
- "9": 272,
735
- "<": 273,
736
- "=": 274,
737
- ">": 275,
738
- "@": 276,
739
- "A": 277,
740
- "B": 278,
741
- "C": 279,
742
- "D": 280,
743
- "E": 281,
744
- "F": 282,
745
- "G": 283,
746
- "H": 284,
747
- "I": 285,
748
- "J": 286,
749
- "K": 287,
750
- "L": 288,
751
- "M": 289,
752
- "N": 290,
753
- "O": 291,
754
- "P": 292,
755
- "Q": 293,
756
- "R": 294,
757
- "S": 295,
758
- "T": 296,
759
- "U": 297,
760
- "V": 298,
761
- "W": 299,
762
- "X": 300,
763
- "Y": 301,
764
- "Z": 302,
765
- "[": 303,
766
- "\\": 304,
767
- "]": 305,
768
- "^": 306,
769
- "_": 307,
770
- "`": 308,
771
- "{": 309,
772
- "|": 310,
773
- "}": 311,
774
- "~": 312,
775
- "‐": 313,
776
- "‑": 314,
777
- "‒": 315,
778
- "–": 316,
779
- "—": 317,
780
- "―": 318,
781
- "‖": 319,
782
- "‗": 320,
783
- "‘": 321,
784
- "’": 322,
785
- "‚": 323,
786
- "‛": 324,
787
- "“": 325,
788
- "”": 326,
789
- "„": 327,
790
- "‟": 328,
791
- " ": 329,
792
- "¡": 330,
793
- "¢": 331,
794
- "£": 332,
795
- "¤": 333,
796
- "¥": 334,
797
- "¦": 335,
798
- "§": 336,
799
- "¨": 337,
800
- "©": 338,
801
- "ª": 339,
802
- "«": 340,
803
- "¬": 341,
804
- "­": 342,
805
- "®": 343,
806
- "¯": 344,
807
- "°": 345,
808
- "±": 346,
809
- "²": 347,
810
- "³": 348,
811
- "´": 349,
812
- "µ": 350,
813
- "¶": 351,
814
- "·": 352,
815
- "¸": 353,
816
- "¹": 354,
817
- "º": 355,
818
- "»": 356,
819
- "¼": 357,
820
- "½": 358,
821
- "¾": 359,
822
- "¿": 360,
823
- "À": 361,
824
- "Á": 362,
825
- "Â": 363,
826
- "Ã": 364,
827
- "Ä": 365,
828
- "Å": 366,
829
- "Æ": 367,
830
- "Ç": 368,
831
- "È": 369,
832
- "É": 370,
833
- "Ê": 371,
834
- "Ë": 372,
835
- "Ì": 373,
836
- "Í": 374,
837
- "Î": 375,
838
- "Ï": 376,
839
- "Ð": 377,
840
- "Ñ": 378,
841
- "Ò": 379,
842
- "Ó": 380,
843
- "Ô": 381,
844
- "Õ": 382,
845
- "Ö": 383,
846
- "×": 384,
847
- "Ø": 385,
848
- "Ù": 386,
849
- "Ú": 387,
850
- "Û": 388,
851
- "Ü": 389,
852
- "Ý": 390,
853
- "Þ": 391,
854
- "ß": 392,
855
- "à": 393,
856
- "á": 394,
857
- "â": 395,
858
- "ã": 396,
859
- "ä": 397,
860
- "å": 398,
861
- "æ": 399,
862
- "ç": 400,
863
- "è": 401,
864
- "é": 402,
865
- "ê": 403,
866
- "ë": 404,
867
- "ì": 405,
868
- "í": 406,
869
- "î": 407,
870
- "ï": 408,
871
- "ð": 409,
872
- "ñ": 410,
873
- "ò": 411,
874
- "ó": 412,
875
- "ô": 413,
876
- "õ": 414,
877
- "ö": 415,
878
- "÷": 416,
879
- "ø": 417,
880
- "ù": 418,
881
- "ú": 419,
882
- "û": 420,
883
- "ü": 421,
884
- "ý": 422,
885
- "þ": 423,
886
- "ÿ": 424,
887
- "ɐ": 425,
888
- "ɑ": 426,
889
- "ɒ": 427,
890
- "ɓ": 428,
891
- "ɔ": 429,
892
- "ɕ": 430,
893
- "ɖ": 431,
894
- "ɗ": 432,
895
- "ɘ": 433,
896
- "ə": 434,
897
- "ɚ": 435,
898
- "ɛ": 436,
899
- "ɜ": 437,
900
- "ɝ": 438,
901
- "ɞ": 439,
902
- "ɟ": 440,
903
- "ɠ": 441,
904
- "ɡ": 442,
905
- "ɢ": 443,
906
- "ɣ": 444,
907
- "ɤ": 445,
908
- "ɥ": 446,
909
- "ɦ": 447,
910
- "ɧ": 448,
911
- "ɨ": 449,
912
- "ɩ": 450,
913
- "ɪ": 451,
914
- "ɫ": 452,
915
- "ɬ": 453,
916
- "ɭ": 454,
917
- "ɮ": 455,
918
- "ɯ": 456,
919
- "ɰ": 457,
920
- "ɱ": 458,
921
- "ɲ": 459,
922
- "ɳ": 460,
923
- "ɴ": 461,
924
- "ɵ": 462,
925
- "ɶ": 463,
926
- "ɷ": 464,
927
- "ɸ": 465,
928
- "ɹ": 466,
929
- "ɺ": 467,
930
- "ɻ": 468,
931
- "ɼ": 469,
932
- "ɽ": 470,
933
- "ɾ": 471,
934
- "ɿ": 472,
935
- "ʀ": 473,
936
- "ʁ": 474,
937
- "ʂ": 475,
938
- "ʃ": 476,
939
- "ʄ": 477,
940
- "ʅ": 478,
941
- "ʆ": 479,
942
- "ʇ": 480,
943
- "ʈ": 481,
944
- "ʉ": 482,
945
- "ʊ": 483,
946
- "ʋ": 484,
947
- "ʌ": 485,
948
- "ʍ": 486,
949
- "ʎ": 487,
950
- "ʏ": 488,
951
- "ʐ": 489,
952
- "ʑ": 490,
953
- "ʒ": 491,
954
- "ʓ": 492,
955
- "ʔ": 493,
956
- "ʕ": 494,
957
- "ʖ": 495,
958
- "ʗ": 496,
959
- "ʘ": 497,
960
- "ʙ": 498,
961
- "ʚ": 499,
962
- "ʛ": 500,
963
- "ʜ": 501,
964
- "ʝ": 502,
965
- "ʞ": 503,
966
- "ʟ": 504,
967
- "ʠ": 505,
968
- "ʡ": 506,
969
- "ʢ": 507,
970
- "ʣ": 508,
971
- "ʤ": 509,
972
- "ʥ": 510,
973
- "ʦ": 511,
974
- "ʧ": 512,
975
- "ʨ": 513,
976
- "ʩ": 514,
977
- "ʪ": 515,
978
- "ʫ": 516,
979
- "ʬ": 517,
980
- "ʭ": 518,
981
- "ʮ": 519,
982
- "ʯ": 520,
983
- "ʰ": 521,
984
- "ʱ": 522,
985
- "ʲ": 523,
986
- "ʳ": 524,
987
- "ʴ": 525,
988
- "ʵ": 526,
989
- "ʶ": 527,
990
- "ʷ": 528,
991
- "ʸ": 529,
992
- "ʹ": 530,
993
- "ʺ": 531,
994
- "ʻ": 532,
995
- "ʼ": 533,
996
- "ʽ": 534,
997
- "ʾ": 535,
998
- "ʿ": 536,
999
- "ˀ": 537,
1000
- "ˁ": 538,
1001
- "˂": 539,
1002
- "˃": 540,
1003
- "˄": 541,
1004
- "˅": 542,
1005
- "ˆ": 543,
1006
- "ˇ": 544,
1007
- "ˈ": 545,
1008
- "ˉ": 546,
1009
- "ˊ": 547,
1010
- "ˋ": 548,
1011
- "ˌ": 549,
1012
- "ˍ": 550,
1013
- "ˎ": 551,
1014
- "ˏ": 552,
1015
- "ː": 553,
1016
- "ˑ": 554,
1017
- "˒": 555,
1018
- "˓": 556,
1019
- "˔": 557,
1020
- "˕": 558,
1021
- "˖": 559,
1022
- "˗": 560,
1023
- "˘": 561,
1024
- "˙": 562,
1025
- "˚": 563,
1026
- "˛": 564,
1027
- "˜": 565,
1028
- "˝": 566,
1029
- "˞": 567,
1030
- "˟": 568,
1031
- "ˠ": 569,
1032
- "ˡ": 570,
1033
- "ˢ": 571,
1034
- "ˣ": 572,
1035
- "ˤ": 573,
1036
- "˥": 574,
1037
- "˦": 575,
1038
- "˧": 576,
1039
- "˨": 577,
1040
- "˩": 578,
1041
- "˪": 579,
1042
- "˫": 580,
1043
- "ˬ": 581,
1044
- "˭": 582,
1045
- "ˮ": 583,
1046
- "˯": 584,
1047
- "˰": 585,
1048
- "˱": 586,
1049
- "˲": 587,
1050
- "˳": 588,
1051
- "˴": 589,
1052
- "˵": 590,
1053
- "˶": 591,
1054
- "˷": 592,
1055
- "˸": 593,
1056
- "˹": 594,
1057
- "˺": 595,
1058
- "˻": 596,
1059
- "˼": 597,
1060
- "˽": 598,
1061
- "˾": 599,
1062
- "˿": 600,
1063
- "ā": 601,
1064
- "ō": 602,
1065
- "…": 603,
1066
- "[UH]": 604,
1067
- "[UM]": 605,
1068
- "[giggle]": 606,
1069
- "[laughter]": 607,
1070
- "[guffaw]": 608,
1071
- "[inhale]": 609,
1072
- "[exhale]": 610,
1073
- "[sigh]": 611,
1074
- "[cry]": 612,
1075
- "[bark]": 613,
1076
- "[howl]": 614,
1077
- "[meow]": 615,
1078
- "[singing]": 616,
1079
- "[music]": 617,
1080
- "[whistle]": 618,
1081
- "[humming]": 619,
1082
- "[gasp]": 620,
1083
- "[groan]": 621,
1084
- "[whisper]": 622,
1085
- "[mumble]": 623,
1086
- "[sniff]": 624,
1087
- "[sneeze]": 625,
1088
- "[cough]": 626,
1089
- "[snore]": 627,
1090
- "[chew]": 628,
1091
- "[sip]": 629,
1092
- "[clear_throat]": 630,
1093
- "[kiss]": 631,
1094
- "[shhh]": 632,
1095
- "[gibberish]": 633,
1096
- "[fr]": 634,
1097
- "[es]": 635,
1098
- "[de]": 636,
1099
- "[it]": 637,
1100
- "[ipa]": 638,
1101
- "[end_of_label]": 639,
1102
- "ŋ": 640,
1103
- "ᵻ": 641,
1104
- "θ": 642,
1105
- "̩": 643,
1106
- "\u0303": 644,
1107
- "ɑː": 645,
1108
- "iː": 646,
1109
- "uː": 647,
1110
- "ɜː": 648,
1111
- "ɔː": 649,
1112
- "oː": 650,
1113
- "eɪ": 651,
1114
- "oʊ": 652,
1115
- "aɪ": 653,
1116
- "aʊ": 654,
1117
- "ɔɪ": 655,
1118
- "dʒ": 656,
1119
- "tʃ": 657,
1120
- "ɪŋ": 658,
1121
- "ᵻd": 659,
1122
- "ˈiː": 660,
1123
- "ˌiː": 661,
1124
- "ˈɪ": 662,
1125
- "ˌɪ": 663,
1126
- "ˈeɪ": 664,
1127
- "ˌeɪ": 665,
1128
- "ˈɛ": 666,
1129
- "ˌɛ": 667,
1130
- "ˈæ": 668,
1131
- "ˌæ": 669,
1132
- "ˈɑː": 670,
1133
- "ˌɑː": 671,
1134
- "ˈɔː": 672,
1135
- "ˌɔː": 673,
1136
- "oːɹ": 674,
1137
- "ˈoːɹ": 675,
1138
- "ˌoːɹ": 676,
1139
- "ˈoʊ": 677,
1140
- "ˌoʊ": 678,
1141
- "ˈʊ": 679,
1142
- "ˌʊ": 680,
1143
- "ˈuː": 681,
1144
- "ˌuː": 682,
1145
- "ˈɜː": 683,
1146
- "ˌɜː": 684,
1147
- "ˈʌ": 685,
1148
- "ˌʌ": 686,
1149
- "ˈaɪ": 687,
1150
- "ˌaɪ": 688,
1151
- "ˈaʊ": 689,
1152
- "ˌaʊ": 690,
1153
- "ˈɔɪ": 691,
1154
- "ˌɔɪ": 692,
1155
- "ˈɚ": 693,
1156
- "ˌɐ": 694,
1157
- "[PLACEHOLDER55]": 695,
1158
- "[PLACEHOLDER56]": 696,
1159
- "[PLACEHOLDER57]": 697,
1160
- "[PLACEHOLDER58]": 698,
1161
- "[PLACEHOLDER59]": 699,
1162
- "[PLACEHOLDER60]": 700,
1163
- "[PLACEHOLDER61]": 701,
1164
- "[PLACEHOLDER62]": 702,
1165
- "[PLACEHOLDER63]": 703
1166
- },
1167
- "merges": [
1168
- "t h",
1169
- "i n",
1170
- "th e",
1171
- "a n",
1172
- "e r",
1173
- "o u",
1174
- "r e",
1175
- "o n",
1176
- "a t",
1177
- "e d",
1178
- "e n",
1179
- "t o",
1180
- "in g",
1181
- "an d",
1182
- "i s",
1183
- "a s",
1184
- "a l",
1185
- "o r",
1186
- "o f",
1187
- "a r",
1188
- "i t",
1189
- "e s",
1190
- "h e",
1191
- "s t",
1192
- "l e",
1193
- "o m",
1194
- "s e",
1195
- "b e",
1196
- "a d",
1197
- "o w",
1198
- "l y",
1199
- "c h",
1200
- "w h",
1201
- "th at",
1202
- "y ou",
1203
- "l i",
1204
- "v e",
1205
- "a c",
1206
- "t i",
1207
- "l d",
1208
- "m e",
1209
- "w as",
1210
- "g h",
1211
- "i d",
1212
- "l l",
1213
- "w i",
1214
- "en t",
1215
- "f or",
1216
- "a y",
1217
- "r o",
1218
- "v er",
1219
- "i c",
1220
- "h er",
1221
- "k e",
1222
- "h is",
1223
- "n o",
1224
- "u t",
1225
- "u n",
1226
- "i r",
1227
- "l o",
1228
- "w e",
1229
- "r i",
1230
- "h a",
1231
- "wi th",
1232
- "gh t",
1233
- "ou t",
1234
- "i m",
1235
- "i on",
1236
- "al l",
1237
- "a b",
1238
- "on e",
1239
- "n e",
1240
- "g e",
1241
- "ou ld",
1242
- "t er",
1243
- "m o",
1244
- "h ad",
1245
- "c e",
1246
- "s he",
1247
- "g o",
1248
- "s h",
1249
- "u r",
1250
- "a m",
1251
- "s o",
1252
- "p e",
1253
- "m y",
1254
- "d e",
1255
- "a re",
1256
- "b ut",
1257
- "om e",
1258
- "f r",
1259
- "the r",
1260
- "f e",
1261
- "s u",
1262
- "d o",
1263
- "c on",
1264
- "t e",
1265
- "a in",
1266
- "er e",
1267
- "p o",
1268
- "i f",
1269
- "the y",
1270
- "u s",
1271
- "a g",
1272
- "t r",
1273
- "n ow",
1274
- "ou n",
1275
- "th is",
1276
- "ha ve",
1277
- "no t",
1278
- "s a",
1279
- "i l",
1280
- "u p",
1281
- "th ing",
1282
- "fr om",
1283
- "a p",
1284
- "h im",
1285
- "ac k",
1286
- "at ion",
1287
- "an t",
1288
- "ou r",
1289
- "o p",
1290
- "li ke",
1291
- "u st",
1292
- "es s",
1293
- "b o",
1294
- "o k",
1295
- "u l",
1296
- "in d",
1297
- "e x",
1298
- "c om",
1299
- "s ome",
1300
- "the re",
1301
- "er s",
1302
- "c o",
1303
- "re s",
1304
- "m an",
1305
- "ar d",
1306
- "p l",
1307
- "w or",
1308
- "w ay",
1309
- "ti on",
1310
- "f o",
1311
- "c a",
1312
- "w ere",
1313
- "b y",
1314
- "at e",
1315
- "p ro",
1316
- "t ed",
1317
- "oun d",
1318
- "ow n",
1319
- "w ould",
1320
- "t s",
1321
- "wh at",
1322
- "q u",
1323
- "al ly",
1324
- "i ght",
1325
- "c k",
1326
- "g r",
1327
- "wh en",
1328
- "v en",
1329
- "c an",
1330
- "ou gh",
1331
- "in e",
1332
- "en d",
1333
- "p er",
1334
- "ou s",
1335
- "o d",
1336
- "id e",
1337
- "k now",
1338
- "t y",
1339
- "ver y",
1340
- "s i",
1341
- "a k",
1342
- "wh o",
1343
- "ab out",
1344
- "i ll",
1345
- "the m",
1346
- "es t",
1347
- "re d",
1348
- "y e",
1349
- "c ould",
1350
- "on g",
1351
- "you r",
1352
- "the ir",
1353
- "e m",
1354
- "j ust",
1355
- "o ther",
1356
- "in to",
1357
- "an y",
1358
- "wh i",
1359
- "u m",
1360
- "t w",
1361
- "as t",
1362
- "d er",
1363
- "d id",
1364
- "i e",
1365
- "be en",
1366
- "ac e",
1367
- "in k",
1368
- "it y",
1369
- "b ack",
1370
- "t ing",
1371
- "b r",
1372
- "mo re",
1373
- "a ke",
1374
- "p p",
1375
- "the n",
1376
- "s p",
1377
- "e l",
1378
- "u se",
1379
- "b l",
1380
- "sa id",
1381
- "o ver",
1382
- "ge t",
1383
- "ɑ ː",
1384
- "i ː",
1385
- "u ː",
1386
- "ɜ ː",
1387
- "ɔ ː",
1388
- "o ː",
1389
- "e ɪ",
1390
- "o ʊ",
1391
- "a ɪ",
1392
- "a ʊ",
1393
- "ɔ ɪ",
1394
- "d ʒ",
1395
- "t ʃ",
1396
- "ɪ ŋ",
1397
- "ᵻ d",
1398
- "ˈ iː",
1399
- "ˌ iː",
1400
- "ˈ ɪ",
1401
- "ˌ ɪ",
1402
- "ˈ eɪ",
1403
- "ˌ eɪ",
1404
- "ˈ ɛ",
1405
- "ˌ ɛ",
1406
- "ˈ æ",
1407
- "ˌ æ",
1408
- "ˈ ɑː",
1409
- "ˌ ɑː",
1410
- "ˈ ɔː",
1411
- "ˌ ɔː",
1412
- "oː ɹ",
1413
- "ˈ oːɹ",
1414
- "ˌ oːɹ",
1415
- "ˈ oʊ",
1416
- "ˌ oʊ",
1417
- "ˈ ʊ",
1418
- "ˌ ʊ",
1419
- "ˈ uː",
1420
- "ˌ uː",
1421
- "ˈ ɜː",
1422
- "ˌ ɜː",
1423
- "ˈ ʌ",
1424
- "ˌ ʌ",
1425
- "ˈ aɪ",
1426
- "ˌ aɪ",
1427
- "ˈ aʊ",
1428
- "ˌ aʊ",
1429
- "ˈ ɔɪ",
1430
- "ˌ ɔɪ",
1431
- "ˈ ɚ",
1432
- "ˌ ɐ"
1433
- ]
1434
- }
1435
- }