roberta
stefan-it committed on
Commit
7d5882d
·
verified ·
1 Parent(s): cadb455

tokenizer: add fast version

Browse files
Files changed (1) hide show
  1. tokenizer.json +6 -6
tokenizer.json CHANGED
@@ -14,7 +14,7 @@
14
  },
15
  {
16
  "id": 1,
17
- "content": "[UNK]",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
@@ -23,7 +23,7 @@
23
  },
24
  {
25
  "id": 2,
26
- "content": "[CLS]",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
@@ -32,7 +32,7 @@
32
  },
33
  {
34
  "id": 3,
35
- "content": "[SEP]",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
@@ -79,9 +79,9 @@
79
  "ignore_merges": false,
80
  "vocab": {
81
  "[PAD]": 0,
82
- "[UNK]": 1,
83
- "[CLS]": 2,
84
- "[SEP]": 3,
85
  "[MASK]": 4,
86
  "!": 5,
87
  "\"": 6,
 
14
  },
15
  {
16
  "id": 1,
17
+ "content": "[CLS]",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
23
  },
24
  {
25
  "id": 2,
26
+ "content": "[SEP]",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
32
  },
33
  {
34
  "id": 3,
35
+ "content": "[UNK]",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
 
79
  "ignore_merges": false,
80
  "vocab": {
81
  "[PAD]": 0,
82
+ "[CLS]": 1,
83
+ "[SEP]": 2,
84
+ "[UNK]": 3,
85
  "[MASK]": 4,
86
  "!": 5,
87
  "\"": 6,