goabonga committed on
Commit
47c47af
·
verified ·
1 Parent(s): 42ab2c0

Upload tokenizer files (vocab, config, README)

Browse files
Files changed (1) hide show
  1. tokenizer.json +37 -13
tokenizer.json CHANGED
@@ -1,38 +1,62 @@
1
  {
2
  "version": "1.0",
3
- "added_tokens_decoder": {
4
- "0": {
 
5
  "content": "<pad>",
 
6
  "lstrip": false,
7
- "normalized": false,
8
  "rstrip": false,
9
- "single_word": false,
10
  "special": true
11
  },
12
- "1": {
 
13
  "content": "<unk>",
 
14
  "lstrip": false,
15
- "normalized": false,
16
  "rstrip": false,
17
- "single_word": false,
18
  "special": true
19
  },
20
- "2": {
 
21
  "content": "<bos>",
 
22
  "lstrip": false,
23
- "normalized": false,
24
  "rstrip": false,
25
- "single_word": false,
26
  "special": true
27
  },
28
- "3": {
 
29
  "content": "<eos>",
 
30
  "lstrip": false,
31
- "normalized": false,
32
  "rstrip": false,
33
- "single_word": false,
34
  "special": true
35
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  },
37
  "decoder": {
38
  "type": "WordPiece",
 
1
  {
2
  "version": "1.0",
3
+ "added_tokens": [
4
+ {
5
+ "id": 0,
6
  "content": "<pad>",
7
+ "single_word": false,
8
  "lstrip": false,
 
9
  "rstrip": false,
10
+ "normalized": false,
11
  "special": true
12
  },
13
+ {
14
+ "id": 1,
15
  "content": "<unk>",
16
+ "single_word": false,
17
  "lstrip": false,
 
18
  "rstrip": false,
19
+ "normalized": false,
20
  "special": true
21
  },
22
+ {
23
+ "id": 2,
24
  "content": "<bos>",
25
+ "single_word": false,
26
  "lstrip": false,
 
27
  "rstrip": false,
28
+ "normalized": false,
29
  "special": true
30
  },
31
+ {
32
+ "id": 3,
33
  "content": "<eos>",
34
+ "single_word": false,
35
  "lstrip": false,
 
36
  "rstrip": false,
37
+ "normalized": false,
38
  "special": true
39
  }
40
+ ],
41
+ "normalizer": {
42
+ "type": "Sequence",
43
+ "normalizers": [
44
+ {
45
+ "type": "NFC"
46
+ }
47
+ ]
48
+ },
49
+ "pre_tokenizer": {
50
+ "type": "Sequence",
51
+ "pretokenizers": [
52
+ {
53
+ "type": "Whitespace"
54
+ },
55
+ {
56
+ "type": "Punctuation",
57
+ "behavior": "Isolated"
58
+ }
59
+ ]
60
  },
61
  "decoder": {
62
  "type": "WordPiece",