vladislavbro committed on
Commit
527ed32
·
verified ·
1 Parent(s): 2ab2c69

Update tokenizer.json

Browse files
Files changed (1) hide show
  1. tokenizer.json +11 -2
tokenizer.json CHANGED
@@ -28,7 +28,7 @@
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
- "normalized": false
32
  },
33
  {
34
  "id": 255,
@@ -445,7 +445,16 @@
445
  "special": true
446
  }
447
  ],
448
- "normalizer": null,
 
 
 
 
 
 
 
 
 
449
  "pre_tokenizer": {
450
  "type": "Whitespace"
451
  },
 
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
+ "normalized": true
32
  },
33
  {
34
  "id": 255,
 
445
  "special": true
446
  }
447
  ],
448
+ "normalizer": {
449
+ "type": "Sequence",
450
+ "normalizers": [
451
+ {
452
+ "type": "Replace",
453
+ "pattern": { "String": " " },
454
+ "content": "[SPACE]"
455
+ }
456
+ ]
457
+ },
458
  "pre_tokenizer": {
459
  "type": "Whitespace"
460
  },