{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "BertNormalizer", "clean_text": true, "handle_chinese_chars": false, "strip_accents": null, "lowercase": false }, "pre_tokenizer": null, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 1 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 3 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 1 ], "tokens": [ "[SEP]" ] } } }, "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true }, "model": { "type": "WordLevel", "vocab": { "[UNK]": 0, "[SEP]": 1, "[PAD]": 2, "[CLS]": 3, "[MASK]": 4, "0": 5, "1": 6, "2": 7, "3": 8, "4": 9, "5": 10, "6": 11, "7": 12, "8": 13, "9": 14, "10": 15, "11": 16, "12": 17, "13": 18, "14": 19, "15": 20, "16": 21, "17": 22, "18": 23, "19": 24, "20": 25, "21": 26, "22": 27, "23": 28, "24": 29, "25": 30, "26": 31, "27": 32, "28": 33, "29": 34, "30": 35, "31": 36, "32": 37, "33": 38, "34": 39, "35": 40, "36": 41, "37": 42, "38": 43, "39": 44, "40": 45, "41": 46, "42": 47, "43": 48, "44": 49, "45": 50, "46": 51, "47": 52, "48": 53, "49": 54 }, "unk_token": "[UNK]" } }