Update tokenizer.py
Browse files
- tokenizer.py +1 -0
tokenizer.py
CHANGED
@@ -57,6 +57,7 @@ class ChatGLMTokenizer:
 prefix_mask += [1, 0]

 if text_pair is not None:
+    text_pair = self.preprocess(text_pair, linebreak, whitespaces)
     pair_tokens = self.text_tokenizer.encode(text_pair)
     tokens += pair_tokens
     prefix_mask += [0] * len(pair_tokens)