Files changed (2) hide show
  1. chat_template.jinja +25 -0
  2. tokenizer_config.json +2 -3
chat_template.jinja ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- set ns = namespace(has_head=true) -%}{#- Renders `messages` into a single prompt string. `ns` is a namespace object so that the flag assigned inside the for loop is still visible in later iterations. -#}
2
+ {%- set loop_messages = messages -%}
3
+ {%- for message in loop_messages -%}
4
+ {%- set content = message['content'] -%}
5
+ {%- if loop.index0 == 0 -%}{#- first message only -#}
6
+ {%- if content == '' -%}
7
+ {%- set ns.has_head = false -%}{#- first message is empty (any role): record that there is no head -#}
8
+ {%- elif message['role'] == 'system' -%}
9
+ {%- set content = '<|startoftext|>' + content + '<|extra_4|>' -%}{#- non-empty leading system message: '<|startoftext|>' prefix, '<|extra_4|>' suffix -#}
10
+ {%- endif -%}
11
+ {%- endif -%}
12
+ {%- if message['role'] == 'user' -%}
13
+ {%- if loop.index0 == 1 and not ns.has_head -%}
14
+ {%- set content = '<|startoftext|>' + content -%}{#- NOTE(review): for this same case the else branch below also runs and prepends '<|startoftext|>' again, so the rendered text starts with that token twice; confirm this is intended before changing it -#}
15
+ {%- endif -%}
16
+ {%- if loop.index0 == 1 and ns.has_head -%}
17
+ {%- set content = content + '<|extra_0|>' -%}{#- user message at index 1 after a non-empty first message: only the '<|extra_0|>' suffix is added, no '<|startoftext|>' prefix -#}
18
+ {%- else -%}
19
+ {%- set content = '<|startoftext|>' + content + '<|extra_0|>' -%}{#- every other user message: '<|startoftext|>' prefix and '<|extra_0|>' suffix -#}
20
+ {%- endif -%}
21
+ {%- elif message['role'] == 'assistant' -%}
22
+ {%- set content = content + '<|eos|>' -%}{#- assistant messages get the '<|eos|>' suffix -#}
23
+ {%- endif -%}
24
+ {{- content -}}{#- emit the content as modified above; a message that matched none of the branches is emitted unchanged -#}
25
+ {%- endfor -%}
tokenizer_config.json CHANGED
@@ -4,6 +4,5 @@
4
  "eos_token": "<|eos|>",
5
  "model_max_length": 262144,
6
  "pad_token": "<|pad|>",
7
- "tokenizer_class": "PreTrainedTokenizerFast",
8
- "chat_template": "{% set context = {'has_head': true} %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = message['content'] %}{% if loop.index0 == 0 %}{% if content == '' %}{% set _ = context.update({'has_head': false}) %}{% elif message['role'] == 'system' %}{% set content = '<|startoftext|>' + content + '<|extra_4|>' %}{% endif %}{% endif %}{% if message['role'] == 'user' %}{% if loop.index0 == 1 and not context.has_head %}{% set content = '<|startoftext|>' + content %}{% endif %}{% if loop.index0 == 1 and context.has_head %}{% set content = content + '<|extra_0|>' %}{% else %}{% set content = '<|startoftext|>' + content + '<|extra_0|>' %}{% endif %}{% elif message['role'] == 'assistant' %}{% set content = content + '<|eos|>' %}{% endif %}{{ content }}{% endfor %}"
9
- }
 
4
  "eos_token": "<|eos|>",
5
  "model_max_length": 262144,
6
  "pad_token": "<|pad|>",
7
+ "tokenizer_class": "PreTrainedTokenizerFast"
8
+ }