trl-internal-testing
/

tiny-GptOssForCausalLM

@@ -117,14 +117,15 @@
         {%- set tool = tool.function %}
         {{- "// " + tool.description + "
 " }}
-        {{- "type "+ tool.name + " = (" }}
-        {%- if tool.parameters and tool.parameters.properties -%}
-            {{- "_: " }}
-            {{- "{
 " }}
             {%- for param_name, param_spec in tool.parameters.properties.items() %}
-                {{- "// " + param_spec.description + "
 " }}
                 {{- param_name }}
                 {%- if param_name not in (tool.parameters.required or []) -%}
                     {{- "?" }}
@@ -132,7 +133,9 @@
                 {{- ": " }}
                 {{- render_typescript_type(param_spec, tool.parameters.required or []) }}
                 {%- if param_spec.default is defined -%}
-                    {%- if param_spec.oneOf %}
                         {{- "// default: " + param_spec.default }}
                     {%- else %}
                         {{- ", // default: " + param_spec.default|tojson }}
@@ -140,20 +143,22 @@
                 {%- endif -%}
                 {%- if not loop.last %}
                     {{- ",
 " }}
                 {%- endif -%}
             {%- endfor %}
-            {{- ",
-}) => any;
 " }}
         {%- else -%}
-            {{- "
-}) => any;
 " }}
         {%- endif -%}
     {%- endfor %}
-    {{- "
-} // namespace " + namespace_name }}
 {%- endmacro -%}
 {%- macro render_builtin_tools(browser_tool, python_tool) -%}
@@ -250,11 +255,10 @@
 {#- System Message Construction ============================================ #}
 {%- macro build_system_message() -%}
     {%- if model_identity is not defined %}
-        {{- "You are ChatGPT, a large language model trained by OpenAI.
-" -}}
-    {%- else %}
-        {{- model_identity }}
     {%- endif %}
     {{- "Knowledge cutoff: 2024-06
 " }}
     {{- "Current date: " + strftime_now("%Y-%m-%d") + "
@@ -263,7 +267,7 @@
     {%- if reasoning_effort is not defined %}
         {%- set reasoning_effort = "medium" %}
     {%- endif %}
-    {{- "reasoning: " + reasoning_effort + "
 " }}
     {%- if builtin_tools %}
@@ -280,9 +284,11 @@
         {%- endfor %}
         {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) }}
     {%- endif -%}
-    {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message.
-" }}
-    {{- "Calls to these tools must go to the commentary channel: 'functions'." }}
 {%- endmacro -%}
 {#- Main Template Logic ================================================= #}
@@ -328,42 +334,60 @@
 {%- for message in loop_messages -%}
     {#- At this point only assistant/user/tool messages should remain #}
     {%- if message.role == 'assistant' -%}
         {%- if "tool_calls" in message %}
             {#- We assume max 1 tool call per message, and so we infer the tool call name #}
             {#- in "tool" messages from the most recent assistant tool call name #}
             {%- set tool_call = message.tool_calls[0] %}
             {%- if tool_call.function %}
                 {%- set tool_call = tool_call.function %}
             {%- endif %}
-            {%- if message.content %}
                 {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }}
             {%- endif %}
             {{- "<|start|>assistant to=" }}
-            {{- "functions." + tool_call.name + "<|channel|>commentary json<|message|>" }}
             {{- tool_call.arguments|tojson }}
-            {{- "<|end|>" }}
             {%- set last_tool_call.name = tool_call.name %}
-        {%- elif "thinking" in message and loop.last and not add_generation_prompt %}
             {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #}
             {#- This is a situation that should only occur in training, never in inference. #}
-            {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
             {#- <|return|> indicates the end of generation, but <|end|> does not #}
             {#- <|return|> should never be an input to the model, but we include it as the final token #}
             {#- when training, so the model learns to emit it. #}
             {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }}
-            {%- set last_tool_call.name = none %}
-        {%- elif "thinking" in message %}
             {#- CoT is dropped during all previous turns, so we never render it for inference #}
             {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }}
             {%- set last_tool_call.name = none %}
-        {%- elif loop.last and not add_generation_prompt %}
-            {#- <|return|> indicates the end of generation, but <|end|> does not #}
-            {#- <|return|> should never be an input to the model, but we include it as the final token #}
-            {#- when training, so the model learns to emit it. #}
-            {{- "<|start|>assistant<|message|>" + message.content + "<|return|>" }}
-        {%- else %}
-            {{- "<|start|>assistant<|message|>" + message.content + "<|end|>" }}
-            {%- set last_tool_call.name = none %}
         {%- endif %}
     {%- elif message.role == 'tool' -%}
         {%- if last_tool_call.name is none %}

         {%- set tool = tool.function %}
         {{- "// " + tool.description + "
 " }}
+        {{- "type "+ tool.name + " = " }}
+        {%- if tool.parameters and tool.parameters.properties %}
+            {{- "(_: {
 " }}
             {%- for param_name, param_spec in tool.parameters.properties.items() %}
+                {%- if param_spec.description %}
+                    {{- "// " + param_spec.description + "
 " }}
+                {%- endif %}
                 {{- param_name }}
                 {%- if param_name not in (tool.parameters.required or []) -%}
                     {{- "?" }}
                 {{- ": " }}
                 {{- render_typescript_type(param_spec, tool.parameters.required or []) }}
                 {%- if param_spec.default is defined -%}
+                    {%- if param_spec.enum %}
+                        {{- ", // default: " + param_spec.default }}
+                    {%- elif param_spec.oneOf %}
                         {{- "// default: " + param_spec.default }}
                     {%- else %}
                         {{- ", // default: " + param_spec.default|tojson }}
                 {%- endif -%}
                 {%- if not loop.last %}
                     {{- ",
+" }}
+                {%- else %}
+                    {{- ",
 " }}
                 {%- endif -%}
             {%- endfor %}
+            {{- "}) => any;
 " }}
         {%- else -%}
+            {{- "() => any;
 " }}
         {%- endif -%}
     {%- endfor %}
+    {{- "} // namespace " + namespace_name }}
 {%- endmacro -%}
 {%- macro render_builtin_tools(browser_tool, python_tool) -%}
 {#- System Message Construction ============================================ #}
 {%- macro build_system_message() -%}
     {%- if model_identity is not defined %}
+        {%- set model_identity = "You are ChatGPT, a large language model trained by OpenAI." %}
     {%- endif %}
+    {{- model_identity + "
+" }}
     {{- "Knowledge cutoff: 2024-06
 " }}
     {{- "Current date: " + strftime_now("%Y-%m-%d") + "
     {%- if reasoning_effort is not defined %}
         {%- set reasoning_effort = "medium" %}
     {%- endif %}
+    {{- "Reasoning: " + reasoning_effort + "
 " }}
     {%- if builtin_tools %}
         {%- endfor %}
         {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) }}
     {%- endif -%}
+    {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message." }}
+    {%- if tools -%}
+        {{- "
+Calls to these tools must go to the commentary channel: 'functions'." }}
+    {%- endif -%}
 {%- endmacro -%}
 {#- Main Template Logic ================================================= #}
 {%- for message in loop_messages -%}
     {#- At this point only assistant/user/tool messages should remain #}
     {%- if message.role == 'assistant' -%}
+        {#- Sanity checks: reject messages whose content or thinking field already contains rendered analysis/final channel markup #}
+        {%- if "content" in message %}
+            {%- if "<|channel|>analysis<|message|>" in message.content or "<|channel|>final<|message|>" in message.content %}
+                {{- raise_exception("You have passed a message containing <|channel|> tags in the content field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }}
+            {%- endif %}
+        {%- endif %}
+        {%- if "thinking" in message %}
+            {%- if "<|channel|>analysis<|message|>" in message.thinking or "<|channel|>final<|message|>" in message.thinking %}
+                {{- raise_exception("You have passed a message containing <|channel|> tags in the thinking field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }}
+            {%- endif %}
+        {%- endif %}
         {%- if "tool_calls" in message %}
+            {#- We need very careful handling here - we want to drop the tool call analysis message if a later #}
+            {#- assistant message without tool calls (i.e. a final-channel message) exists, but otherwise we want #}
+            {#- to retain it. This is the only case when we render CoT/analysis messages in inference. #}
+            {%- set future_final_message = namespace(found=false) %}
+            {%- for future_message in loop_messages[loop.index:] %}
+                {%- if future_message.role == 'assistant' and "tool_calls" not in future_message %}
+                    {%- set future_final_message.found = true %}
+                {%- endif %}
+            {%- endfor %}
             {#- We assume max 1 tool call per message, and so we infer the tool call name #}
             {#- in "tool" messages from the most recent assistant tool call name #}
             {%- set tool_call = message.tool_calls[0] %}
             {%- if tool_call.function %}
                 {%- set tool_call = tool_call.function %}
             {%- endif %}
+            {%- if message.content and message.thinking %}
+                {{- raise_exception("Cannot pass both content and thinking in an assistant message with tool calls! Put the analysis message in one or the other, but not both.") }}
+            {%- elif message.content and not future_final_message.found %}
                 {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }}
+            {%- elif message.thinking and not future_final_message.found %}
+                {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
             {%- endif %}
             {{- "<|start|>assistant to=" }}
+            {{- "functions." + tool_call.name + "<|channel|>commentary " }}
+            {{- (tool_call.content_type if tool_call.content_type is defined else "json") + "<|message|>" }}
             {{- tool_call.arguments|tojson }}
+            {{- "<|call|>" }}
             {%- set last_tool_call.name = tool_call.name %}
+        {%- elif loop.last and not add_generation_prompt %}
             {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #}
             {#- This is a situation that should only occur in training, never in inference. #}
+            {%- if "thinking" in message %}
+                {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
+            {%- endif %}
             {#- <|return|> indicates the end of generation, but <|end|> does not #}
             {#- <|return|> should never be an input to the model, but we include it as the final token #}
             {#- when training, so the model learns to emit it. #}
             {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }}
+        {%- else %}
             {#- CoT is dropped during all previous turns, so we never render it for inference #}
             {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }}
             {%- set last_tool_call.name = none %}
         {%- endif %}
     {%- elif message.role == 'tool' -%}
         {%- if last_tool_call.name is none %}