{#- ===== HELPER: raise_exception macro =====
    Plain Jinja2 ships no raise_exception, so this macro does not abort rendering.
    It writes the marker "\n[ERROR: <message>]" into the output instead; callers
    detect validation failures by searching the rendered text for "ERROR:".
    NOTE(review): if the host engine injects its own raise_exception global, this
    macro definition presumably takes its place - confirm that is intended.
-#}
{%- macro raise_exception(message) -%}
    {{- '\n[ERROR: ' ~ message ~ ']' -}}
{%- endmacro -%}

{#- ===== SECTION 1A: MACRO render_content =====
    Renders one message's content to text.
      content           : a string, a list of items (image / video / text), or
                          None / undefined (renders as nothing).
      count_vision      : when true, every image / video item increments
                          ns.image_count / ns.video_count.
      is_system_content : when true, a list is first checked for image / video
                          items and an error marker is emitted for each one found
                          (system messages must not carry media). Rendering then
                          continues as usual.
    Any other content type (mapping, number, ...) produces an error marker.
-#}
{%- macro render_content(content, count_vision=false, is_system_content=false) -%}
    {#- "List-like" means iterable but neither a mapping nor a string. Strings are
        excluded explicitly because llama.cpp treats them as non-iterable in for loops. -#}
    {%- set _is_seq = content is iterable and content is not mapping and content is not string -%}

    {#- Media validation pass (system / developer content only). -#}
    {%- if is_system_content and _is_seq -%}
        {%- for part in content -%}
            {%- if part.type == 'image' or 'image' in part or 'image_url' in part -%}
                {{- raise_exception('System message cannot contain images.') -}}
            {%- endif -%}
            {%- if part.type == 'video' or 'video' in part -%}
                {{- raise_exception('System message cannot contain videos.') -}}
            {%- endif -%}
        {%- endfor -%}
    {%- endif -%}

    {#- Rendering pass. -#}
    {%- if content is none or content is not defined -%}
        {#- Nothing to render. -#}
        {{- '' -}}
    {%- elif content is string -%}
        {{- content -}}
    {%- elif _is_seq -%}
        {%- for part in content -%}
            {%- if part.type == 'image' or 'image' in part or 'image_url' in part -%}
                {%- if count_vision -%}
                    {%- set ns.image_count = ns.image_count + 1 -%}
                {%- endif -%}
                {#- Optional "Picture N: " label, driven by the add_vision_id template variable. -#}
                {%- if add_vision_id | default(false) -%}
                    {{- 'Picture ' ~ ns.image_count ~ ': ' -}}
                {%- endif -%}
                {{- '<|vision_start|><|image_pad|><|vision_end|>' -}}
            {%- elif part.type == 'video' or 'video' in part -%}
                {%- if count_vision -%}
                    {%- set ns.video_count = ns.video_count + 1 -%}
                {%- endif -%}
                {%- if add_vision_id | default(false) -%}
                    {{- 'Video ' ~ ns.video_count ~ ': ' -}}
                {%- endif -%}
                {{- '<|vision_start|><|video_pad|><|vision_end|>' -}}
            {%- elif part.type == 'text' or 'text' in part -%}
                {{- part.text -}}
            {%- else -%}
                {#- Item is neither image, video nor text. -#}
                {{- raise_exception('Unexpected content type in message content.') -}}
            {%- endif -%}
        {%- endfor -%}
    {%- else -%}
        {#- Defined, non-None, but not a string or list. -#}
        {{- raise_exception('Unexpected content type.') -}}
    {%- endif -%}
{%- endmacro -%}

{#- ===== SECTION 1B: MACRO detect_tool_error (NEW in v0.7) =====
    Detects if a tool response contains error indicators.
Heuristic (carried over from v18). A response counts as an error only when ALL hold:
      - it is shorter than 500 characters (longer text is assumed to be real output),
      - it contains no '$ ' (shell prompt prefix),
      - it contains no 'took ' (timing info; matched case-insensitively),
      - it contains at least one error keyword (matched case-insensitively), e.g.
        'error:', 'exception:', 'traceback', 'failed to', 'permission denied'.
    The macro renders nothing. Its result is delivered through side effects:
      - ns.last_tool_failed     : true when an error was detected, else false
      - ns.consecutive_failures : incremented on error, reset to 0 otherwise
-#}
{%- macro detect_tool_error(content) -%}
    {#- Non-string input is analysed as an empty string (llama.cpp compatibility). -#}
    {%- set _text = content if content is string else '' -%}
    {%- set _lowered = _text | lower -%}

    {#- Does any error keyword occur in the lower-cased text? -#}
    {%- set _scan = namespace(keyword_found=false) -%}
    {%- for _kw in ['"error":', 'error:', 'exception:', 'traceback', 'command not found', 'invalid syntax', 'failed to', 'permission denied'] -%}
        {%- if _kw in _lowered -%}
            {%- set _scan.keyword_found = true -%}
        {%- endif -%}
    {%- endfor -%}

    {#- Short, not shell output, no timing line. -#}
    {%- set _eligible = (_text | length) < 500 and '$ ' not in _text and 'took ' not in _lowered -%}

    {%- if _eligible and _scan.keyword_found -%}
        {%- set ns.last_tool_failed = true -%}
        {%- set ns.consecutive_failures = ns.consecutive_failures + 1 -%}
    {%- else -%}
        {%- set ns.last_tool_failed = false -%}
        {%- set ns.consecutive_failures = 0 -%}
    {%- endif -%}
{%- endmacro -%}

{#- ===== SECTION 2: NAMESPACE INITIALISATION =====
    Single ns object for all mutable state.
Fields:
      enable_thinking      : controls the think-block in the generation prompt
      preserve_thinking    : controls think-block display in conversation history
      image_count          : vision counter for images
      video_count          : vision counter for videos
      consecutive_failures : consecutive tool-call failures (NEW in v0.7, from v18)
      last_tool_failed     : flag for the current tool response (NEW in v0.7, from v18)
    Both thinking flags START AS false here and only change when a kwarg (below)
    or a flag found by the pre-scan sets them.
    NOTE(review): earlier documentation described both as "default=true"; the code
    initialises them to false. Confirm which default is intended.
-#}
{%- set ns = namespace(
    image_count=0,
    video_count=0,
    consecutive_failures=0,
    last_tool_failed=false,
    enable_thinking=false,
    preserve_thinking=false
) -%}

{#- enable_thinking kwarg: when supplied, its truthiness becomes the flag. -#}
{%- if enable_thinking is defined -%}
    {%- set ns.enable_thinking = true if enable_thinking else false -%}
{%- endif -%}

{#- preserve_thinking kwarg (since v0.7 it also affects conversation history):
      supplied and truthy : ns.preserve_thinking = true, enable_thinking untouched
      supplied and falsy  : ns.preserve_thinking = false AND thinking is forced off
      not supplied        : both flags keep the values set above
-#}
{%- if preserve_thinking is defined -%}
    {%- if preserve_thinking -%}
        {%- set ns.preserve_thinking = true -%}
    {%- else -%}
        {%- set ns.preserve_thinking = false -%}
        {%- set ns.enable_thinking = false -%}
    {%- endif -%}
{%- endif -%}

{#- ===== SECTION 3: PRE-SCAN =====
    Track last /no_think or /think flag in user messages. Also scan system messages
    for <|think_off|> / <|think_on|> markers (allows apps to control thinking mode
    via system prompt injection). The model follows the last flag encountered in
    multi-turn conversations.
-#}
{#- Messages are visited in order, so a later flag overwrites an earlier one.
    Only string content is inspected; list content is treated as empty here. -#}
{%- for _m in messages -%}
    {%- if _m.role == 'user' -%}
        {#- A user turn toggles thinking when its text ENDS with the flag
            (trailing whitespace ignored). -#}
        {%- set _tail = _m.content if _m.content is string else '' -%}
        {%- set _tail = _tail.rstrip() -%}
        {%- if _tail.endswith('/no_think') -%}
            {%- set ns.enable_thinking = false -%}
        {%- elif _tail.endswith('/think') -%}
            {%- set ns.enable_thinking = true -%}
        {%- endif -%}
    {%- elif _m.role == 'system' or _m.role == 'developer' -%}
        {#- A system / developer turn toggles thinking when the marker appears
            anywhere in it; <|think_off|> wins if both are present. -#}
        {%- set _sys_text = _m.content if _m.content is string else '' -%}
        {%- if '<|think_off|>' in _sys_text -%}
            {%- set ns.enable_thinking = false -%}
        {%- elif '<|think_on|>' in _sys_text -%}
            {%- set ns.enable_thinking = true -%}
        {%- endif -%}
    {%- endif -%}
{%- endfor -%}

{#- ===== SECTION 4: VALIDATE MESSAGES (NEW in v0.7) =====
    An empty or missing message list produces an error marker (from v18).
-#}
{%- if not messages -%}
    {{- raise_exception('No messages provided.') -}}
{%- endif -%}

{#- ===== SECTION 5: COLLECT SYSTEM CONTENT =====
    All system / developer messages are rendered, stripped of the
    <|think_off|> / <|think_on|> markers, and joined with a blank line ("\n\n").
    Messages that end up empty are dropped. render_content is called with
    is_system_content=true so media inside system content yields an error marker.
-#}
{%- set ns_sys = namespace(content='') -%}
{%- for msg in messages -%}
    {%- if msg.role == 'system' or msg.role == 'developer' -%}
        {%- set _piece = render_content(msg.content | default(''), false, true)
                         | trim
                         | replace('<|think_off|>', '')
                         | replace('<|think_on|>', '')
                         | trim -%}
        {%- if _piece -%}
            {%- set ns_sys.content = _piece if ns_sys.content == '' else ns_sys.content + '\n\n' + _piece -%}
        {%- endif -%}
    {%- endif -%}
{%- endfor -%}

{#- ===== SECTION 6: BUILD TOOLS LIST =====
    Normalise each tool to {"type":"function","function":{...}} format.
    Serialisation happens later at output time (avoids Markup + str escaping bugs).
-#}
{%- set _has_tools = tools is defined and tools -%}
{%- if _has_tools -%}
    {%- set ns_tb = namespace(list=[]) -%}
    {%- for tool in tools -%}
        {%- if tool.function is defined -%}
            {%- set ns_tb.list = ns_tb.list + [tool] -%}
        {%- else -%}
            {#- Bare function schema: wrap it in the standard envelope. -#}
            {%- set ns_tb.list = ns_tb.list + [{"type": "function", "function": tool}] -%}
        {%- endif -%}
    {%- endfor -%}
{%- endif -%}

{#- ===== SECTION 7: OUTPUT SYSTEM TURN =====
    Emitted only when there is system content and/or tools. Every fragment goes
    through its own {{ }} block so tojson Markup objects are never concatenated
    with plain strings (which would trigger HTML-escaping). User system content
    comes BEFORE the tools block. No default system prompt is injected.
    FIX: the <tools> / <tool_call> wrapper tags and the <function-name> /
    <args-json-object> placeholders were missing from the literals, leaving only
    bare newlines and an unusable example; they are restored here.
-#}
{%- if ns_sys.content or _has_tools -%}
    {{- '<|im_start|>system\n' -}}
    {%- if ns_sys.content -%}
        {{- ns_sys.content -}}
        {%- if _has_tools -%}{{- '\n\n' -}}{%- endif -%}
    {%- endif -%}
    {%- if _has_tools -%}
        {{- '# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n' -}}
        {%- for tool in ns_tb.list -%}
            {{- tool | tojson -}}
            {%- if not loop.last -%}{{- '\n' -}}{%- endif -%}
        {%- endfor -%}
        {{- '\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{"name": <function-name>, "arguments": <args-json-object>}\n</tool_call>' -}}
    {%- endif -%}
    {{- '<|im_end|>\n' -}}
{%- endif -%}

{#- ===== SECTION 8: MAIN MESSAGE LOOP =====
    Renders every non-system message as an <|im_start|>ROLE ... <|im_end|> turn.
    Since v0.7: tool responses are run through detect_tool_error, a warning is
    appended to failed tool responses, and ns.consecutive_failures escalates it.
-#}
{%- for message in messages -%}

    {#- 8a: System / Developer - already rendered in the system turn, skip -#}
    {%- if message.role == 'system' or message.role == 'developer' -%}

    {#- 8b: User messages. Fragments are emitted separately (not joined with +)
        to follow the same no-concatenation rule as the system turn. -#}
    {%- elif message.role == 'user' -%}
        {%- set _uc = render_content(message.content | default(''), true, false) -%}
        {{- '<|im_start|>user\n' -}}
        {{- _uc -}}
        {{- '<|im_end|>\n' -}}

    {#- 8c: Assistant messages -#}
    {%- elif message.role == 'assistant' -%}
        {#- Extract content as a string; a missing key or unsupported type yields ''.
            Strings are tested first because llama.cpp treats them as non-iterable. -#}
        {%- if message.content is defined and message.content is string -%}
            {%- set _ac = message.content -%}
        {%- elif message.content is defined and message.content is iterable and message.content is not mapping and message.content is not string -%}
            {%- set _ac = render_content(message.content, false, false) -%}
        {%- else -%}
            {%- set _ac = '' -%}
        {%- endif -%}

        {#- Some frameworks store thinking separately in message.reasoning_content.
            Rebuild "<think>...</think>" + content from it, but only when the content
            does not already carry a think block.
            FIX: the marker literal was empty, so this test could never succeed. -#}
        {%- if message.reasoning_content is defined and message.reasoning_content is string and message.reasoning_content | trim and '<think>' not in _ac -%}
            {%- set _ac = '<think>\n' + message.reasoning_content | trim + '\n</think>\n\n' + _ac -%}
        {%- endif -%}

        {#- tool_calls must be a list-like value, never a string (llama.cpp compatibility). -#}
        {%- set _tc = message.tool_calls if message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls is not string else [] -%}

        {#- Some frameworks duplicate the calls inside content as well; keep only the
            text before the first <tool_call>.
            FIX: the separator literal was empty, and splitting on an empty separator
            raises an error on every assistant turn that has tool calls. -#}
        {%- if _tc and '<tool_call>' in _ac -%}
            {%- set _ac = _ac.split('<tool_call>')[0] | trim -%}
        {%- endif -%}

        {#- Think-block visibility (v0.7, logic from v18):
              tool-call turn          : always shown (part of the tool-call format)
              ns.preserve_thinking    : shown on every assistant message
              last message otherwise  : shown only when ns.enable_thinking
              older message otherwise : stripped
        -#}
        {%- set _show_think = false -%}
        {%- if _tc -%}
            {%- set _show_think = true -%}
        {%- elif ns.preserve_thinking -%}
            {%- set _show_think = true -%}
        {%- elif loop.last -%}
            {%- set _show_think = ns.enable_thinking -%}
        {%- endif -%}

        {%- if not _show_think -%}
            {#- Fuzzy end-tag detection: drop everything up to and including the last
                closing tag, plus the newlines that follow it.
                FIX: all four candidates were empty strings, so nothing was ever stripped.
                NOTE(review): '</think>' is the canonical tag; the other three variants
                are a best guess because the original literals could not be recovered -
                confirm against upstream. -#}
            {%- set _think_end = '' -%}
            {%- if '</think>' in _ac -%}
                {%- set _think_end = '</think>' -%}
            {%- elif '</thinking>' in _ac -%}
                {%- set _think_end = '</thinking>' -%}
            {%- elif '</ think>' in _ac -%}
                {%- set _think_end = '</ think>' -%}
            {%- elif '</think >' in _ac -%}
                {%- set _think_end = '</think >' -%}
            {%- endif -%}
            {%- if _think_end -%}
                {%- set _ac = _ac.split(_think_end)[-1].lstrip('\n') -%}
            {%- endif -%}
        {%- elif not _tc and loop.last and '<think>' not in _ac and not ns.enable_thinking -%}
            {#- Last turn in non-thinking mode: prepend an empty think block if missing. -#}
            {%- set _ac = '<think>\n\n</think>\n\n' + _ac -%}
        {%- endif -%}

        {#- Emit the assistant turn. -#}
        {{- '<|im_start|>assistant\n' -}}
        {%- if _ac -%}
            {{- _ac -}}
            {%- if _tc -%}{{- '\n' -}}{%- endif -%}
        {%- endif -%}

        {#- Tool calls in Hermes format: <tool_call> + JSON object + </tool_call>.
            Each value has its own {{ }} block so Markup is never concatenated with
            plain strings. -#}
        {%- if _tc -%}
            {%- for tc in _tc -%}
                {#- Accept both {"function": {name, arguments}} and a flat {name, arguments}. -#}
                {%- set _fn = tc.function if tc.function is defined else tc -%}
                {{- '<tool_call>\n' -}}
                {{- '{"name": ' -}}{{- _fn.name | tojson -}}
                {{- ', "arguments": ' -}}
                {%- if _fn.arguments is string -%}
                    {#- Already serialised JSON text: pass through unchanged. -#}
                    {{- _fn.arguments -}}
                {%- elif _fn.arguments is defined and _fn.arguments is not none -%}
                    {{- _fn.arguments | tojson -}}
                {%- else -%}
                    {#- No arguments supplied: emit an empty object so the JSON stays valid. -#}
                    {{- '{}' -}}
                {%- endif -%}
                {{- '}' -}}
                {%- if not loop.last -%}
                    {{- '\n</tool_call>\n' -}}
                {%- else -%}
                    {{- '\n</tool_call>' -}}
                {%- endif -%}
            {%- endfor -%}
        {%- endif -%}
        {{- '<|im_end|>\n' -}}

    {#- 8d: Tool results, with error detection (NEW in v0.7). Consecutive tool
        messages share a single user turn, each in its own <tool_response> block. -#}
    {%- elif message.role == 'tool' -%}
        {%- set _prev_role = messages[loop.index0 - 1].role if loop.index0 > 0 else '' -%}
        {%- set _next_role = messages[loop.index0 + 1].role if not loop.last else '' -%}

        {#- Normalise the content to text. default('') only covers an undefined key,
            so None would otherwise print as "None" and a list as its repr. -#}
        {%- if message.content is not defined or message.content is none -%}
            {%- set _tool_content = '' -%}
        {%- elif message.content is string -%}
            {%- set _tool_content = message.content -%}
        {%- elif message.content is iterable and message.content is not mapping -%}
            {%- set _tool_content = render_content(message.content, true, false) -%}
        {%- else -%}
            {%- set _tool_content = message.content -%}
        {%- endif -%}

        {#- Renders nothing; updates ns.last_tool_failed / ns.consecutive_failures. -#}
        {{- detect_tool_error(_tool_content) -}}

        {%- if _prev_role != 'tool' -%}
            {{- '<|im_start|>user\n' -}}
        {%- endif -%}
        {{- '<tool_response>\n' -}}
        {{- _tool_content -}}

        {#- Warning appended inside the response block when an error was detected
            (text only since v0.8, for tokenization safety). -#}
        {%- if ns.last_tool_failed -%}
            {%- if ns.consecutive_failures >= 2 -%}
                {{- '\n\n[SYSTEM WARNING: ' ~ ns.consecutive_failures ~ ' consecutive tool errors detected. Your previous approach is incorrect.]' -}}
            {%- else -%}
                {{- '\n\n[SYSTEM WARNING: The previous tool call returned an error. Diagnose the failure and retry with corrected arguments.]' -}}
            {%- endif -%}
        {%- endif -%}

        {%- if _next_role == 'tool' -%}
            {{- '\n</tool_response>\n' -}}
        {%- else -%}
            {{- '\n</tool_response>' -}}
            {{- '<|im_end|>\n' -}}
        {%- endif -%}

    {#- 8e: Unknown role - explicit error marker (from v18). "~" is used instead
        of "+" so a missing or non-string role cannot break the concatenation. -#}
    {%- else -%}
        {{- raise_exception('Unexpected message role: ' ~ message.role) -}}
    {%- endif -%}
{%- endfor -%}

{#- ===== SECTION 9: GENERATION PROMPT =====
    FIXED in v0.7: preserve_thinking now affects conversation history (Section 8),
    so generation prompt logic is simplified.
enable_thinking=True  → open "<think>\n" prefill, so llama.cpp reasoning-budget and
                            other inference engines can hook into the think-stream.
                            The model continues generating inside the open block.
    enable_thinking=False → exact non-thinking prefill: "<think>\n\n</think>\n\n"
                            (an empty, already closed think block).
    NOTE: The "<think>\n" opener is EPHEMERAL - it lives only in the generation
    prompt, never in chat history. Historical think-block stripping is handled in
    Section 8 based on the preserve_thinking setting.
    FIX: the <think> / </think> markers were missing from both literals, so the
    prefill consisted of bare newlines and no think block was ever opened or closed.
-#}
{%- if add_generation_prompt -%}
    {{- '<|im_start|>assistant\n' -}}
    {%- if ns.enable_thinking -%}
        {{- '<think>\n' -}}
    {%- else -%}
        {{- '<think>\n\n</think>\n\n' -}}
    {%- endif -%}
{%- endif -%}