fix: share _safe_json_loads with tool-call parser, not just orchestrator

Move _safe_json_loads from orchestrator.py to llm_client.py and have _extract_tool_calls use it when parsing <tool_call> JSON blocks from model output. orchestrator.py now imports it from llm_client. Background: in the first full DeepSeek run (runs/2026-05-12T17-25-38), ~10 'Failed to parse tool call JSON' warnings appeared, all from regex patterns where the LLM wrote \. or \* inside JSON string values: Failed to parse tool call JSON: {..., "pattern": "Outlook Express|...|\.dbx"} Failed to parse tool call JSON: {..., "pattern": "ethereal.*\.pcap"} Failed to parse tool call JSON: {..., "pattern": "lookatlan.*\.txt|..."} These are exactly the kind of stray-backslash errors stage-1 sanitize already handles for orchestrator JSON calls — but tool-call extraction was using bare json.loads. Result: each failed tool call was silently dropped on the floor, the LLM never got a result, and at least one network agent burned 14m26s spinning before hitting max_iterations=40. Now the sanitize/log-on-failure path is shared. Verified against the three failure cases from that run's log: all three now parse cleanly. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 20:29:21 +08:00
parent 76df34ed79
commit 0a2b344c84
2 changed files with 32 additions and 32 deletions
--- a/llm_client.py
+++ b/llm_client.py
@@ -80,6 +80,36 @@ def _build_tools_prompt(tools: list[dict]) -> str:
    return "\n".join(lines)


+def _safe_json_loads(text: str):
+    """Parse JSON with progressive sanitization for LLM-produced output.
+
+    Tries (0) as-is, (1) escape stray backslashes outside valid JSON escapes
+    (\\" \\\\ \\/ \\b \\f \\n \\r \\t \\uXXXX). On final failure, logs raw
+    input (first 600 chars) so we can diagnose what the model emitted.
+
+    Used both by orchestrator JSON callsites and by _extract_tool_calls
+    when parsing <tool_call> blocks from model output.
+    """
+    try:
+        return json.loads(text)
+    except json.JSONDecodeError:
+        pass
+
+    stage1 = re.sub(
+        r'\\(?!["\\/bfnrt]|u[0-9a-fA-F]{4})',
+        r'\\\\',
+        text,
+    )
+    try:
+        return json.loads(stage1)
+    except json.JSONDecodeError as e:
+        logger.warning(
+            "_safe_json_loads failed after sanitize (%s); raw head[:600]=%r",
+            e, text[:600],
+        )
+        raise
+
+
 def _extract_tool_calls(text: str) -> list[dict]:
    """Extract tool call JSON blocks from model output."""
    pattern = re.compile(
@@ -90,7 +120,7 @@ def _extract_tool_calls(text: str) -> list[dict]:
    for match in pattern.finditer(text):
        raw = match.group(1).strip()
        try:
-            parsed = json.loads(raw)
+            parsed = _safe_json_loads(raw)
            calls.append(parsed)
        except json.JSONDecodeError:
            logger.warning("Failed to parse tool call JSON: %s", raw[:200])
--- a/orchestrator.py
+++ b/orchestrator.py
@@ -12,41 +12,11 @@ from pathlib import Path

 from agent_factory import AgentFactory
 from evidence_graph import EvidenceGraph
-from llm_client import LLMClient
+from llm_client import LLMClient, _safe_json_loads

 logger = logging.getLogger(__name__)


-def _safe_json_loads(text: str):
-    """Parse JSON with progressive sanitization for LLM-produced output.
-
-    Tries: (0) as-is, (1) escape stray backslashes outside valid JSON
-    escapes (\\" \\\\ \\/ \\b \\f \\n \\r \\t \\uXXXX). On final failure,
-    logs raw input (first 600 chars) so we can diagnose what the model
-    actually emitted.
-    """
-    try:
-        return json.loads(text)
-    except json.JSONDecodeError:
-        pass
-
-    # Escape backslashes not followed by a valid JSON escape character.
-    # NOTE: \\u must be followed by exactly 4 hex digits to be valid.
-    stage1 = re.sub(
-        r'\\(?!["\\/bfnrt]|u[0-9a-fA-F]{4})',
-        r'\\\\',
-        text,
-    )
-    try:
-        return json.loads(stage1)
-    except json.JSONDecodeError as e:
-        logger.warning(
-            "_safe_json_loads failed after sanitize (%s); raw head[:600]=%r",
-            e, text[:600],
-        )
-        raise
-
-
 def _log(msg: str, **extra) -> None:
    """Emit a structured log message with extra fields for the terminal formatter."""
    logger.info(msg, extra=extra)