fix: share _safe_json_loads with tool-call parser, not just orchestrator
Move _safe_json_loads from orchestrator.py to llm_client.py and have
_extract_tool_calls use it when parsing <tool_call> JSON blocks from
model output. orchestrator now imports it from llm_client.
Background: in the first full DeepSeek run (runs/2026-05-12T17-25-38),
~10 'Failed to parse tool call JSON' warnings appeared, all from regex
patterns where the LLM wrote \. or \* inside JSON string values:
Failed to parse tool call JSON: {..., "pattern": "Outlook Express|...|\.dbx"}
Failed to parse tool call JSON: {..., "pattern": "ethereal.*\.pcap"}
Failed to parse tool call JSON: {..., "pattern": "lookatlan.*\.txt|..."}
These are exactly the kind of stray-backslash errors stage-1 sanitize
already handles for orchestrator JSON calls — but tool-call extraction
was using bare json.loads. Result: each failed tool call was dropped with
only a logged warning, the LLM never received a result, and at least one
network agent burned 14m26s spinning before hitting max_iterations=40.
Now the sanitize/log-on-failure path is shared. Verified against the
three failure cases from yesterday's log: all three now parse cleanly.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -80,6 +80,36 @@ def _build_tools_prompt(tools: list[dict]) -> str:
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _safe_json_loads(text: str):
|
||||
"""Parse JSON with progressive sanitization for LLM-produced output.
|
||||
|
||||
Tries (0) as-is, (1) escape stray backslashes outside valid JSON escapes
|
||||
(\\" \\\\ \\/ \\b \\f \\n \\r \\t \\uXXXX). On final failure, logs raw
|
||||
input (first 600 chars) so we can diagnose what the model emitted.
|
||||
|
||||
Used both by orchestrator JSON callsites and by _extract_tool_calls
|
||||
when parsing <tool_call> blocks from model output.
|
||||
"""
|
||||
try:
|
||||
return json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
stage1 = re.sub(
|
||||
r'\\(?!["\\/bfnrt]|u[0-9a-fA-F]{4})',
|
||||
r'\\\\',
|
||||
text,
|
||||
)
|
||||
try:
|
||||
return json.loads(stage1)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(
|
||||
"_safe_json_loads failed after sanitize (%s); raw head[:600]=%r",
|
||||
e, text[:600],
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
def _extract_tool_calls(text: str) -> list[dict]:
|
||||
"""Extract tool call JSON blocks from model output."""
|
||||
pattern = re.compile(
|
||||
@@ -90,7 +120,7 @@ def _extract_tool_calls(text: str) -> list[dict]:
|
||||
for match in pattern.finditer(text):
|
||||
raw = match.group(1).strip()
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
parsed = _safe_json_loads(raw)
|
||||
calls.append(parsed)
|
||||
except json.JSONDecodeError:
|
||||
logger.warning("Failed to parse tool call JSON: %s", raw[:200])
|
||||
|
||||
@@ -12,41 +12,11 @@ from pathlib import Path
|
||||
|
||||
from agent_factory import AgentFactory
|
||||
from evidence_graph import EvidenceGraph
|
||||
from llm_client import LLMClient
|
||||
from llm_client import LLMClient, _safe_json_loads
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _safe_json_loads(text: str):
|
||||
"""Parse JSON with progressive sanitization for LLM-produced output.
|
||||
|
||||
Tries: (0) as-is, (1) escape stray backslashes outside valid JSON
|
||||
escapes (\\" \\\\ \\/ \\b \\f \\n \\r \\t \\uXXXX). On final failure,
|
||||
logs raw input (first 600 chars) so we can diagnose what the model
|
||||
actually emitted.
|
||||
"""
|
||||
try:
|
||||
return json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Escape backslashes not followed by a valid JSON escape character.
|
||||
# NOTE: \\u must be followed by exactly 4 hex digits to be valid.
|
||||
stage1 = re.sub(
|
||||
r'\\(?!["\\/bfnrt]|u[0-9a-fA-F]{4})',
|
||||
r'\\\\',
|
||||
text,
|
||||
)
|
||||
try:
|
||||
return json.loads(stage1)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(
|
||||
"_safe_json_loads failed after sanitize (%s); raw head[:600]=%r",
|
||||
e, text[:600],
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
def _log(msg: str, **extra) -> None:
    """Log *msg* at INFO level, attaching the keyword arguments as extra fields.

    The keyword arguments are forwarded unchanged as the ``extra`` mapping of
    the logging call, which makes them available to the terminal formatter.

    Args:
        msg: The log message.
        **extra: Structured fields to attach to the log record.
    """
    structured_fields = extra
    logger.info(msg, extra=structured_fields)
|
||||
|
||||
Reference in New Issue
Block a user