diff --git a/llm_client.py b/llm_client.py
index 315924a..f6abd09 100644
--- a/llm_client.py
+++ b/llm_client.py
@@ -80,6 +80,42 @@ def _build_tools_prompt(tools: list[dict]) -> str:
     return "\n".join(lines)
 
 
+def _safe_json_loads(text: str):
+    """Parse JSON with progressive sanitization for LLM-produced output.
+
+    Tries (0) as-is, (1) escape stray backslashes outside valid JSON escapes
+    (\\" \\\\ \\/ \\b \\f \\n \\r \\t \\uXXXX). On final failure, logs raw
+    input (first 600 chars) so we can diagnose what the model emitted.
+
+    Used both by orchestrator JSON callsites and by _extract_tool_calls
+    when parsing tool-call blocks from model output.
+
+    Raises:
+        json.JSONDecodeError: if the text is still invalid after sanitizing.
+    """
+    try:
+        return json.loads(text)
+    except json.JSONDecodeError:
+        pass
+
+    # Match valid escapes as whole units so the second backslash of an
+    # escaped backslash (\\) is never mistaken for a stray one.
+    # NOTE: \u must be followed by exactly 4 hex digits to be valid.
+    stage1 = re.sub(
+        r'\\(["\\/bfnrt]|u[0-9a-fA-F]{4})|\\',
+        lambda m: m.group(0) if m.group(1) else '\\\\',
+        text,
+    )
+    try:
+        return json.loads(stage1)
+    except json.JSONDecodeError as e:
+        logger.warning(
+            "_safe_json_loads failed after sanitize (%s); raw head[:600]=%r",
+            e, text[:600],
+        )
+        raise
+
+
 def _extract_tool_calls(text: str) -> list[dict]:
     """Extract tool call JSON blocks from model output."""
     pattern = re.compile(
@@ -90,7 +126,7 @@ def _extract_tool_calls(text: str) -> list[dict]:
     for match in pattern.finditer(text):
         raw = match.group(1).strip()
         try:
-            parsed = json.loads(raw)
+            parsed = _safe_json_loads(raw)
             calls.append(parsed)
         except json.JSONDecodeError:
             logger.warning("Failed to parse tool call JSON: %s", raw[:200])
diff --git a/orchestrator.py b/orchestrator.py
index 30a33ee..8f654b7 100644
--- a/orchestrator.py
+++ b/orchestrator.py
@@ -12,41 +12,11 @@ from pathlib import Path
 
 from agent_factory import AgentFactory
 from evidence_graph import EvidenceGraph
-from llm_client import LLMClient
+from llm_client import LLMClient, _safe_json_loads
 
 logger = logging.getLogger(__name__)
 
 
-def _safe_json_loads(text: str):
-    """Parse JSON with progressive sanitization for LLM-produced output.
-
-    Tries: (0) as-is, (1) escape stray backslashes outside valid JSON
-    escapes (\\" \\\\ \\/ \\b \\f \\n \\r \\t \\uXXXX). On final failure,
-    logs raw input (first 600 chars) so we can diagnose what the model
-    actually emitted.
-    """
-    try:
-        return json.loads(text)
-    except json.JSONDecodeError:
-        pass
-
-    # Escape backslashes not followed by a valid JSON escape character.
-    # NOTE: \\u must be followed by exactly 4 hex digits to be valid.
-    stage1 = re.sub(
-        r'\\(?!["\\/bfnrt]|u[0-9a-fA-F]{4})',
-        r'\\\\',
-        text,
-    )
-    try:
-        return json.loads(stage1)
-    except json.JSONDecodeError as e:
-        logger.warning(
-            "_safe_json_loads failed after sanitize (%s); raw head[:600]=%r",
-            e, text[:600],
-        )
-        raise
-
-
 def _log(msg: str, **extra) -> None:
     """Emit a structured log message with extra fields for the terminal formatter."""
     logger.info(msg, extra=extra)