"""InvestigationStrategist — the LLM that decides depth vs breadth. DESIGN_STRATEGIST.md §3. The strategist does NOT run forensic tools. Its job per round is exactly one decision: propose 1-3 leads that would move an active hypothesis, OR declare the investigation complete. It reads the graph through four read-only views (graph_overview / source_coverage / marginal_yield / budget_status) and expresses its decision through two write tools (propose_lead / declare_investigation_complete). This is the smallest possible agent in the system — the entire point is that strategy decisions live in one agent so they're auditable and the rest of the codebase doesn't carry implicit depth/breadth policy. """ from __future__ import annotations import logging from base_agent import BaseAgent from evidence_graph import EvidenceGraph from llm_client import LLMClient from tool_registry import TOOL_CATALOG logger = logging.getLogger(__name__) class InvestigationStrategist(BaseAgent): name = "strategist" role = ( "Investigation strategist. You do not run forensic tools yourself. " "Each round you take ONE decision: propose 1-3 new investigation leads " "that would materially affect an active hypothesis, OR declare the " "investigation complete. Your judgment is grounded in the graph " "(hypotheses, sources, coverage, marginal yield, budget) — never in " "speculation." ) # At least one of these must be called every round, otherwise BaseAgent's # forced RECORD retry kicks in and re-prompts the strategist to take a # documented decision. mandatory_record_tools = ("propose_lead", "declare_investigation_complete") # declare_complete is terminal — calling it short-circuits the tool loop, # which is what we want (strategist returns immediately on "done"). terminal_tools = ("declare_investigation_complete",) # Strategist-specific tools, plus the read-only graph queries inherited # from BaseAgent. NO graph write tools (no add_phenomenon / link_to_entity # / observe_identity); the strategist must NOT mutate evidence directly. _STRATEGY_TOOLS = ( "graph_overview", "source_coverage", "marginal_yield", "budget_status", "propose_lead", "declare_investigation_complete", ) def _register_graph_tools(self) -> None: """Strategist gets read-only graph queries + the six strategy tools. It does NOT get write tools (no add_phenomenon, observe_identity, link_to_entity, add_temporal_edge). Every graph mutation must come from a dispatched worker, not from the planner. """ self._register_graph_read_tools() for tool_name in self._STRATEGY_TOOLS: td = TOOL_CATALOG.get(tool_name) if td is None: logger.warning( "Strategist could not find tool %s in TOOL_CATALOG — " "register_all_tools must run before agent instantiation.", tool_name, ) continue self.register_tool(td.name, td.description, td.input_schema, td.executor) def _build_system_prompt(self, task: str) -> str: """Strategist-specific prompt. Replaces the BaseAgent default which walks an INVESTIGATE→RECORD→LINK workflow that is wrong for a planner agent. """ return ( f"You are {self.name}, the investigation strategist.\n" f"Role: {self.role}\n\n" f"Your task: {task}\n\n" f"WORKFLOW (do this exactly):\n" f" 1. Call graph_overview FIRST. Look at: which hypotheses are\n" f" active (conf 0.2-0.8) vs already supported/refuted; which\n" f" ones have many edges but only 1 distinct_source; which had\n" f" a recent_flip vs none in two rounds.\n" f" 2. Call marginal_yield to see if the last rounds produced anything.\n" f" 3. Call budget_status to know your runway.\n" f" 4. For each candidate lead direction, call source_coverage on\n" f" the relevant source to see what's been touched.\n" f" 5. Take exactly ONE of these terminal actions:\n" f" (a) Call propose_lead 1-3 times for leads that would\n" f" materially move an active hypothesis. STOP after this.\n" f" (b) Call declare_investigation_complete with a specific\n" f" reason. STOP after this.\n" f"\n" f"DECISION CRITERIA — when to propose vs when to stop:\n" f" PROPOSE when:\n" f" - A hypothesis is supported only by ONE source — get\n" f" cross-source corroboration. Same-source repeats are\n" f" cheap (harmonic damping).\n" f" - A hypothesis is in the active band (0.2 < conf < 0.8) —\n" f" it needs the deciding evidence.\n" f" - A high-value artefact is ✗ on source_coverage AND an\n" f" active hypothesis depends on the kind of evidence that\n" f" artefact would produce.\n" f" STOP (declare_complete) when:\n" f" - marginal_yield shows zero across 2+ rounds.\n" f" - budget_status warns ≥90% on tool_calls or rounds.\n" f" - all active hypotheses are resolved (supported or refuted).\n" f" - coverage saturation: every ✗ on every source is irrelevant\n" f" to active hypotheses.\n" f"\n" f"HARD RULES:\n" f" - You CANNOT call investigation tools (list_directory,\n" f" sqlite_query, parse_registry_key, extract_file, etc.) — your\n" f" job is to direct workers, not to investigate yourself.\n" f" - You CANNOT call write tools (add_phenomenon, observe_identity,\n" f" link_to_entity, add_hypothesis, add_temporal_edge). All\n" f" evidence mutations come from the workers you dispatch.\n" f" - Every propose_lead MUST cite a real hyp-id from\n" f" graph_overview's table — fabricated ids will be rejected.\n" f" - Don't propose more than 3 leads in one round. Quality over\n" f" quantity — a 4th lead almost always means you're not really\n" f" sure what would move the graph.\n" f" - Don't re-propose a lead that's already pending. The system\n" f" deduplicates (motivating_hyp, expected_type, agent, source)\n" f" so duplicates silently no-op, but they waste your budget." )