feat(strategist) S4: InvestigationStrategist agent

DESIGN_STRATEGIST.md §3. The smallest possible agent — its entire output per round is one decision: propose 1-3 leads (each citing a real hypothesis it expects to move) OR declare the investigation complete with a reason. Constraint surface: mandatory_record_tools = ("propose_lead", "declare_investigation_complete"); terminal_tools = ("declare_investigation_complete",). The agent inherits the BaseAgent forced-retry mechanism: if it returns without calling either action tool, the orchestrator force-prompts a RECORD-only retry. declare_investigation_complete being terminal means the tool_call_loop short-circuits the moment the strategist decides we're done. _register_graph_tools overrides BaseAgent's default to skip _register_graph_write_tools entirely — the strategist NEVER writes phenomena, entities, edges, or hypotheses directly. All graph mutations come from the workers it dispatches via leads. This keeps the planning agent's responsibility surface narrow: read the graph, choose what to do next, that's it. The prompt walks through the workflow (call graph_overview / marginal_yield / budget_status / source_coverage first, then take exactly one terminal action) with decision criteria for propose vs stop. Registered in agent_factory._AGENT_CLASSES["strategist"]. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 02:22:05 -10:00
parent ff3a05d7ce
commit 65745d21dc
3 changed files with 176 additions and 0 deletions
--- a/tests/test_optimizations.py
+++ b/tests/test_optimizations.py
@@ -3271,6 +3271,46 @@ class TestInvestigationRound:
        )
        assert "≥ 90%" in bs2  # already over 90% (1 of 1 tool calls used)

+    @pytest.mark.asyncio
+    async def test_strategist_agent_registers_correct_toolset(self):
+        """Strategist gets read-only graph queries + the 6 strategy tools;
+        crucially NO graph-write tools (no add_phenomenon, observe_identity,
+        link_to_entity, add_hypothesis, add_temporal_edge, add_lead).
+        """
+        from tool_registry import register_all_tools
+        from agent_factory import AgentFactory
+        from llm_client import LLMClient
+
+        graph = EvidenceGraph()
+        register_all_tools(graph)
+        llm = LLMClient.__new__(LLMClient)
+        factory = AgentFactory(llm, graph)
+        agent = factory.get_or_create_agent("strategist")
+        agent._register_graph_tools()
+
+        registered = set(agent._tools.keys())
+        assert {
+            "graph_overview", "source_coverage", "marginal_yield",
+            "budget_status", "propose_lead", "declare_investigation_complete",
+        } <= registered
+        assert {"list_phenomena", "get_phenomenon", "search_graph"} <= registered
+        forbidden = {
+            "add_phenomenon", "observe_identity", "link_to_entity",
+            "add_hypothesis", "add_temporal_edge", "add_lead",
+        }
+        leaked = registered & forbidden
+        assert not leaked, f"Strategist must not have write tools: {leaked}"
+
+    def test_strategist_terminal_tool_is_declare_complete(self):
+        """declare_investigation_complete is the strategist's only terminal
+        tool (the tool_call_loop short-circuit on it is verified at the LLM
+        client level by an existing test); both action tools are mandatory.
+        """
+        from agents.strategist import InvestigationStrategist
+        assert InvestigationStrategist.terminal_tools == ("declare_investigation_complete",)
+        assert "propose_lead" in InvestigationStrategist.mandatory_record_tools
+        assert "declare_investigation_complete" in InvestigationStrategist.mandatory_record_tools
+
    @pytest.mark.asyncio
    async def test_propose_lead_validates_hypothesis_id(self):
        """propose_lead must reject leads whose motivating_hypothesis isn't