feat(strategist) S3: propose_lead / declare_investigation_complete

DESIGN_STRATEGIST.md §2.5. The strategist's two write actions. propose_lead validates that motivating_hypothesis exists in the graph, that expected_evidence_type is a real edge type, and that source_id refers to a real source in the case — fast, specific errors so the strategist gets fixable feedback rather than a generic crash. On success, it calls graph.add_lead with proposed_by="strategist" and round_number=graph.current_strategist_round so the round-completion code can collect this round's leads. declare_investigation_complete sets graph.strategist_complete_requested, which the orchestrator inspects after each strategist run to decide whether to break the loop. reason must come from a closed enum so the audit log is consistent. EvidenceGraph gains two transient run-context fields: current_strategist_round — set by the orchestrator at the start of each round; strategist_complete_requested — flipped by declare_investigation_complete. These are intentionally NOT persisted — they're per-run flags, not graph state. Both tools are required to be in InvestigationStrategist.mandatory_record_tools (added in S4) so the agent's forced-retry mechanism kicks in if it returns without taking a documented decision. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 02:21:13 -10:00
parent 6ebbc675c1
commit ff3a05d7ce
3 changed files with 279 additions and 0 deletions
--- a/tests/test_optimizations.py
+++ b/tests/test_optimizations.py
@@ -3271,6 +3271,102 @@ class TestInvestigationRound:
        )
        assert "≥ 90%" in bs2  # already over 90% (1 of 1 tool calls used)

+    @pytest.mark.asyncio
+    async def test_propose_lead_validates_hypothesis_id(self):
+        """propose_lead must reject leads whose motivating_hypothesis isn't
+        actually a registered hypothesis — that's a strategist hallucination
+        analogous to citing a bogus invocation_id.
+        """
+        from tool_registry import register_all_tools, TOOL_CATALOG
+        graph = EvidenceGraph()  # fresh graph: no hypothesis is added before the call
+        graph._current_agent = "strategist"
+        graph._current_task_id = "task-strat-1"
+        graph.current_strategist_round = 1
+        register_all_tools(graph)
+        td = TOOL_CATALOG["propose_lead"]
+        result = await td.executor(
+            description="probe X",
+            target_agent="filesystem",
+            motivating_hypothesis="hyp-does-not-exist",  # id never added to this graph
+            expected_evidence_type="supports",
+        )
+        assert "not in graph.hypotheses" in result  # error is expected as returned text
+        assert not graph.leads  # the rejected lead must not be recorded
+
+    @pytest.mark.asyncio
+    async def test_propose_lead_creates_strategist_lead(self):
+        """propose_lead happy path writes a strategist-attributed lead
+        tagged with the current round_number."""
+        from tool_registry import register_all_tools, TOOL_CATALOG
+        graph = EvidenceGraph()
+        graph._current_agent = "strategist"
+        graph._current_task_id = "task-strat-2"
+        graph.current_strategist_round = 3  # asserted below via lead.round_number
+        hid = await graph.add_hypothesis("h", "d")  # id the proposed lead will cite
+        register_all_tools(graph)
+        td = TOOL_CATALOG["propose_lead"]
+        result = await td.executor(
+            description="check Safari bookmarks",
+            target_agent="ios_artifact",
+            motivating_hypothesis=hid,
+            expected_evidence_type="supports",
+            rationale="single-source hypothesis needs corroboration",
+        )
+        assert "proposed" in result
+        lead = graph.leads[0]  # first lead on the fresh graph
+        assert lead.proposed_by == "strategist"
+        assert lead.motivating_hypothesis == hid
+        assert lead.round_number == 3  # matches current_strategist_round set above
+        assert lead.expected_evidence_type == "supports"
+
+    @pytest.mark.asyncio
+    async def test_propose_lead_rejects_invalid_evidence_type(self):  # unknown expected_evidence_type -> no lead
+        from tool_registry import register_all_tools, TOOL_CATALOG
+        graph = EvidenceGraph()
+        graph._current_agent = "strategist"
+        graph._current_task_id = "task-strat-3"
+        graph.current_strategist_round = 1
+        hid = await graph.add_hypothesis("h", "d")  # valid hypothesis, so only the evidence type is wrong
+        register_all_tools(graph)
+        td = TOOL_CATALOG["propose_lead"]
+        result = await td.executor(
+            description="x", target_agent="filesystem",
+            motivating_hypothesis=hid,
+            expected_evidence_type="bogus_type",  # deliberately invalid value
+        )
+        assert "not one of" in result  # error is expected as returned text
+        assert not graph.leads  # the rejected lead must not be recorded
+
+    @pytest.mark.asyncio
+    async def test_declare_complete_flips_request_flag(self):  # accepted reason sets strategist_complete_requested
+        from tool_registry import register_all_tools, TOOL_CATALOG
+        graph = EvidenceGraph()
+        graph._current_agent = "strategist"
+        graph._current_task_id = "task-strat-4"
+        graph.current_strategist_round = 5  # expected to show up in the result text as "round 5"
+        register_all_tools(graph)
+        td = TOOL_CATALOG["declare_investigation_complete"]
+        assert graph.strategist_complete_requested is False  # precondition: flag starts unset
+        result = await td.executor(
+            reason="marginal_yield_zero",
+            rationale="two rounds with 0 yield",
+        )
+        assert graph.strategist_complete_requested is True
+        assert "round 5" in result
+        assert "marginal_yield_zero" in result  # the accepted reason is echoed back
+
+    @pytest.mark.asyncio
+    async def test_declare_complete_rejects_bogus_reason(self):  # unknown reason must leave the flag unset
+        from tool_registry import register_all_tools, TOOL_CATALOG
+        graph = EvidenceGraph()
+        graph._current_agent = "strategist"
+        graph._current_task_id = "task-strat-5"  # note: current_strategist_round is not set in this test
+        register_all_tools(graph)
+        td = TOOL_CATALOG["declare_investigation_complete"]
+        result = await td.executor(reason="i_just_want_to_quit")  # deliberately invalid reason; no rationale passed
+        assert "not in" in result  # error is expected as returned text
+        assert graph.strategist_complete_requested is False  # flag must stay unset after rejection
+
    @pytest.mark.asyncio
    async def test_marginal_yield_after_two_rounds(self):
        """Verify marginal_yield captures phenomena/edge/status deltas."""