feat(strategist) S5: Phase 3 strategist loop in orchestrator

DESIGN_STRATEGIST.md §4. Replace the fixed-round hypothesis-directed loop with a belief-driven strategist loop that runs the strategist agent once per round and dispatches the leads it proposes. New helpers on Orchestrator: (1) _budget_exceeded() — enforces the hard budget caps (tool_calls, wall_clock_minutes), complementing strategist self-throttling; (2) _execute_strategist_lead(lead) — dispatches one lead serially, so the next strategist round sees the cumulative effect of this lead's graph mutations; (3) _phase3_strategist_loop() — the main loop: open a round, run the strategist, exit on declare_complete or empty proposals, otherwise dispatch each lead, judge new phenomena, close the round, and apply the yield/budget checks; (4) _phase3_legacy_loop() — the fallback when strategist.enabled is false, identical to the pre-DESIGN_STRATEGIST behaviour. The run() entry point branches on strategist_cfg.enabled (default true) and, regardless of variant, always follows up with _retry_failed_leads() + Gap Analysis + mark_remaining_inconclusive(). Orchestrator.__init__ also wires graph.budgets and graph.run_start_monotonic from config so the budget_status tool sees real numbers. Integration tests use a mock strategist and mock workers to verify four behaviours: exit on declare_complete, propose_lead -> worker dispatch, the zero-yield-streak hard stop, and the budget cap stopping the loop. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 02:25:04 -10:00
parent 65745d21dc
commit a103c17bdb
2 changed files with 459 additions and 28 deletions
--- a/tests/test_optimizations.py
+++ b/tests/test_optimizations.py
@@ -3407,6 +3407,202 @@ class TestInvestigationRound:
        assert "not in" in result
        assert graph.strategist_complete_requested is False

+    @pytest.mark.asyncio
+    async def test_strategist_loop_exits_on_declare_complete(self):
+        """Strategist declares complete in round 1: the loop must close that
+        single round and exit without running any worker agent."""
+        from unittest.mock import AsyncMock
+        from orchestrator import Orchestrator
+
+        graph = EvidenceGraph()
+        worker_runs: list[str] = []
+
+        class FakeStrategist:
+            name = "strategist"
+            async def run(self, task, lead_id=None):
+                graph.strategist_complete_requested = True
+                return "complete"
+
+        class FakeWorker:
+            name = "filesystem"
+            async def run(self, task, lead_id=None):
+                worker_runs.append(self.name)  # keeps the last assert honest
+
+        class FakeFactory:
+            def get_or_create_agent(self, name):
+                return FakeStrategist() if name == "strategist" else FakeWorker()
+
+        cfg = {"strategist": {"enabled": True, "max_rounds": 5}}
+        orch = Orchestrator(AsyncMock(), graph, FakeFactory(), config=cfg)
+        await orch._phase3_strategist_loop()
+        assert len(graph.investigation_rounds) == 1
+        r = graph.investigation_rounds[0]
+        assert r.strategist_action == "declare_complete"
+        assert r.completed_at != ""
+        assert worker_runs == []
+
+    @pytest.mark.asyncio
+    async def test_strategist_loop_dispatches_lead_then_completes(self):
+        """Round 1 proposes one filesystem lead; round 2 declares complete.
+
+        The worker for that lead must run exactly once and the lead must end
+        up completed, with both rounds recorded under their actions.
+        """
+        from unittest.mock import AsyncMock
+        from orchestrator import Orchestrator
+        from case import Case, EvidenceSource
+
+        graph = EvidenceGraph()
+        source = EvidenceSource(
+            id="src-A", label="A", type="disk_image",
+            access_mode="image", path="/tmp/x",
+        )
+        graph.case = Case(case_id="c", name="n", sources=[source])
+        graph.set_active_source(source)
+        hypothesis_id = await graph.add_hypothesis("h", "d")
+        dispatched: list[tuple[str, str]] = []
+
+        class ScriptedStrategist:
+            name = "strategist"
+            calls = 0
+            async def run(self, task, lead_id=None):
+                self.calls += 1
+                if self.calls > 1:
+                    # Every call after the first asks the loop to finish.
+                    graph.strategist_complete_requested = True
+                    return "ok"
+                await graph.add_lead(
+                    target_agent="filesystem",
+                    description="probe X",
+                    proposed_by="strategist",
+                    motivating_hypothesis=hypothesis_id,
+                    expected_evidence_type="supports",
+                    round_number=graph.current_strategist_round,
+                )
+                return "ok"
+
+        class RecordingWorker:
+            name = "filesystem"
+            async def run(self, task, lead_id=None):
+                dispatched.append((self.name, lead_id))
+                return "did the thing"
+
+        strategist, worker = ScriptedStrategist(), RecordingWorker()
+
+        class StubFactory:
+            def get_or_create_agent(self, name):
+                return strategist if name == "strategist" else worker
+
+        strategist_cfg = {"enabled": True, "max_rounds": 5,
+                          "hard_stop_marginal_yield_zero_rounds": 99}
+        orch = Orchestrator(AsyncMock(), graph, StubFactory(),
+                            config={"strategist": strategist_cfg})
+        await orch._phase3_strategist_loop()
+
+        actions = [r.strategist_action for r in graph.investigation_rounds]
+        assert actions == ["propose_leads", "declare_complete"]
+        assert [name for name, _ in dispatched] == ["filesystem"]
+        strategist_leads = [
+            lead for lead in graph.leads if lead.proposed_by == "strategist"
+        ]
+        assert [lead.status for lead in strategist_leads] == ["completed"]
+
+    @pytest.mark.asyncio
+    async def test_strategist_loop_hard_stop_on_zero_yield(self):
+        """If the strategist insists on more rounds but yield stays zero for
+        N consecutive rounds, the orchestrator force-stops as a safety net."""
+        from unittest.mock import AsyncMock
+        from orchestrator import Orchestrator
+
+        graph = EvidenceGraph()
+        llm = AsyncMock()
+        # Created up front so every proposed lead cites a real hypothesis id.
+        hid = await graph.add_hypothesis("h", "d")
+
+        class FakeStrategist:
+            """Never declares complete: proposes one more lead on every run."""
+            name = "strategist"
+            async def run(self, task, lead_id=None):
+                await graph.add_lead(
+                    target_agent="filesystem", description="probe",
+                    proposed_by="strategist", motivating_hypothesis=hid,
+                    expected_evidence_type="supports",
+                    round_number=graph.current_strategist_round,
+                )
+
+        class FakeWorker:
+            """Runs but writes nothing to the graph."""
+            name = "filesystem"
+            async def run(self, task, lead_id=None):
+                return ""
+
+        class FakeFactory:
+            def __init__(self):
+                self.s = FakeStrategist()
+                self.w = FakeWorker()
+            def get_or_create_agent(self, name):
+                return self.s if name == "strategist" else self.w
+
+        orch = Orchestrator(llm, graph, FakeFactory(), config={
+            "strategist": {"enabled": True, "max_rounds": 20,
+                            "hard_stop_marginal_yield_zero_rounds": 2},
+        })
+        await orch._phase3_strategist_loop()
+        # Stopped by the 2-round zero-yield streak, far short of max_rounds=20.
+        assert len(graph.investigation_rounds) == 2
+
+    @pytest.mark.asyncio
+    async def test_strategist_loop_budget_exhaustion_stops_loop(self):
+        """Hard budget cap on tool_calls_total kills the loop even when the
+        strategist wants to continue."""
+        from unittest.mock import AsyncMock
+        from orchestrator import Orchestrator
+
+        graph = EvidenceGraph()
+        llm = AsyncMock()
+        # Pre-load two invocations so the log starts level with the cap of 2;
+        # any further worker call then pushes it over.
+        for output in ("x", "y"):
+            await graph.record_tool_invocation(tool="probe", args={}, output=output)
+        hid = await graph.add_hypothesis("h", "d")
+
+        class FakeStrategist:
+            """Never declares complete: proposes one more lead on every run."""
+            name = "strategist"
+            async def run(self, task, lead_id=None):
+                await graph.add_lead(
+                    target_agent="filesystem", description="x",
+                    proposed_by="strategist",
+                    motivating_hypothesis=hid,
+                    expected_evidence_type="supports",
+                    round_number=graph.current_strategist_round,
+                )
+
+        class FakeWorker:
+            """Records one tool invocation each time it runs."""
+            name = "filesystem"
+            async def run(self, task, lead_id=None):
+                await graph.record_tool_invocation(
+                    tool="probe", args={}, output="z",
+                )
+
+        class FakeFactory:
+            def __init__(self):
+                self.s = FakeStrategist()
+                self.w = FakeWorker()
+            def get_or_create_agent(self, name):
+                return self.s if name == "strategist" else self.w
+
+        orch = Orchestrator(llm, graph, FakeFactory(), config={
+            "strategist": {"enabled": True, "max_rounds": 99,
+                            "hard_stop_marginal_yield_zero_rounds": 99},
+            "budgets": {"tool_calls_total": 2},
+        })
+        await orch._phase3_strategist_loop()
+        # max_rounds and the zero-yield streak are both 99, so neither of
+        # those limits can be what stopped the loop after a single round.
+        assert len(graph.investigation_rounds) == 1
+
    @pytest.mark.asyncio
    async def test_marginal_yield_after_two_rounds(self):
        """Verify marginal_yield captures phenomena/edge/status deltas."""