feat(strategist) S5: Phase 3 strategist loop in orchestrator

DESIGN_STRATEGIST.md §4. Replace the fixed-round hypothesis-directed
loop with a belief-driven strategist loop that runs the strategist
agent once per round and dispatches the leads it proposes.

New helpers on Orchestrator:
  _budget_exceeded()              hard budget caps (tool_calls,
                                  wall_clock_minutes), complementing
                                  strategist self-throttling.
  _execute_strategist_lead(lead)  dispatch one lead serially; the
                                  next strategist round sees the
                                  cumulative effect of this lead's
                                  graph mutations.
  _phase3_strategist_loop()       main loop. Open round, run strategist,
                                  exit on declare_complete or empty
                                  proposals, otherwise dispatch each
                                  lead, judge new phenomena, close round,
                                  apply yield/budget checks.
  _phase3_legacy_loop()           fallback when strategist.enabled is
                                  false. Identical to the
                                  pre-DESIGN_STRATEGIST behaviour.

The run() entry point branches on strategist_cfg.enabled (default
true) and always follows up with _retry_failed_leads() + Gap
Analysis + mark_remaining_inconclusive() regardless of variant.

Orchestrator.__init__ also wires graph.budgets and
graph.run_start_monotonic from config so the budget_status tool
sees real numbers.

Integration tests use a mock strategist + mock workers to verify
declare_complete, propose_lead -> worker dispatch, zero-yield-streak
hard stop, and budget-cap-stops-the-loop.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
BattleTag
2026-05-21 02:25:04 -10:00
parent 65745d21dc
commit a103c17bdb
2 changed files with 459 additions and 28 deletions

View File

@@ -3407,6 +3407,202 @@ class TestInvestigationRound:
assert "not in" in result
assert graph.strategist_complete_requested is False
@pytest.mark.asyncio
async def test_strategist_loop_exits_on_declare_complete(self):
    """Strategist declares complete in round 1; no worker may be dispatched.

    The fake strategist sets ``graph.strategist_complete_requested`` on its
    first run. The loop is expected to record exactly one round, tagged
    ``declare_complete`` and closed (non-empty ``completed_at``), without
    running any worker agent.
    """
    from unittest.mock import AsyncMock
    from orchestrator import Orchestrator

    graph = EvidenceGraph()
    llm = AsyncMock()
    worker_runs: list[str] = []

    class FakeStrategist:
        name = "strategist"

        async def run(self, task, lead_id=None):
            graph.strategist_complete_requested = True
            return "complete"

    class FakeWorker:
        # Records every dispatch so the "no worker ran" assertion below can
        # actually fail if the loop dispatches something.
        name = "filesystem"

        async def run(self, task, lead_id=None):
            worker_runs.append(self.name)
            return ""

    class FakeFactory:
        def __init__(self):
            self._instances = {"strategist": FakeStrategist()}
            self._worker = FakeWorker()

        def get_or_create_agent(self, name):
            # Any non-strategist agent resolves to the recording worker,
            # as in the sibling strategist-loop tests.
            return self._instances.get(name, self._worker)

    orch = Orchestrator(llm, graph, FakeFactory(), config={
        "strategist": {"enabled": True, "max_rounds": 5},
    })
    await orch._phase3_strategist_loop()

    assert len(graph.investigation_rounds) == 1
    r = graph.investigation_rounds[0]
    assert r.strategist_action == "declare_complete"
    assert r.completed_at != ""
    assert worker_runs == []
@pytest.mark.asyncio
async def test_strategist_loop_dispatches_lead_then_completes(self):
    """Round 1 proposes one lead, round 2 declares complete.

    The loop must run the worker for the proposed lead exactly once, mark
    that lead completed, and stop after the second round.
    """
    from unittest.mock import AsyncMock
    from orchestrator import Orchestrator
    from case import Case, EvidenceSource

    graph = EvidenceGraph()
    source = EvidenceSource(
        id="src-A",
        label="A",
        type="disk_image",
        access_mode="image",
        path="/tmp/x",
    )
    graph.case = Case(case_id="c", name="n", sources=[source])
    graph.set_active_source(source)
    hypothesis_id = await graph.add_hypothesis("h", "d")
    llm = AsyncMock()
    dispatched: list[tuple[str, str]] = []

    class FakeStrategist:
        name = "strategist"

        def __init__(self):
            self.round = 0

        async def run(self, task, lead_id=None):
            self.round += 1
            # Every call after the first asks the loop to finish.
            if self.round != 1:
                graph.strategist_complete_requested = True
                return "ok"
            # First call: propose a single lead for the filesystem worker.
            await graph.add_lead(
                target_agent="filesystem",
                description="probe X",
                proposed_by="strategist",
                motivating_hypothesis=hypothesis_id,
                expected_evidence_type="supports",
                round_number=graph.current_strategist_round,
            )
            return "ok"

    class FakeWorker:
        name = "filesystem"

        async def run(self, task, lead_id=None):
            dispatched.append((self.name, lead_id))
            return "did the thing"

    class FakeFactory:
        def __init__(self):
            self.strategist = FakeStrategist()
            self.worker = FakeWorker()

        def get_or_create_agent(self, name):
            return self.strategist if name == "strategist" else self.worker

    strategist_cfg = {
        "enabled": True,
        "max_rounds": 5,
        "hard_stop_marginal_yield_zero_rounds": 99,
    }
    orch = Orchestrator(
        llm, graph, FakeFactory(), config={"strategist": strategist_cfg},
    )
    await orch._phase3_strategist_loop()

    actions = [rnd.strategist_action for rnd in graph.investigation_rounds]
    assert actions == ["propose_leads", "declare_complete"]

    assert [agent for agent, _ in dispatched] == ["filesystem"]

    strategist_leads = [
        lead for lead in graph.leads if lead.proposed_by == "strategist"
    ]
    assert len(strategist_leads) == 1
    assert strategist_leads[0].status == "completed"
@pytest.mark.asyncio
async def test_strategist_loop_hard_stop_on_zero_yield(self):
    """The loop force-stops after N consecutive zero-yield rounds.

    The fake strategist proposes a new lead every round and never declares
    complete; the fake worker returns an empty string and does not touch
    the graph. With ``hard_stop_marginal_yield_zero_rounds`` set to 2 and
    ``max_rounds`` set to 20, exactly two rounds are expected.
    """
    from unittest.mock import AsyncMock
    from orchestrator import Orchestrator

    graph = EvidenceGraph()
    llm = AsyncMock()

    class FakeStrategist:
        name = "strategist"

        async def run(self, task, lead_id=None):
            # First hypothesis id, or "" when the graph holds none.
            hid_local = next(iter(graph.hypotheses), "")
            await graph.add_lead(
                target_agent="filesystem",
                description="probe",
                proposed_by="strategist",
                motivating_hypothesis=hid_local,
                expected_evidence_type="supports",
                round_number=graph.current_strategist_round,
            )

    class FakeWorker:
        name = "filesystem"

        async def run(self, task, lead_id=None):
            return ""

    class FakeFactory:
        def __init__(self):
            self.s = FakeStrategist()
            self.w = FakeWorker()

        def get_or_create_agent(self, name):
            return self.s if name == "strategist" else self.w

    # Called for its side effect only: the strategist looks the hypothesis
    # up through graph.hypotheses, so the returned id is not needed here.
    await graph.add_hypothesis("h", "d")
    orch = Orchestrator(llm, graph, FakeFactory(), config={
        "strategist": {
            "enabled": True,
            "max_rounds": 20,
            "hard_stop_marginal_yield_zero_rounds": 2,
        },
    })
    await orch._phase3_strategist_loop()

    assert len(graph.investigation_rounds) == 2
@pytest.mark.asyncio
async def test_strategist_loop_budget_exhaustion_stops_loop(self):
    """A hard ``tool_calls_total`` cap stops the loop after one round.

    The strategist proposes a lead every round and never declares complete,
    and both ``max_rounds`` and the zero-yield hard stop are set to 99, so
    neither of those limits is what ends the loop at one round.
    """
    from unittest.mock import AsyncMock
    from orchestrator import Orchestrator

    graph = EvidenceGraph()
    llm = AsyncMock()

    # Seed the invocation log with as many entries as the tool_calls_total
    # cap configured below (2); the worker records one more per dispatch.
    for seeded_output in ("x", "y"):
        await graph.record_tool_invocation(
            tool="probe", args={}, output=seeded_output,
        )

    class FakeStrategist:
        name = "strategist"

        async def run(self, task, lead_id=None):
            # First hypothesis id, or "" when the graph holds none.
            hid_local = next(iter(graph.hypotheses), "")
            await graph.add_lead(
                target_agent="filesystem",
                description="x",
                proposed_by="strategist",
                motivating_hypothesis=hid_local,
                expected_evidence_type="supports",
                round_number=graph.current_strategist_round,
            )

    class FakeWorker:
        name = "filesystem"

        async def run(self, task, lead_id=None):
            await graph.record_tool_invocation(
                tool="probe", args={}, output="z",
            )

    class FakeFactory:
        def __init__(self):
            self.s = FakeStrategist()
            self.w = FakeWorker()

        def get_or_create_agent(self, name):
            return self.s if name == "strategist" else self.w

    # Called for its side effect only: the strategist looks the hypothesis
    # up through graph.hypotheses, so the returned id is not needed here.
    await graph.add_hypothesis("h", "d")
    orch = Orchestrator(llm, graph, FakeFactory(), config={
        "strategist": {
            "enabled": True,
            "max_rounds": 99,
            "hard_stop_marginal_yield_zero_rounds": 99,
        },
        "budgets": {"tool_calls_total": 2},
    })
    await orch._phase3_strategist_loop()

    assert len(graph.investigation_rounds) == 1
@pytest.mark.asyncio
async def test_marginal_yield_after_two_rounds(self):
"""Verify marginal_yield captures phenomena/edge/status deltas."""