feat(strategist) S3: propose_lead / declare_investigation_complete

DESIGN_STRATEGIST.md §2.5. The strategist's two write actions.

propose_lead validates motivating_hypothesis exists in the graph,
validates expected_evidence_type is a real edge type, validates
source_id refers to a real source in the case — fast specific
errors so the strategist gets fixable feedback rather than a
generic crash. On success, calls graph.add_lead with proposed_by=
"strategist" and round_number=graph.current_strategist_round so
the round-completion code can collect this round's leads.

declare_investigation_complete sets graph.strategist_complete_requested
which the orchestrator inspects after each strategist run to decide
whether to break the loop. reason must come from a closed enum so
the audit log is consistent.

EvidenceGraph gains two transient run-context fields:
  current_strategist_round       — set by orchestrator at start of round
  strategist_complete_requested  — flipped by declare_investigation_complete

These are intentionally NOT persisted — they're per-run flags, not
graph state.

Both tools are required to be in
InvestigationStrategist.mandatory_record_tools (added in S4) so the
agent's forced-retry mechanism kicks in if it returns without taking a
documented decision.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
BattleTag
2026-05-21 02:21:13 -10:00
parent 6ebbc675c1
commit ff3a05d7ce
3 changed files with 279 additions and 0 deletions

View File

@@ -3271,6 +3271,102 @@ class TestInvestigationRound:
)
assert "≥ 90%" in bs2 # already over 90% (1 of 1 tool calls used)
@pytest.mark.asyncio
async def test_propose_lead_validates_hypothesis_id(self):
    """An unknown motivating_hypothesis must be refused by propose_lead.

    The tool is called with a hypothesis id that was never added to the
    graph. The returned text has to contain "not in graph.hypotheses"
    and the graph must still hold no leads afterwards.
    """
    from tool_registry import register_all_tools, TOOL_CATALOG

    # Fresh graph configured as if the strategist were running round 1.
    evidence_graph = EvidenceGraph()
    evidence_graph._current_agent = "strategist"
    evidence_graph._current_task_id = "task-strat-1"
    evidence_graph.current_strategist_round = 1
    register_all_tools(evidence_graph)

    propose_lead = TOOL_CATALOG["propose_lead"]
    call_args = {
        "description": "probe X",
        "target_agent": "filesystem",
        # Deliberately not registered on the graph.
        "motivating_hypothesis": "hyp-does-not-exist",
        "expected_evidence_type": "supports",
    }
    outcome = await propose_lead.executor(**call_args)

    assert "not in graph.hypotheses" in outcome
    assert not evidence_graph.leads
@pytest.mark.asyncio
async def test_propose_lead_creates_strategist_lead(self):
    """Happy path: a valid propose_lead call records a strategist lead.

    With a registered hypothesis and current_strategist_round set to 3,
    the first lead on the graph must be attributed to "strategist",
    point at that hypothesis, carry round_number 3 and keep the
    requested expected_evidence_type.
    """
    from tool_registry import register_all_tools, TOOL_CATALOG

    evidence_graph = EvidenceGraph()
    evidence_graph._current_agent = "strategist"
    evidence_graph._current_task_id = "task-strat-2"
    evidence_graph.current_strategist_round = 3
    # The hypothesis has to exist before the tool is invoked.
    hypothesis_id = await evidence_graph.add_hypothesis("h", "d")
    register_all_tools(evidence_graph)

    propose_lead = TOOL_CATALOG["propose_lead"]
    call_args = {
        "description": "check Safari bookmarks",
        "target_agent": "ios_artifact",
        "motivating_hypothesis": hypothesis_id,
        "expected_evidence_type": "supports",
        "rationale": "single-source hypothesis needs corroboration",
    }
    outcome = await propose_lead.executor(**call_args)
    assert "proposed" in outcome

    recorded = evidence_graph.leads[0]
    # Checked in the same order as the individual field assertions.
    expected_fields = {
        "proposed_by": "strategist",
        "motivating_hypothesis": hypothesis_id,
        "round_number": 3,
        "expected_evidence_type": "supports",
    }
    for field_name, wanted in expected_fields.items():
        assert getattr(recorded, field_name) == wanted
@pytest.mark.asyncio
async def test_propose_lead_rejects_invalid_evidence_type(self):
    """propose_lead must refuse an unrecognised expected_evidence_type.

    The hypothesis is valid here, so only the evidence type
    ("bogus_type") is wrong. The returned text must contain "not one of"
    and no lead may be added to the graph.
    """
    from tool_registry import register_all_tools, TOOL_CATALOG

    evidence_graph = EvidenceGraph()
    evidence_graph._current_agent = "strategist"
    evidence_graph._current_task_id = "task-strat-3"
    evidence_graph.current_strategist_round = 1
    hypothesis_id = await evidence_graph.add_hypothesis("h", "d")
    register_all_tools(evidence_graph)

    propose_lead = TOOL_CATALOG["propose_lead"]
    call_args = {
        "description": "x",
        "target_agent": "filesystem",
        "motivating_hypothesis": hypothesis_id,
        "expected_evidence_type": "bogus_type",
    }
    outcome = await propose_lead.executor(**call_args)

    assert "not one of" in outcome
    assert not evidence_graph.leads
@pytest.mark.asyncio
async def test_declare_complete_flips_request_flag(self):
    """declare_investigation_complete sets strategist_complete_requested.

    The flag is checked to be False before the call and True after it.
    The returned text must mention both the current round ("round 5")
    and the reason that was supplied.
    """
    from tool_registry import register_all_tools, TOOL_CATALOG

    evidence_graph = EvidenceGraph()
    evidence_graph._current_agent = "strategist"
    evidence_graph._current_task_id = "task-strat-4"
    evidence_graph.current_strategist_round = 5
    register_all_tools(evidence_graph)

    declare_complete = TOOL_CATALOG["declare_investigation_complete"]
    # Precondition: nothing has asked for completion yet.
    assert evidence_graph.strategist_complete_requested is False

    outcome = await declare_complete.executor(
        reason="marginal_yield_zero",
        rationale="two rounds with 0 yield",
    )

    assert evidence_graph.strategist_complete_requested is True
    for fragment in ("round 5", "marginal_yield_zero"):
        assert fragment in outcome
@pytest.mark.asyncio
async def test_declare_complete_rejects_bogus_reason(self):
    """An unrecognised reason must not flip strategist_complete_requested.

    Calling declare_investigation_complete with "i_just_want_to_quit"
    has to return text containing "not in" and leave the flag False.
    """
    from tool_registry import register_all_tools, TOOL_CATALOG

    evidence_graph = EvidenceGraph()
    evidence_graph._current_agent = "strategist"
    evidence_graph._current_task_id = "task-strat-5"
    register_all_tools(evidence_graph)

    declare_complete = TOOL_CATALOG["declare_investigation_complete"]
    outcome = await declare_complete.executor(reason="i_just_want_to_quit")

    assert "not in" in outcome
    assert evidence_graph.strategist_complete_requested is False
@pytest.mark.asyncio
async def test_marginal_yield_after_two_rounds(self):
"""Verify marginal_yield captures phenomena/edge/status deltas."""