feat(strategist) S3: propose_lead / declare_investigation_complete
DESIGN_STRATEGIST.md §2.5. The strategist's two write actions.

propose_lead validates that motivating_hypothesis exists in the graph, that expected_evidence_type is a real edge type, and that source_id refers to a real source in the case — fast, specific errors so the strategist gets fixable feedback rather than a generic crash. On success, it calls graph.add_lead with proposed_by="strategist" and round_number=graph.current_strategist_round so the round-completion code can collect this round's leads.

declare_investigation_complete sets graph.strategist_complete_requested, which the orchestrator inspects after each strategist run to decide whether to break the loop. reason must come from a closed enum so the audit log is consistent.

EvidenceGraph gains two transient run-context fields:
- current_strategist_round — set by the orchestrator at the start of a round
- strategist_complete_requested — flipped by declare_complete

These are intentionally NOT persisted — they're per-run flags, not graph state.

Both tools are required to be in InvestigationStrategist.mandatory_record_tools (added in S4) so the agent's forced-retry mechanism kicks in if it returns without taking a documented decision.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3271,6 +3271,102 @@ class TestInvestigationRound:
|
||||
)
|
||||
assert "≥ 90%" in bs2 # already over 90% (1 of 1 tool calls used)
|
||||
|
||||
@pytest.mark.asyncio
async def test_propose_lead_validates_hypothesis_id(self):
    """A lead that cites an unregistered hypothesis id must be refused.

    No hypothesis is added to the graph, so the id passed as
    ``motivating_hypothesis`` cannot match anything. The executor's reply
    must mention "not in graph.hypotheses" and no lead may be recorded.
    """
    from tool_registry import TOOL_CATALOG, register_all_tools

    graph = EvidenceGraph()
    # Mark the graph as being driven by the strategist in round 1.
    graph._current_agent = "strategist"
    graph._current_task_id = "task-strat-1"
    graph.current_strategist_round = 1
    register_all_tools(graph)

    lead_kwargs = {
        "description": "probe X",
        "target_agent": "filesystem",
        "motivating_hypothesis": "hyp-does-not-exist",
        "expected_evidence_type": "supports",
    }
    propose_lead = TOOL_CATALOG["propose_lead"]
    outcome = await propose_lead.executor(**lead_kwargs)

    assert "not in graph.hypotheses" in outcome
    assert not graph.leads
|
||||
|
||||
@pytest.mark.asyncio
async def test_propose_lead_creates_strategist_lead(self):
    """Happy path: a valid proposal is stored as a strategist lead.

    The stored lead must be attributed to "strategist", keep the
    motivating hypothesis id and expected evidence type it was given,
    and carry the round number that was current on the graph (3 here).
    """
    from tool_registry import TOOL_CATALOG, register_all_tools

    graph = EvidenceGraph()
    graph._current_agent = "strategist"
    graph._current_task_id = "task-strat-2"
    graph.current_strategist_round = 3
    # Register a real hypothesis so the proposal has a valid id to cite.
    hypothesis_id = await graph.add_hypothesis("h", "d")
    register_all_tools(graph)

    lead_kwargs = {
        "description": "check Safari bookmarks",
        "target_agent": "ios_artifact",
        "motivating_hypothesis": hypothesis_id,
        "expected_evidence_type": "supports",
        "rationale": "single-source hypothesis needs corroboration",
    }
    propose_lead = TOOL_CATALOG["propose_lead"]
    outcome = await propose_lead.executor(**lead_kwargs)
    assert "proposed" in outcome

    stored = graph.leads[0]
    assert stored.proposed_by == "strategist"
    assert stored.motivating_hypothesis == hypothesis_id
    assert stored.round_number == 3
    assert stored.expected_evidence_type == "supports"
|
||||
|
||||
@pytest.mark.asyncio
async def test_propose_lead_rejects_invalid_evidence_type(self):
    """An unknown ``expected_evidence_type`` must be refused.

    The hypothesis id is valid here, so only the evidence type is at
    fault. The reply must contain "not one of" and no lead may be stored.
    """
    from tool_registry import TOOL_CATALOG, register_all_tools

    graph = EvidenceGraph()
    graph._current_agent = "strategist"
    graph._current_task_id = "task-strat-3"
    graph.current_strategist_round = 1
    hypothesis_id = await graph.add_hypothesis("h", "d")
    register_all_tools(graph)

    lead_kwargs = {
        "description": "x",
        "target_agent": "filesystem",
        "motivating_hypothesis": hypothesis_id,
        "expected_evidence_type": "bogus_type",
    }
    propose_lead = TOOL_CATALOG["propose_lead"]
    outcome = await propose_lead.executor(**lead_kwargs)

    assert "not one of" in outcome
    assert not graph.leads
|
||||
|
||||
@pytest.mark.asyncio
async def test_declare_complete_flips_request_flag(self):
    """A valid completion request sets ``strategist_complete_requested``.

    The flag must start out ``False`` and be ``True`` after the call.
    The reply must mention both the current round ("round 5") and the
    reason that was supplied.
    """
    from tool_registry import TOOL_CATALOG, register_all_tools

    graph = EvidenceGraph()
    graph._current_agent = "strategist"
    graph._current_task_id = "task-strat-4"
    graph.current_strategist_round = 5
    register_all_tools(graph)

    declare_complete = TOOL_CATALOG["declare_investigation_complete"]
    # Precondition: nothing has requested completion yet.
    assert graph.strategist_complete_requested is False

    request_kwargs = {
        "reason": "marginal_yield_zero",
        "rationale": "two rounds with 0 yield",
    }
    outcome = await declare_complete.executor(**request_kwargs)

    assert graph.strategist_complete_requested is True
    assert "round 5" in outcome
    assert "marginal_yield_zero" in outcome
|
||||
|
||||
@pytest.mark.asyncio
async def test_declare_complete_rejects_bogus_reason(self):
    """An unrecognised ``reason`` must not flip the completion flag.

    The reply must contain "not in" and
    ``strategist_complete_requested`` must still be ``False`` afterwards.
    """
    from tool_registry import TOOL_CATALOG, register_all_tools

    graph = EvidenceGraph()
    graph._current_agent = "strategist"
    graph._current_task_id = "task-strat-5"
    register_all_tools(graph)

    declare_complete = TOOL_CATALOG["declare_investigation_complete"]
    outcome = await declare_complete.executor(reason="i_just_want_to_quit")

    assert "not in" in outcome
    assert graph.strategist_complete_requested is False
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_marginal_yield_after_two_rounds(self):
|
||||
"""Verify marginal_yield captures phenomena/edge/status deltas."""
|
||||
|
||||
Reference in New Issue
Block a user