feat(strategist) S1: Lead extension + InvestigationRound model

DESIGN_STRATEGIST.md §1. Foundation for the Phase 3 strategist loop.

Lead now carries four annotations that let the orchestrator measure
marginal yield per lead and dedupe strategist proposals:
  - proposed_by         (agent that proposed it: "strategist", "filesystem", …)
  - motivating_hypothesis (hyp-id the lead is meant to corroborate/refute)
  - expected_evidence_type (edge type the lead's worker should produce)
  - round_number        (0 = Phase 1 lead, ≥1 = strategist-proposed)

add_lead idempotently dedupes strategist proposals on
(motivating_hypothesis, expected_evidence_type, target_agent, source_id)
to prevent the "strategist loops on the same lead" failure mode.

New InvestigationRound dataclass records per-round provenance: before/
after hypothesis status snapshots, phenomena + edge count deltas, and
the strategist's decision_rationale. ``new_phenomena_count``,
``new_edges_count``, ``status_flips`` are derived properties that the
marginal_yield tool will use.

start_investigation_round / complete_investigation_round /
get_investigation_round / latest_round / leads_from_round complete the
lifecycle. complete_investigation_round is idempotent: calling it on an
already-closed round returns that round unchanged.

Lead.from_dict is backward-compatible with state files written before
this commit: the new fields fall back to their defaults when missing.
InvestigationRound persists as a top-level list in graph_state.json
(auto-save + load_state both wired).

EvidenceGraph also gains graph.budgets and graph.run_start_monotonic
fields that the budget_status view (S2) will read; orchestrator
populates them in S5.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
BattleTag
2026-05-21 02:18:35 -10:00
parent 8020c24776
commit ca96f29849
2 changed files with 368 additions and 2 deletions

View File

@@ -9,6 +9,7 @@ import pytest
from evidence_graph import (
EvidenceGraph, Phenomenon, Hypothesis, Lead, GroundingError,
InvestigationRound,
_compute_quality_score, _jaccard_similarity,
prob_to_log_odds, log_odds_to_prob,
)
@@ -3036,3 +3037,180 @@ class TestOrchestratorMultiSource:
assert Orchestrator._is_analysable(ok_media)
assert not Orchestrator._is_analysable(no_path)
# ---------------------------------------------------------------------------
# Strategist loop foundation (DESIGN_STRATEGIST.md §1)
# ---------------------------------------------------------------------------
class TestLeadExtensionsForStrategist:
    """Strategist-loop annotations carried by ``Lead``.

    Exercises ``proposed_by``, ``motivating_hypothesis``,
    ``expected_evidence_type`` and ``round_number``, plus loading a lead
    dict that was written before those fields existed.
    """

    @pytest.mark.asyncio
    async def test_add_lead_records_strategist_annotations(self):
        """Annotations passed to ``add_lead`` are stored on the lead."""
        graph = EvidenceGraph()
        hyp_id = await graph.add_hypothesis("h", "d")

        lead_id = await graph.add_lead(
            target_agent="filesystem",
            description="Check Safari bookmarks for device-switching evidence",
            proposed_by="strategist",
            motivating_hypothesis=hyp_id,
            expected_evidence_type="supports",
            round_number=2,
            context={"source_id": "src-ios-chan"},
        )

        stored = next(
            candidate for candidate in graph.leads if candidate.id == lead_id
        )
        assert stored.proposed_by == "strategist"
        assert stored.motivating_hypothesis == hyp_id
        assert stored.expected_evidence_type == "supports"
        assert stored.round_number == 2

    @pytest.mark.asyncio
    async def test_strategist_lead_idempotency(self):
        """A repeated strategist proposal must not add a second lead.

        Two proposals sharing (motivating_hyp, expected_type, target_agent,
        source_id) differ here only in description and round number; the
        second call is expected to hand back the first lead's id.
        """
        graph = EvidenceGraph()
        hyp_id = await graph.add_hypothesis("h", "d")

        # Everything that is identical between the two proposals.
        shared_fields = {
            "target_agent": "filesystem",
            "proposed_by": "strategist",
            "motivating_hypothesis": hyp_id,
            "expected_evidence_type": "supports",
        }

        original_id = await graph.add_lead(
            description="probe X",
            round_number=1,
            context={"source_id": "src-A"},
            **shared_fields,
        )
        repeat_id = await graph.add_lead(
            description="probe X (rephrased)",
            round_number=2,
            context={"source_id": "src-A"},
            **shared_fields,
        )

        assert original_id == repeat_id
        assert len(graph.leads) == 1

    @pytest.mark.asyncio
    async def test_non_strategist_leads_not_deduped(self):
        """Leads with ``proposed_by != 'strategist'`` are never deduped.

        Worker agents may legitimately raise the same kind of lead more
        than once from different contexts, so two identical calls must
        yield two distinct leads.
        """
        graph = EvidenceGraph()
        hyp_id = await graph.add_hypothesis("h", "d")

        worker_lead = {
            "target_agent": "filesystem",
            "description": "x",
            "proposed_by": "filesystem",
            "motivating_hypothesis": hyp_id,
            "expected_evidence_type": "supports",
        }
        first_id = await graph.add_lead(**worker_lead)
        second_id = await graph.add_lead(**worker_lead)

        assert first_id != second_id
        assert len(graph.leads) == 2

    @pytest.mark.asyncio
    async def test_lead_from_old_state_file_loads_with_defaults(self, tmp_path):
        """Backward compatibility with pre-strategist state files.

        A lead dict lacking the strategist fields must still load, with
        the new fields taking their default values.
        """
        pre_strategist_payload = {
            "id": "lead-legacy01",
            "target_agent": "filesystem",
            "description": "old-style lead",
            "priority": 5,
            "context": {},
            "status": "pending",
            "hypothesis_id": None,
        }

        restored = Lead.from_dict(pre_strategist_payload)

        assert restored.proposed_by == ""
        assert restored.motivating_hypothesis == ""
        assert restored.round_number == 0
class TestInvestigationRound:
    """InvestigationRound provenance records and their start/complete lifecycle."""

    @pytest.mark.asyncio
    async def test_round_lifecycle_captures_before_and_after(self):
        """Starting snapshots the 'before' state; completing fills in 'after'."""
        graph = EvidenceGraph()
        hyp_one = await graph.add_hypothesis("h1", "d")
        hyp_two = await graph.add_hypothesis("h2", "d")

        round_id = await graph.start_investigation_round(1)
        opened = graph.get_investigation_round(round_id)
        assert opened is not None
        assert opened.round_number == 1
        assert opened.hypothesis_status_snapshot_before == {
            hyp_one: "active",
            hyp_two: "active",
        }
        assert opened.phenomena_count_before == 0
        assert opened.completed_at == ""

        # Work done while the round is open: one phenomenon, one edge to h1.
        phenomenon_id, _ = await graph.add_phenomenon(
            "fs",
            "filesystem",
            "found something",
            "interp",
            source_tool="t",
        )
        await graph.update_hypothesis_confidence(
            hyp_one, phenomenon_id, "direct_evidence", ""
        )

        finished = await graph.complete_investigation_round(
            round_id,
            strategist_action="propose_leads",
            decision_rationale="found new evidence for h1",
        )
        assert finished is not None
        assert finished.completed_at != ""

        after_snapshot = finished.hypothesis_status_snapshot_after
        assert after_snapshot[hyp_one] == "supported"
        assert after_snapshot[hyp_two] == "active"

        # Derived deltas computed from the before/after data.
        assert finished.new_phenomena_count == 1
        assert finished.new_edges_count == 1
        assert finished.status_flips == 1

    @pytest.mark.asyncio
    async def test_complete_round_is_idempotent(self):
        """Completing a closed round again returns the same, unchanged object."""
        graph = EvidenceGraph()
        round_id = await graph.start_investigation_round(1)

        initial_close = await graph.complete_investigation_round(round_id)
        # Added after the close, so it must not leak into the 'after' counts.
        await graph.add_phenomenon("fs", "x", "y", "z", source_tool="t")
        repeat_close = await graph.complete_investigation_round(round_id)

        assert initial_close is repeat_close
        assert initial_close.phenomena_count_after == 0

    @pytest.mark.asyncio
    async def test_leads_from_round_filters_correctly(self):
        """``leads_from_round`` returns only leads tagged with that round."""
        graph = EvidenceGraph()
        hyp_id = await graph.add_hypothesis("h", "d")

        await graph.add_lead(
            target_agent="filesystem",
            description="r1 lead",
            proposed_by="strategist",
            motivating_hypothesis=hyp_id,
            expected_evidence_type="supports",
            round_number=1,
        )
        # Carries a source_id, unlike the round-1 lead above.
        await graph.add_lead(
            target_agent="filesystem",
            description="r2 lead",
            proposed_by="strategist",
            motivating_hypothesis=hyp_id,
            expected_evidence_type="supports",
            round_number=2,
            context={"source_id": "src-different"},
        )
        await graph.add_lead(
            target_agent="ios_artifact",
            description="phase 1 finding",
            proposed_by="filesystem",
            round_number=0,
        )

        round_one = graph.leads_from_round(1)
        round_two = graph.leads_from_round(2)
        round_zero = graph.leads_from_round(0)

        assert len(round_one) == 1
        assert round_one[0].description == "r1 lead"
        assert len(round_two) == 1
        assert round_two[0].description == "r2 lead"
        assert len(round_zero) == 1
        assert round_zero[0].proposed_by == "filesystem"

    @pytest.mark.asyncio
    async def test_round_persistence_round_trip(self, tmp_path):
        """Investigation rounds are written to disk and restored by load_state."""
        state_file = tmp_path / "state.json"
        graph = EvidenceGraph(persist_path=state_file)
        hyp_id = await graph.add_hypothesis("h", "d")

        round_id = await graph.start_investigation_round(1)
        await graph.complete_investigation_round(
            round_id,
            decision_rationale="probe complete",
        )

        reloaded = EvidenceGraph.load_state(state_file)
        assert len(reloaded.investigation_rounds) == 1

        restored_round = reloaded.investigation_rounds[0]
        assert restored_round.id == round_id
        assert restored_round.decision_rationale == "probe complete"
        assert hyp_id in restored_round.hypothesis_status_snapshot_before