feat(strategist) S1: Lead extension + InvestigationRound model

DESIGN_STRATEGIST.md §1. Foundation for the Phase 3 strategist loop.

Lead now carries four annotations that let the orchestrator measure marginal yield per lead and dedupe strategist proposals:
- proposed_by (agent that proposed it: "strategist", "filesystem", …)
- motivating_hypothesis (hyp-id the lead is meant to corroborate/refute)
- expected_evidence_type (edge type the lead's worker should produce)
- round_number (0 = Phase 1 lead, ≥1 = strategist-proposed)

add_lead idempotently dedupes still-open (pending or assigned) strategist proposals on (motivating_hypothesis, expected_evidence_type, target_agent, source_id) to prevent the "strategist loops on the same lead" failure mode.

New InvestigationRound dataclass records per-round provenance: before/after hypothesis status snapshots, phenomena + edge count deltas, and the strategist's decision_rationale. ``new_phenomena_count``, ``new_edges_count``, ``status_flips`` are derived properties that the marginal_yield tool will use.

start_investigation_round / complete_investigation_round / get_investigation_round / latest_round / leads_from_round complete the lifecycle. complete_investigation_round is idempotent on already-closed rounds.

Lead.from_dict tolerates state files written before this commit (missing annotation fields take their defaults; unknown keys are ignored). InvestigationRound persists as a top-level list in graph_state.json (auto-save + load_state both wired).

EvidenceGraph also gains graph.budgets and graph.run_start_monotonic fields that the budget_status view (S2) will read; orchestrator populates them in S5.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 02:18:35 -10:00
parent 8020c24776
commit ca96f29849
2 changed files with 368 additions and 2 deletions
--- a/evidence_graph.py
+++ b/evidence_graph.py
@@ -417,7 +417,14 @@ class Edge:

@dataclass
 class Lead:
-    """An investigative lead that should be followed up by an agent."""
+    """An investigative lead that should be followed up by an agent.
+
+    Phase 1 agents create leads as "things outside my scope but worth chasing".
+    The strategist (DESIGN_STRATEGIST.md) also creates leads, and additionally
+    annotates each with the hypothesis it's meant to corroborate or refute plus
+    the kind of edge it expects to produce — so the orchestrator can later
+    measure "did this lead actually change any belief".
+    """

    id: str
    target_agent: str
@@ -426,13 +433,77 @@ class Lead:
    context: dict = field(default_factory=dict)
    status: str = "pending"     # pending, assigned, completed, failed
    hypothesis_id: str | None = None
+    # Strategist-loop annotations. proposed_by names the agent that created
+    # the lead ("filesystem", "strategist", ...). motivating_hypothesis and
+    # expected_evidence_type let the orchestrator measure marginal yield.
+    # round_number is 0 for Phase 1 leads, ≥1 for strategist-produced leads.
+    proposed_by: str = ""
+    motivating_hypothesis: str = ""
+    expected_evidence_type: str = ""
+    round_number: int = 0

    def to_dict(self) -> dict:
        return asdict(self)

    @classmethod
    def from_dict(cls, d: dict) -> Lead:
-        return cls(**d)
+        # Drop unknown keys; fields absent from older state files take their defaults.
+        known = set(cls.__dataclass_fields__)
+        return cls(**{k: v for k, v in d.items() if k in known})
+
+
+@dataclass
+class InvestigationRound:
+    """One round of strategist-driven investigation.
+
+    DESIGN_STRATEGIST.md §1.2: provenance for the strategist's decisions. Each
+    round records what hypothesis statuses looked like before vs. after, what
+    leads were proposed, which actually got executed, and how many new
+    phenomena/edges resulted. ``marginal_yield`` over recent rounds is what
+    the strategist consults to decide whether to keep digging or declare
+    complete.
+    """
+
+    id: str                          # "round-{nnn}"
+    round_number: int
+    started_at: str
+    completed_at: str = ""
+    strategist_action: str = ""      # "propose_leads" | "declare_complete"
+    leads_proposed: list[str] = field(default_factory=list)
+    leads_executed: list[str] = field(default_factory=list)
+    hypothesis_status_snapshot_before: dict = field(default_factory=dict)
+    hypothesis_status_snapshot_after: dict = field(default_factory=dict)
+    phenomena_count_before: int = 0
+    phenomena_count_after: int = 0
+    edges_count_before: int = 0
+    edges_count_after: int = 0
+    decision_rationale: str = ""
+
+    @property
+    def new_phenomena_count(self) -> int:
+        return max(0, self.phenomena_count_after - self.phenomena_count_before)
+
+    @property
+    def new_edges_count(self) -> int:
+        return max(0, self.edges_count_after - self.edges_count_before)
+
+    @property
+    def status_flips(self) -> int:
+        before = self.hypothesis_status_snapshot_before
+        after = self.hypothesis_status_snapshot_after
+        flips = 0
+        for hid, after_status in after.items():
+            if before.get(hid) and before.get(hid) != after_status:
+                flips += 1
+        return flips
+
+    def to_dict(self) -> dict:
+        return asdict(self)
+
+    @classmethod
+    def from_dict(cls, d: dict) -> InvestigationRound:
+        known = set(cls.__dataclass_fields__)
+        return cls(**{k: v for k, v in d.items() if k in known})


@dataclass
@@ -598,6 +669,17 @@ class EvidenceGraph:
        # claimed fact values against real tool outputs.
        self.tool_invocations: dict[str, ToolInvocation] = {}

+        # Investigation rounds — provenance for the strategist's per-round
+        # decisions (DESIGN_STRATEGIST.md). Empty for runs that don't reach
+        # Phase 3 or that disable the strategist via config.
+        self.investigation_rounds: list[InvestigationRound] = []
+
+        # Budget config + run-start monotonic clock. Set by the orchestrator
+        # when it boots; the budget_status strategy tool reads these. Empty
+        # budgets mean no limits configured; a None clock means not yet started.
+        self.budgets: dict[str, int] = {}
+        self.run_start_monotonic: float | None = None
+
        # _current_agent / _current_task_id are exposed as @property below,
        # backed by module-level ContextVars (race-free under asyncio.gather).

@@ -658,6 +740,9 @@ class EvidenceGraph:
                "tool_invocations": {
                    iid: inv.to_dict() for iid, inv in self.tool_invocations.items()
                },
+                "investigation_rounds": [
+                    r.to_dict() for r in self.investigation_rounds
+                ],
                "saved_at": datetime.now().isoformat(),
            }
            tmp = self._persist_path.with_suffix(".tmp")
@@ -730,6 +815,10 @@ class EvidenceGraph:
            iid: ToolInvocation.from_dict(inv)
            for iid, inv in data.get("tool_invocations", {}).items()
        }
+        graph.investigation_rounds = [
+            InvestigationRound.from_dict(r)
+            for r in data.get("investigation_rounds", [])
+        ]
        graph._rebuild_adjacency()
        logger.info(
            "EvidenceGraph restored: %d phenomena, %d hypotheses, %d entities, "
@@ -1497,9 +1586,29 @@ class EvidenceGraph:
        priority: int = 5,
        context: dict | None = None,
        hypothesis_id: str | None = None,
+        proposed_by: str = "",
+        motivating_hypothesis: str = "",
+        expected_evidence_type: str = "",
+        round_number: int = 0,
    ) -> str:
        async with self._lock:
            lid = f"lead-{uuid.uuid4().hex[:8]}"
+            # Idempotency for strategist proposals: a pending/assigned lead with
+            # the same (motivating_hypothesis, expected_evidence_type,
+            # target_agent, source_id) tuple is reused, not duplicated — this
+            # guards against the "strategist loops on the same lead" failure mode.
+            if motivating_hypothesis and proposed_by == "strategist":
+                source_id = (context or {}).get("source_id", "")
+                for existing in self.leads:
+                    if (
+                        existing.proposed_by == "strategist"
+                        and existing.motivating_hypothesis == motivating_hypothesis
+                        and existing.expected_evidence_type == expected_evidence_type
+                        and existing.target_agent == target_agent
+                        and (existing.context or {}).get("source_id", "") == source_id
+                        and existing.status in ("pending", "assigned")
+                    ):
+                        return existing.id
            self.leads.append(Lead(
                id=lid,
                target_agent=target_agent,
@@ -1507,10 +1616,89 @@ class EvidenceGraph:
                priority=priority,
                context=context or {},
                hypothesis_id=hypothesis_id,
+                proposed_by=proposed_by,
+                motivating_hypothesis=motivating_hypothesis,
+                expected_evidence_type=expected_evidence_type,
+                round_number=round_number,
            ))
            self._auto_save()
            return lid

+    # ---- Investigation rounds (strategist loop) ----------------------------
+
+    async def start_investigation_round(
+        self, round_number: int,
+    ) -> str:
+        """Open a new investigation round + capture pre-round snapshot.
+
+        Called by the orchestrator at the top of each strategist iteration.
+        The snapshot records hypothesis status, phenomena count, and edges
+        count so that ``complete_investigation_round`` can compute the
+        round's yield deltas.
+        """
+        async with self._lock:
+            rid = f"round-{round_number:03d}"
+            snapshot_before = {
+                hid: h.status for hid, h in self.hypotheses.items()
+            }
+            self.investigation_rounds.append(InvestigationRound(
+                id=rid,
+                round_number=round_number,
+                started_at=datetime.now().isoformat(),
+                hypothesis_status_snapshot_before=snapshot_before,
+                phenomena_count_before=len(self.phenomena),
+                edges_count_before=len(self.edges),
+            ))
+            self._auto_save()
+            return rid
+
+    async def complete_investigation_round(
+        self,
+        round_id: str,
+        strategist_action: str = "propose_leads",
+        leads_executed: list[str] | None = None,
+        decision_rationale: str = "",
+    ) -> InvestigationRound | None:
+        """Close a round, recording after-snapshot + which leads got executed.
+
+        Idempotent on already-closed rounds (returns the existing record).
+        """
+        async with self._lock:
+            for r in self.investigation_rounds:
+                if r.id != round_id:
+                    continue
+                if r.completed_at:
+                    return r
+                r.completed_at = datetime.now().isoformat()
+                r.strategist_action = strategist_action
+                r.leads_executed = list(leads_executed or [])
+                r.leads_proposed = [
+                    l.id for l in self.leads
+                    if l.round_number == r.round_number
+                    and l.proposed_by == "strategist"
+                ]
+                r.hypothesis_status_snapshot_after = {
+                    hid: h.status for hid, h in self.hypotheses.items()
+                }
+                r.phenomena_count_after = len(self.phenomena)
+                r.edges_count_after = len(self.edges)
+                r.decision_rationale = decision_rationale
+                self._auto_save()
+                return r
+            return None
+
+    def get_investigation_round(self, round_id: str) -> InvestigationRound | None:
+        for r in self.investigation_rounds:
+            if r.id == round_id:
+                return r
+        return None
+
+    def latest_round(self) -> InvestigationRound | None:
+        return self.investigation_rounds[-1] if self.investigation_rounds else None
+
+    def leads_from_round(self, round_number: int) -> list[Lead]:
+        return [l for l in self.leads if l.round_number == round_number]
+
    async def get_pending_leads(self, agent_type: str | None = None) -> list[Lead]:
        async with self._lock:
            leads = [l for l in self.leads if l.status == "pending"]