refactor: lead provenance, unified link path, SSOT cleanup, configurable weights

Five interrelated cleanups:

1. Lead -> Phenomenon provenance
   - Phenomenon.from_lead_id field on the dataclass
   - BaseAgent.run(lead_id=...) writes self._current_lead_id
   - _add_phenomenon auto-injects from agent state (LLM unaware)
   - Orchestrator dispatch passes lead.id; Phase 1/2-auto/4/5 stay None
   - Merge path preserves the first non-None lead_id on collision

2. Unified Phenomenon <-> Hypothesis link path
   - HypothesisAgent only adds hypotheses, never links
   - link_phenomenon_to_hypothesis tool + executor removed
   - All links go through Orchestrator._judge_new_phenomena
   - Phase 2 unconditionally judges after hypothesis generation
   - Gap Analysis judges after each dispatch round
   (Three previously-missing judge calls now in place.)

3. SSOT (single source of truth) in agent subclasses
   - Remove RoleTemplate dataclass, ROLE_TEMPLATES dict, _instantiate_from_template method
   - Each agent subclass owns name, role, and tool list
   - agent_factory.py shrinks from 299 to 153 lines
   - All 7 agents now route through _AGENT_CLASSES (filesystem, registry, communication, network, timeline were previously dead subclasses overridden by templates)

4. Configurable edge weights
   - HYPOTHESIS_EDGE_WEIGHTS -> _DEFAULT_EDGE_WEIGHTS (private default)
   - EvidenceGraph(edge_weights=...) override via config.yaml
   - hypothesis_edge_weights section in config.yaml (commented example)
   - main.py and regenerate_report.py read and pass through

5. regenerate_report.py auto-picks the latest run/*/graph_state.json when no CLI arg is given (was a hardcoded date path)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 14:10:15 +08:00
parent fde96c7d9f
commit 74e6bde13a
7 changed files with 92 additions and 254 deletions
--- a/evidence_graph.py
+++ b/evidence_graph.py
@@ -18,10 +18,12 @@ from pathlib import Path
 logger = logging.getLogger(__name__)

 # ---------------------------------------------------------------------------
-# Predefined edge weights for Phenomenon → Hypothesis relationships.
+# Default edge weights for Phenomenon → Hypothesis relationships.
 # LLM only picks the edge type (categorical); the weight is looked up here.
+# Override per-graph via EvidenceGraph(edge_weights=...) or config.yaml's
+# `hypothesis_edge_weights` section.
 # ---------------------------------------------------------------------------
-HYPOTHESIS_EDGE_WEIGHTS: dict[str, float] = {
+_DEFAULT_EDGE_WEIGHTS: dict[str, float] = {
    "direct_evidence": +0.25,
    "supports": +0.15,
    "prerequisite_met": +0.10,
@@ -94,6 +96,7 @@ class Phenomenon:
    confidence: float = 1.0
    source_tool: str = ""
    corroborating_agents: list[str] = field(default_factory=list)
+    from_lead_id: str | None = None
    created_at: str = ""

    def to_dict(self) -> dict:
@@ -239,8 +242,12 @@ class EvidenceGraph:
        self,
        case_info: dict | None = None,
        persist_path: Path | None = None,
+        edge_weights: dict[str, float] | None = None,
    ) -> None:
        self.case_info: dict = case_info or {}
+        self.edge_weights: dict[str, float] = (
+            dict(edge_weights) if edge_weights else dict(_DEFAULT_EDGE_WEIGHTS)
+        )
        self.image_path: str = ""
        self.partition_offset: int = 0
        self.extracted_dir: str = "extracted"
@@ -304,12 +311,17 @@ class EvidenceGraph:
        self._persist_path = old

    @classmethod
-    def load_state(cls, path: Path) -> EvidenceGraph:
+    def load_state(
+        cls,
+        path: Path,
+        edge_weights: dict[str, float] | None = None,
+    ) -> EvidenceGraph:
        """Restore an EvidenceGraph from a saved JSON state file."""
        data = json.loads(path.read_text())
        graph = cls(
            case_info=data.get("case_info", {}),
            persist_path=path,
+            edge_weights=edge_weights,
        )
        graph.image_path = data.get("image_path", "")
        graph.partition_offset = data.get("partition_offset", 0)
@@ -403,6 +415,7 @@ class EvidenceGraph:
        raw_data: dict | None = None,
        timestamp: str | None = None,
        source_tool: str = "",
+        from_lead_id: str | None = None,
    ) -> tuple[str, bool]:
        """Add a phenomenon. Returns (id, was_merged).

@@ -419,6 +432,8 @@ class EvidenceGraph:
                    for k, v in raw_data.items():
                        if k not in similar.raw_data:
                            similar.raw_data[k] = v
+                if from_lead_id and similar.from_lead_id is None:
+                    similar.from_lead_id = from_lead_id
                self._auto_save()
                return similar.id, True

@@ -437,6 +452,7 @@ class EvidenceGraph:
                timestamp=timestamp,
                confidence=confidence,
                source_tool=source_tool,
+                from_lead_id=from_lead_id,
                created_at=datetime.now().isoformat(),
            )
            self.phenomena[pid] = ph
@@ -532,14 +548,14 @@ class EvidenceGraph:
    ) -> float:
        """Update hypothesis confidence based on a phenomenon linkage.

-        The edge_type must be one of HYPOTHESIS_EDGE_WEIGHTS keys.
-        Weight is looked up from the predefined table, NOT judged by LLM.
+        The edge_type must be one of self.edge_weights keys.
+        Weight is looked up from the configured table, NOT judged by LLM.
        Returns the new confidence value.
        """
-        if edge_type not in HYPOTHESIS_EDGE_WEIGHTS:
+        if edge_type not in self.edge_weights:
            raise ValueError(
                f"Invalid hypothesis edge type: {edge_type}. "
-                f"Must be one of: {list(HYPOTHESIS_EDGE_WEIGHTS.keys())}"
+                f"Must be one of: {list(self.edge_weights.keys())}"
            )

        async with self._lock:
@@ -549,7 +565,7 @@ class EvidenceGraph:
            if hyp is None:
                raise ValueError(f"Hypothesis not found: {hyp_id}")

-            weight = HYPOTHESIS_EDGE_WEIGHTS[edge_type]
+            weight = self.edge_weights[edge_type]
            old_conf = hyp.confidence

            if weight > 0: