refactor: lead provenance, unified link path, SSOT cleanup, configurable weights

Five interrelated cleanups:

1. Lead -> Phenomenon provenance
   - Phenomenon.from_lead_id field on the dataclass
   - BaseAgent.run(lead_id=...) writes self._current_lead_id
   - _add_phenomenon auto-injects from agent state (LLM unaware)
   - Orchestrator dispatch passes lead.id; Phases 1, 2-auto, 4, and 5 leave lead_id as None
   - Merge path preserves the first non-None lead_id on collision

2. Unified Phenomenon <-> Hypothesis link path
   - HypothesisAgent only adds hypotheses, never links
   - link_phenomenon_to_hypothesis tool + executor removed
   - All links go through Orchestrator._judge_new_phenomena
   - Phase 2 unconditionally judges after hypothesis generation
   - Gap Analysis judges after each dispatch round
   (This puts three previously missing judge calls in place.)

3. SSOT in agent subclasses
   - Remove RoleTemplate dataclass, ROLE_TEMPLATES dict,
     _instantiate_from_template method
   - Each agent subclass owns name, role, and tool list
   - agent_factory.py shrinks from 299 to 153 lines
   - All 7 agents now route through _AGENT_CLASSES (filesystem,
     registry, communication, network, timeline were previously dead
     subclasses overridden by templates)

4. Configurable edge weights
   - HYPOTHESIS_EDGE_WEIGHTS -> _DEFAULT_EDGE_WEIGHTS (private default)
   - EvidenceGraph(edge_weights=...) override via config.yaml
   - hypothesis_edge_weights section in config.yaml (commented example)
   - main.py and regenerate_report.py read and pass through

5. regenerate_report.py auto-picks the latest run/*/graph_state.json
   when no CLI arg is given (previously a hardcoded, dated path)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
BattleTag
2026-05-12 14:10:15 +08:00
parent fde96c7d9f
commit 74e6bde13a
7 changed files with 92 additions and 254 deletions

View File

@@ -18,10 +18,12 @@ from pathlib import Path
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Predefined edge weights for Phenomenon → Hypothesis relationships.
# Default edge weights for Phenomenon → Hypothesis relationships.
# LLM only picks the edge type (categorical); the weight is looked up here.
# Override per-graph via EvidenceGraph(edge_weights=...) or config.yaml's
# `hypothesis_edge_weights` section.
# ---------------------------------------------------------------------------
HYPOTHESIS_EDGE_WEIGHTS: dict[str, float] = {
_DEFAULT_EDGE_WEIGHTS: dict[str, float] = {
"direct_evidence": +0.25,
"supports": +0.15,
"prerequisite_met": +0.10,
@@ -94,6 +96,7 @@ class Phenomenon:
confidence: float = 1.0
source_tool: str = ""
corroborating_agents: list[str] = field(default_factory=list)
from_lead_id: str | None = None
created_at: str = ""
def to_dict(self) -> dict:
@@ -239,8 +242,12 @@ class EvidenceGraph:
self,
case_info: dict | None = None,
persist_path: Path | None = None,
edge_weights: dict[str, float] | None = None,
) -> None:
self.case_info: dict = case_info or {}
self.edge_weights: dict[str, float] = (
dict(edge_weights) if edge_weights else dict(_DEFAULT_EDGE_WEIGHTS)
)
self.image_path: str = ""
self.partition_offset: int = 0
self.extracted_dir: str = "extracted"
@@ -304,12 +311,17 @@ class EvidenceGraph:
self._persist_path = old
@classmethod
def load_state(cls, path: Path) -> EvidenceGraph:
def load_state(
cls,
path: Path,
edge_weights: dict[str, float] | None = None,
) -> EvidenceGraph:
"""Restore an EvidenceGraph from a saved JSON state file."""
data = json.loads(path.read_text())
graph = cls(
case_info=data.get("case_info", {}),
persist_path=path,
edge_weights=edge_weights,
)
graph.image_path = data.get("image_path", "")
graph.partition_offset = data.get("partition_offset", 0)
@@ -403,6 +415,7 @@ class EvidenceGraph:
raw_data: dict | None = None,
timestamp: str | None = None,
source_tool: str = "",
from_lead_id: str | None = None,
) -> tuple[str, bool]:
"""Add a phenomenon. Returns (id, was_merged).
@@ -419,6 +432,8 @@ class EvidenceGraph:
for k, v in raw_data.items():
if k not in similar.raw_data:
similar.raw_data[k] = v
if from_lead_id and similar.from_lead_id is None:
similar.from_lead_id = from_lead_id
self._auto_save()
return similar.id, True
@@ -437,6 +452,7 @@ class EvidenceGraph:
timestamp=timestamp,
confidence=confidence,
source_tool=source_tool,
from_lead_id=from_lead_id,
created_at=datetime.now().isoformat(),
)
self.phenomena[pid] = ph
@@ -532,14 +548,14 @@ class EvidenceGraph:
) -> float:
"""Update hypothesis confidence based on a phenomenon linkage.
The edge_type must be one of HYPOTHESIS_EDGE_WEIGHTS keys.
Weight is looked up from the predefined table, NOT judged by LLM.
The edge_type must be one of self.edge_weights keys.
Weight is looked up from the configured table, NOT judged by LLM.
Returns the new confidence value.
"""
if edge_type not in HYPOTHESIS_EDGE_WEIGHTS:
if edge_type not in self.edge_weights:
raise ValueError(
f"Invalid hypothesis edge type: {edge_type}. "
f"Must be one of: {list(HYPOTHESIS_EDGE_WEIGHTS.keys())}"
f"Must be one of: {list(self.edge_weights.keys())}"
)
async with self._lock:
@@ -549,7 +565,7 @@ class EvidenceGraph:
if hyp is None:
raise ValueError(f"Hypothesis not found: {hyp_id}")
weight = HYPOTHESIS_EDGE_WEIGHTS[edge_type]
weight = self.edge_weights[edge_type]
old_conf = hyp.confidence
if weight > 0: