"""Evidence Knowledge Graph for multi-agent forensic analysis. Replaces the flat Blackboard with a graph-based evidence store. Nodes: Phenomenon (observable artifacts), Hypothesis (interpretive claims), Entity (recurring objects). Edges: typed relationships with predefined weights for hypothesis confidence computation. """ from __future__ import annotations import asyncio import contextvars import hashlib import json import logging import re import uuid from dataclasses import asdict, dataclass, field from datetime import datetime from pathlib import Path # Per-asyncio-task scoped values for "which agent is currently running" and # "which task scope does that agent's grounding live in". Backed by # ContextVars so concurrent agent runs (Phase 3's _dispatch_leads_parallel) # don't clobber each other — asyncio.create_task / asyncio.gather copies # the parent context per child task, and writes inside one task stay there. # Pre-P0 these were plain attributes on EvidenceGraph; the last setter won # under concurrency, tagging tool invocations with the WRONG agent and # making the grounding gateway falsely reject legitimate facts. _current_agent_ctx: contextvars.ContextVar[str] = contextvars.ContextVar( "masforensics_current_agent", default="", ) _current_task_id_ctx: contextvars.ContextVar[str] = contextvars.ContextVar( "masforensics_current_task_id", default="", ) from case import Case, EvidenceSource, single_source_case logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Per-edge-type log₁₀(LR) — the calibration table backing hypothesis # confidence updates (DESIGN.md §4.5). # # The LLM only picks the *category* (direct_evidence, supports, …); the # numerical contribution is looked up here. Updates use the additive, # order-independent log-odds form # L_post = L_prior + Σ log10(LR_i) # confidence = 1 / (1 + 10^(−L_post)) # which fixes the pre-S3 delta-update bug whose result depended on the # order edges arrived in. # # Override per-graph via EvidenceGraph(edge_log_lr=...) or config.yaml's # `hypothesis_log_lr` section. # --------------------------------------------------------------------------- _DEFAULT_LOG_LR: dict[str, float] = { "direct_evidence": +2.0, "supports": +1.0, "consequence_observed": +1.0, "prerequisite_met": +0.5, "weakens": -0.5, "contradicts": -2.0, # S5 cross-source coref (DESIGN.md §4.6) — same calibration scale. # A single shared strong identifier (email, phone, wallet, IMEI…) is # near-decisive; weak identifiers (nickname) accumulate slowly; a # conflicting strong identifier is strong negative evidence. "shared_strong_identifier": +2.0, "shared_weak_identifier": +0.5, "conflicting_strong_identifier": -2.0, } # DESIGN.md §4.6 identifier taxonomy. Strong identifiers approximate # global uniqueness — sharing one is high-confidence coref evidence. # Weak identifiers are nicknames / display names — accumulate via Bayes. STRONG_IDENTIFIER_TYPES: set[str] = { "email", "phone_number", "imei", "imsi", "apple_id", "icloud_id", "google_account", "wallet_address", "udid", "mac_address", "device_serial", } WEAK_IDENTIFIER_TYPES: set[str] = { "nickname", "display_name", "username", "screen_name", } def is_strong_identifier(identifier_type: str) -> bool: """True if the identifier carries enough uniqueness for a strong LR edge.""" return identifier_type in STRONG_IDENTIFIER_TYPES def _normalize_identifier(identifier_type: str, value: str) -> str: """Canonicalise an identifier value so trivial spelling variants match. - Lowercase for case-insensitive identifiers (email, hostnames, hex). - Strip whitespace and the leading '+' on phone numbers / international dialling, then keep only digits for phone matching. - Pass-through for free-form strings (nicknames). """ v = (value or "").strip() if identifier_type in {"email", "apple_id", "icloud_id", "google_account", "mac_address", "wallet_address", "udid", "imei", "imsi", "device_serial"}: v = v.lower() if identifier_type == "phone_number": import re as _re v = _re.sub(r"\D", "", v) return v def prob_to_log_odds(p: float) -> float: """Logit (base 10). Clipped to keep ±∞ out of the graph.""" p = max(1e-9, min(1 - 1e-9, float(p))) import math return math.log10(p / (1.0 - p)) def log_odds_to_prob(log_odds: float) -> float: """Inverse of :func:`prob_to_log_odds`: 1 / (1 + 10^(−L)).""" return 1.0 / (1.0 + 10.0 ** (-float(log_odds))) _WS_RUN = re.compile(r"\s+") def _normalize_for_grounding(s: str) -> str: """Canonicalise a string for the loose-match branch of fact grounding. Strict ``value in inv.output`` rejected real evidence because the LLM routinely normalises tool output before quoting: - case-folds hex (``89 50 4e 47`` → ``89 50 4E 47``) - flips path separators (``Sunny\\foo.exe`` → ``Sunny/foo.exe``) - collapses whitespace across newlines (``AppleID:\\n alice@x.com`` → ``AppleID: alice@x.com``) None of those are hallucinations — they're presentation choices. This normaliser does the inverse so both sides line up: - lowercase everything (handles hex case + email case + MAC case) - collapse any run of whitespace to a single space - replace ``\\`` with ``/`` (path-sep flip) Genuine fabrications still fail: a value that doesn't appear (in any form) inside the output normalises to a string that isn't a substring of the normalised output, and the gateway rejects exactly as before. """ if not s: return "" s = s.lower().replace("\\", "/") s = _WS_RUN.sub(" ", s) return s.strip() class GroundingError(ValueError): """Raised by the add_phenomenon gateway when one or more verified_facts fail the grounding check (missing/wrong invocation_id, wrong agent or task, or fact.value not present in the cited tool output). Carries the failed facts so callers (BaseAgent) can format them back to the LLM for a corrective retry. """ def __init__(self, message: str, failures: list[dict]) -> None: super().__init__(message) self.failures = failures # All valid edge types across the graph. VALID_EDGE_TYPES: set[str] = { # Phenomenon → Hypothesis "direct_evidence", "supports", "prerequisite_met", "consequence_observed", "contradicts", "weakens", # Phenomenon → Hypothesis (S5 coref-specific — used between identifier # observation phenomena and the "Entity A ≡ Entity B" coref hypothesis) "shared_strong_identifier", "shared_weak_identifier", "conflicting_strong_identifier", # Phenomenon → Phenomenon "temporal", "causal", "input_to", "modifies", "co_located", "corroborates", # Phenomenon → Entity "created_by", "executed_by", "owned_by", "targets", "associated_with", "found_on", "used_by", # Hypothesis → Hypothesis "refines", "conflicts", "depends_on", # Entity → Entity (S5 — backed by a coref hypothesis ≥ threshold) "same_as", } # --------------------------------------------------------------------------- # Graph node types # --------------------------------------------------------------------------- def _compute_quality_score( source_tool: str, timestamp: str | None, raw_data: dict, interpretation: str, verified_facts: list[dict], related_ids: list[str], ) -> float: """Compute a quality score (0.0-1.0) based on evidence completeness. A grounded phenomenon (any verified_facts) outweighs a long free-text interpretation: the facts carry provenance, the interpretation doesn't. """ score = 0.0 if source_tool: score += 0.20 if timestamp is not None: score += 0.15 if raw_data: score += 0.15 if verified_facts: # Capped contribution: 0.05 per fact up to 0.25. score += min(0.25, 0.05 * len(verified_facts)) if len(interpretation) >= 50: score += 0.10 if related_ids: score += 0.15 return min(1.0, score) def _jaccard_similarity(a: str, b: str) -> float: """Token-level Jaccard similarity between two strings.""" tokens_a = set(a.lower().split()) tokens_b = set(b.lower().split()) if not tokens_a or not tokens_b: return 0.0 return len(tokens_a & tokens_b) / len(tokens_a | tokens_b) @dataclass class Phenomenon: """Raw observable artifact found on disk. DESIGN.md §4.4: a phenomenon is split into provenance-bound *facts* and free-text *interpretation*. The gateway hard-validates every fact against the recorded tool invocation it cites; interpretation is the agent's narrative and is rendered as "agent analysis" in the final report — not as truth. """ id: str # "ph-{uuid8}" source_agent: str category: str # filesystem, registry, email, network, timeline title: str # Free-form analysis text — the agent's reasoning. NOT verified. interpretation: str = "" # Grounded atoms. Each fact: {type, value, invocation_id}. # type ∈ {path, timestamp, inode, hash, identifier, count, raw, ...} verified_facts: list[dict] = field(default_factory=list) raw_data: dict = field(default_factory=dict) timestamp: str | None = None confidence: float = 1.0 source_tool: str = "" source_id: str = "" # id of the EvidenceSource this finding came from corroborating_agents: list[str] = field(default_factory=list) from_lead_id: str | None = None created_at: str = "" def to_dict(self) -> dict: return asdict(self) @classmethod def from_dict(cls, d: dict) -> Phenomenon: """Reconstruct from a dict; migrate legacy ``description`` field. Older runs persisted free text in ``description``; treat that as ``interpretation`` so old graph_state.json files keep loading. """ d = dict(d) if "description" in d: legacy = d.pop("description") d.setdefault("interpretation", legacy or "") d.setdefault("verified_facts", []) known = set(cls.__dataclass_fields__) return cls(**{k: v for k, v in d.items() if k in known}) def summary(self) -> str: ts = f" @ {self.timestamp}" if self.timestamp else "" nf = len(self.verified_facts) facts_note = f" facts={nf}" if nf else "" return ( f"[{self.id}] [{self.category}] {self.title}{ts} " f"(conf={self.confidence:.2f}{facts_note})" ) @dataclass class Hypothesis: """Interpretive claim about what happened on the system. Confidence is a *derived* projection of ``log_odds`` (DESIGN.md §4.5): every Phenomenon→Hypothesis edge contributes log₁₀(LR) to ``log_odds``, and ``confidence = 1 / (1 + 10^(−log_odds))``. ``log_odds`` is the canonical state; ``confidence`` is kept in sync for display and threshold checks (≥0.8 supported / ≤0.2 refuted). ``prior_prob`` seeds the starting log_odds (default 0.5 → 0.0). """ id: str # "hyp-{uuid8}" title: str description: str prior_prob: float = 0.5 log_odds: float = 0.0 confidence: float = 0.5 # derived from log_odds — kept in sync on update status: str = "active" # active, supported, refuted, inconclusive parent_id: str | None = None created_by: str = "" # "manual", "hypothesis_agent", agent name created_at: str = "" confidence_log: list[dict] = field(default_factory=list) # S5 coref-specific: pair of entity ids this hypothesis claims are the # same actor. Lets update_hypothesis_confidence sync the backing # ``same_as`` edge automatically when contradicting evidence arrives. coref_entity_pair: list[str] = field(default_factory=list) def to_dict(self) -> dict: return asdict(self) @classmethod def from_dict(cls, d: dict) -> Hypothesis: """Reconstruct from a dict. Migrates pre-S3 records that only had ``confidence`` by deriving ``log_odds`` via the logit transform. """ d = dict(d) if "log_odds" not in d: d["log_odds"] = prob_to_log_odds(d.get("confidence", 0.5)) d.setdefault("prior_prob", 0.5) # Re-sync confidence from log_odds in case of drift in old files. d["confidence"] = log_odds_to_prob(d["log_odds"]) known = set(cls.__dataclass_fields__) return cls(**{k: v for k, v in d.items() if k in known}) def summary(self) -> str: return ( f"[{self.id}] {self.title} " f"(conf={self.confidence:.2f}, L={self.log_odds:+.2f}, {self.status})" ) @dataclass class Entity: """Recurring actor or object across phenomena. DESIGN.md §4.6 attaches typed identifiers directly to the entity for fast blocking lookups during coref. Each identifier entry: {type, value, normalized, strong, invocation_id, phenomenon_id, observed_at} where ``normalized`` is the canonicalised form used for matching (lower-cased email, digits-only phone, …). """ id: str # "ent-{uuid8}" name: str entity_type: str # person, program, file, host, ip_address description: str = "" identifiers: list[dict] = field(default_factory=list) created_at: str = "" def to_dict(self) -> dict: return asdict(self) @classmethod def from_dict(cls, d: dict) -> Entity: d = dict(d) d.setdefault("identifiers", []) known = set(cls.__dataclass_fields__) return cls(**{k: v for k, v in d.items() if k in known}) def has_identifier(self, identifier_type: str, normalized_value: str) -> bool: return any( i.get("type") == identifier_type and i.get("normalized") == normalized_value for i in self.identifiers ) def summary(self) -> str: idents = "" if self.identifiers: top = self.identifiers[:3] preview = ", ".join(f"{i.get('type')}={i.get('value')}" for i in top) extra = ( f" (+{len(self.identifiers) - 3} more)" if len(self.identifiers) > 3 else "" ) idents = f" [{preview}{extra}]" return f"[{self.id}] {self.entity_type}: {self.name}{idents}" @dataclass class Edge: """Directed edge in the evidence graph.""" id: str # "edge-{uuid8}" source_id: str target_id: str edge_type: str metadata: dict = field(default_factory=dict) created_by: str = "" created_at: str = "" def to_dict(self) -> dict: return asdict(self) @classmethod def from_dict(cls, d: dict) -> Edge: return cls(**d) @dataclass class Lead: """An investigative lead that should be followed up by an agent.""" id: str target_agent: str description: str priority: int = 5 # 1 (highest) - 10 (lowest) context: dict = field(default_factory=dict) status: str = "pending" # pending, assigned, completed, failed hypothesis_id: str | None = None def to_dict(self) -> dict: return asdict(self) @classmethod def from_dict(cls, d: dict) -> Lead: return cls(**d) @dataclass class InvestigationArea: """An area to investigate to confirm/refute one or more hypotheses. Derived by the orchestrator from active hypotheses after Phase 2; also seeded from config.yaml:investigation_areas as an optional manual override. Each area carries its own keywords + expected tools so the gap-analysis coverage check is generic, not tied to hard-coded constants. """ id: str # "area-{slug}" area: str # snake_case slug (dedupe key) description: str suggested_agent: str # filesystem / registry / communication / network / timeline expected_keywords: list[str] = field(default_factory=list) expected_tools: list[str] = field(default_factory=list) priority: int = 5 # 1 (highest) - 10 (lowest) motivating_hypothesis_ids: list[str] = field(default_factory=list) created_by: str = "" # "manual" | "llm_derive" | "fallback" created_at: str = "" def to_dict(self) -> dict: return asdict(self) @classmethod def from_dict(cls, d: dict) -> InvestigationArea: return cls(**d) def summary(self) -> str: return ( f"[{self.area}] P{self.priority} agent={self.suggested_agent} " f"(motivating: {len(self.motivating_hypothesis_ids)})" ) @dataclass class ExtractedAsset: """A file extracted from the disk image and tracked in the asset library.""" id: str # "asset-{uuid8}" inode: str # e.g. "334-128-4" original_path: str # disk image path from ffind local_path: str # "extracted/SYSTEM" category: str # registry_hive, chat_log, prefetch, ... filename: str # "SYSTEM" size_bytes: int extracted_by: str # agent name extracted_at: str # ISO timestamp def to_dict(self) -> dict: return asdict(self) @classmethod def from_dict(cls, d: dict) -> ExtractedAsset: return cls(**d) def summary(self) -> str: size_kb = self.size_bytes / 1024 return ( f"[{self.id}] {self.filename} ({self.category}) " f"— {size_kb:.1f}KB @ {self.local_path} [inode:{self.inode}]" ) @dataclass class ToolInvocation: """One recorded tool call — the provenance unit for grounded facts. Every wrapped tool executor records a ToolInvocation when it runs. The grounding gateway looks these up by id when validating that a fact in an ``add_phenomenon`` call traces back to a real tool output. Persisted with the graph so a re-loaded run can still verify provenance. """ id: str # "inv-{uuid8}" tool: str # tool name as registered in TOOL_CATALOG args: dict # kwargs passed to the executor output: str # the raw output string the tool produced output_sha256: str # hexdigest — tamper-evident hash of output agent: str # agent that issued the call task_id: str # agent run scope (graph._current_task_id at call time) source_id: str # active evidence source at call time created_at: str # ISO timestamp cached: bool = False # served from result cache without re-running def to_dict(self) -> dict: return asdict(self) @classmethod def from_dict(cls, d: dict) -> ToolInvocation: return cls(**d) def summary(self) -> str: return ( f"[{self.id}] {self.tool}({json.dumps(self.args, ensure_ascii=False)}) " f"@{self.created_at} agent={self.agent} cached={self.cached}" ) # --------------------------------------------------------------------------- # Evidence Graph # --------------------------------------------------------------------------- class EvidenceGraph: """Graph-based evidence store for multi-agent forensic analysis. Agents interact with the graph via query tools (list_phenomena, get_phenomenon, search_graph, get_related) rather than reading a full dump in the system prompt. """ def __init__( self, case_info: dict | None = None, persist_path: Path | None = None, edge_log_lr: dict[str, float] | None = None, ) -> None: self.case_info: dict = case_info or {} # log₁₀(LR) per edge type — calibration table for confidence updates. # Renamed from edge_weights (S3): the values are no longer deltas in # confidence space, they are log-likelihood ratios in odds space. self.edge_log_lr: dict[str, float] = ( dict(edge_log_lr) if edge_log_lr else dict(_DEFAULT_LOG_LR) ) self.image_path: str = "" self.partition_offset: int = 0 self.extracted_dir: str = "extracted" # Multi-evidence: the case and the source tools/phenomena bind to. # image_path / partition_offset above mirror active_source for # backward-compatible readers; set_active_source keeps them in sync. self.case: Case | None = None self.active_source: EvidenceSource | None = None # Graph storage self.phenomena: dict[str, Phenomenon] = {} self.hypotheses: dict[str, Hypothesis] = {} self.entities: dict[str, Entity] = {} self.edges: list[Edge] = [] # Adjacency index for fast traversal self._adj: dict[str, list[Edge]] = {} # node_id → outgoing edges self._adj_rev: dict[str, list[Edge]] = {} # node_id → incoming edges # Lead / status management (carried over from Blackboard) self.leads: list[Lead] = [] self.agent_status: dict[str, str] = {} # Asset library — tracks all files extracted from the disk image self.asset_library: dict[str, ExtractedAsset] = {} self._inode_index: dict[str, str] = {} # inode → asset_id # Investigation areas — derived from hypotheses (LLM) and/or seeded # from config.yaml:investigation_areas (manual override). Drives the # gap-analysis coverage check. self.investigation_areas: dict[str, InvestigationArea] = {} # Tool invocations — provenance log for grounded facts. Every wrapped # tool executor records one entry; add_phenomenon's grounding gateway # looks them up to validate cited invocation_ids and substring-match # claimed fact values against real tool outputs. self.tool_invocations: dict[str, ToolInvocation] = {} # _current_agent / _current_task_id are exposed as @property below, # backed by module-level ContextVars (race-free under asyncio.gather). self._lock = asyncio.Lock() self._persist_path: Path | None = persist_path # ---- Per-asyncio-task scoped state --------------------------------------- # # Reads/writes through these properties hit ContextVars rather than # instance attributes. Concurrent agent runs (Phase 3 parallel # dispatch) each have their own task-local context, so writes inside # one agent's run() are invisible to siblings — which means # ``record_tool_invocation`` always tags an invocation with the agent # and task scope that actually issued it. @property def _current_agent(self) -> str: return _current_agent_ctx.get() @_current_agent.setter def _current_agent(self, value: str) -> None: _current_agent_ctx.set(value or "") @property def _current_task_id(self) -> str: return _current_task_id_ctx.get() @_current_task_id.setter def _current_task_id(self, value: str) -> None: _current_task_id_ctx.set(value or "") # ---- Persistence ------------------------------------------------------- def _auto_save(self) -> None: """Persist full state to disk. Must be called inside _lock.""" if self._persist_path is None: return try: state = { "case_info": self.case_info, "case": self.case.to_dict() if self.case else None, "active_source_id": ( self.active_source.id if self.active_source else "" ), "image_path": self.image_path, "partition_offset": self.partition_offset, "extracted_dir": self.extracted_dir, "phenomena": {pid: p.to_dict() for pid, p in self.phenomena.items()}, "hypotheses": {hid: h.to_dict() for hid, h in self.hypotheses.items()}, "entities": {eid: e.to_dict() for eid, e in self.entities.items()}, "edges": [e.to_dict() for e in self.edges], "leads": [l.to_dict() for l in self.leads], "agent_status": dict(self.agent_status), "asset_library": {aid: a.to_dict() for aid, a in self.asset_library.items()}, "investigation_areas": { aid: a.to_dict() for aid, a in self.investigation_areas.items() }, "tool_invocations": { iid: inv.to_dict() for iid, inv in self.tool_invocations.items() }, "saved_at": datetime.now().isoformat(), } tmp = self._persist_path.with_suffix(".tmp") tmp.write_text(json.dumps(state, ensure_ascii=False, indent=2)) tmp.replace(self._persist_path) except Exception as e: logger.error("EvidenceGraph auto-save failed: %s", e) def save_state(self, path: Path) -> None: """Explicitly save state to the given path.""" old = self._persist_path self._persist_path = path self._auto_save() self._persist_path = old @classmethod def load_state( cls, path: Path, edge_log_lr: dict[str, float] | None = None, ) -> EvidenceGraph: """Restore an EvidenceGraph from a saved JSON state file.""" data = json.loads(path.read_text()) graph = cls( case_info=data.get("case_info", {}), persist_path=path, edge_log_lr=edge_log_lr, ) graph.image_path = data.get("image_path", "") graph.partition_offset = data.get("partition_offset", 0) graph.extracted_dir = data.get("extracted_dir", "extracted") # Restore the evidence-source model. State files predating the Case # model carry only image_path/partition_offset → wrap as one source. case_data = data.get("case") if case_data: graph.case = Case.from_dict(case_data) elif graph.image_path: graph.case = single_source_case( graph.image_path, graph.partition_offset ) if graph.case and graph.case.sources: active = graph.case.get_source(data.get("active_source_id", "")) graph.set_active_source(active or graph.case.sources[0]) graph.phenomena = { pid: Phenomenon.from_dict(p) for pid, p in data.get("phenomena", {}).items() } graph.hypotheses = { hid: Hypothesis.from_dict(h) for hid, h in data.get("hypotheses", {}).items() } graph.entities = { eid: Entity.from_dict(e) for eid, e in data.get("entities", {}).items() } graph.edges = [Edge.from_dict(e) for e in data.get("edges", [])] graph.leads = [Lead.from_dict(l) for l in data.get("leads", [])] graph.agent_status = data.get("agent_status", {}) for aid, a_data in data.get("asset_library", {}).items(): asset = ExtractedAsset.from_dict(a_data) graph.asset_library[aid] = asset graph._inode_index[asset.inode] = aid graph.investigation_areas = { aid: InvestigationArea.from_dict(a) for aid, a in data.get("investigation_areas", {}).items() } graph.tool_invocations = { iid: ToolInvocation.from_dict(inv) for iid, inv in data.get("tool_invocations", {}).items() } graph._rebuild_adjacency() logger.info( "EvidenceGraph restored: %d phenomena, %d hypotheses, %d entities, " "%d edges, %d assets", len(graph.phenomena), len(graph.hypotheses), len(graph.entities), len(graph.edges), len(graph.asset_library), ) return graph def _rebuild_adjacency(self) -> None: """Rebuild adjacency index from edges list.""" self._adj.clear() self._adj_rev.clear() for edge in self.edges: self._adj.setdefault(edge.source_id, []).append(edge) self._adj_rev.setdefault(edge.target_id, []).append(edge) # ---- Evidence source ---------------------------------------------------- def set_active_source(self, source: EvidenceSource | None) -> None: """Bind tools and newly recorded phenomena to *source*. Syncs the legacy image_path / partition_offset fields so existing readers (orchestrator logs, report naming, agent prompts) keep working unchanged. The orchestrator calls this before dispatching an agent; single-source runs call it once at startup. """ self.active_source = source if source is not None: self.image_path = source.path self.partition_offset = source.partition_offset # ---- Node helpers ------------------------------------------------------- def _node_exists(self, node_id: str) -> bool: if node_id.startswith("ph-"): return node_id in self.phenomena if node_id.startswith("hyp-"): return node_id in self.hypotheses if node_id.startswith("ent-"): return node_id in self.entities return False def get_node(self, node_id: str) -> Phenomenon | Hypothesis | Entity | None: if node_id.startswith("ph-"): return self.phenomena.get(node_id) if node_id.startswith("hyp-"): return self.hypotheses.get(node_id) if node_id.startswith("ent-"): return self.entities.get(node_id) return None # ---- Similarity merging (Phenomenon only) -------------------------------- def _find_similar_phenomenon( self, title: str, interpretation: str, category: str, ) -> Phenomenon | None: best_match: Phenomenon | None = None best_score = 0.0 for ph in self.phenomena.values(): if ph.category != category: continue title_sim = _jaccard_similarity(ph.title, title) if title_sim <= 0.6: continue desc_sim = _jaccard_similarity( ph.interpretation[:200], interpretation[:200], ) if desc_sim <= 0.4: continue combined = title_sim * 0.6 + desc_sim * 0.4 if combined > best_score: best_score = combined best_match = ph return best_match # ---- Mutation methods (async, under lock) -------------------------------- async def add_phenomenon( self, source_agent: str, category: str, title: str, interpretation: str = "", verified_facts: list[dict] | None = None, raw_data: dict | None = None, timestamp: str | None = None, source_tool: str = "", from_lead_id: str | None = None, task_id: str | None = None, # Pre-S2 callers passed analysis text as ``description``. Accept it # as an alias for ``interpretation`` so legacy tests and any in-flight # tool-call messages don't break. Not advertised in the LLM-facing # tool schema — BaseAgent's add_phenomenon advertises the new fields. description: str | None = None, ) -> tuple[str, bool]: """Add a phenomenon under the grounding gateway. Returns (id, was_merged). Each fact in ``verified_facts`` must point at a real ToolInvocation made by this agent within ``task_id`` (defaults to the graph's current task scope). Any fact failing grounding raises :class:`GroundingError` — the whole call is rejected; the caller must fix and retry. This is the code-level enforcement of DESIGN.md §4.4. """ if description and not interpretation: interpretation = description facts = list(verified_facts or []) active_task_id = task_id if task_id is not None else self._current_task_id # Grounding gateway — validate every fact BEFORE acquiring the lock # (read-only check; lookup uses dict access which is thread-safe). failures: list[dict] = [] for fact in facts: ok, reason = self.validate_fact_grounding( fact, agent=source_agent, task_id=active_task_id or "", ) if not ok: failures.append({"fact": fact, "reason": reason}) if failures: raise GroundingError( "Phenomenon rejected — one or more facts are not grounded:\n" + "\n".join( f" - {f['reason']}: {json.dumps(f['fact'], ensure_ascii=False)}" for f in failures ), failures=failures, ) async with self._lock: similar = self._find_similar_phenomenon(title, interpretation, category) if similar is not None: similar.confidence = min(1.0, similar.confidence + 0.15) if source_agent not in similar.corroborating_agents: similar.corroborating_agents.append(source_agent) if raw_data: for k, v in raw_data.items(): if k not in similar.raw_data: similar.raw_data[k] = v # Merge any new facts whose (type, value, invocation_id) # tuple isn't already on the existing phenomenon. if facts: seen = { (f.get("type"), f.get("value"), f.get("invocation_id")) for f in similar.verified_facts } for f in facts: key = (f.get("type"), f.get("value"), f.get("invocation_id")) if key not in seen: similar.verified_facts.append(f) seen.add(key) if from_lead_id and similar.from_lead_id is None: similar.from_lead_id = from_lead_id self._auto_save() return similar.id, True pid = f"ph-{uuid.uuid4().hex[:8]}" confidence = _compute_quality_score( source_tool, timestamp, raw_data or {}, interpretation, facts, [], ) ph = Phenomenon( id=pid, source_agent=source_agent, category=category, title=title, interpretation=interpretation, verified_facts=facts, raw_data=raw_data or {}, timestamp=timestamp, confidence=confidence, source_tool=source_tool, source_id=self.active_source.id if self.active_source else "", from_lead_id=from_lead_id, created_at=datetime.now().isoformat(), ) self.phenomena[pid] = ph self._auto_save() return pid, False async def add_hypothesis( self, title: str, description: str, created_by: str = "", parent_id: str | None = None, prior_prob: float = 0.5, ) -> str: """Add a hypothesis. Returns the hypothesis ID. ``prior_prob`` seeds the starting log_odds (default 0.5 → 0.0). Pick a different prior when you have base-rate knowledge — e.g. prior_prob=0.1 for an unusual claim, 0.9 for a strong default. """ async with self._lock: hid = f"hyp-{uuid.uuid4().hex[:8]}" l_prior = prob_to_log_odds(prior_prob) hyp = Hypothesis( id=hid, title=title, description=description, prior_prob=prior_prob, log_odds=l_prior, confidence=log_odds_to_prob(l_prior), status="active", parent_id=parent_id, created_by=created_by, created_at=datetime.now().isoformat(), ) self.hypotheses[hid] = hyp self._auto_save() return hid async def add_entity( self, name: str, entity_type: str, description: str = "", ) -> tuple[str, bool]: """Add an entity. Deduplicates on (name, entity_type). Returns (id, was_existing).""" async with self._lock: for ent in self.entities.values(): if ent.name == name and ent.entity_type == entity_type: return ent.id, True eid = f"ent-{uuid.uuid4().hex[:8]}" self.entities[eid] = Entity( id=eid, name=name, entity_type=entity_type, description=description, created_at=datetime.now().isoformat(), ) self._auto_save() return eid, False async def add_edge( self, source_id: str, target_id: str, edge_type: str, metadata: dict | None = None, created_by: str = "", ) -> str: """Add a directed edge. Validates nodes exist and edge type is valid.""" async with self._lock: if not self._node_exists(source_id): raise ValueError(f"Source node not found: {source_id}") if not self._node_exists(target_id): raise ValueError(f"Target node not found: {target_id}") if edge_type not in VALID_EDGE_TYPES: raise ValueError(f"Invalid edge type: {edge_type}") eid = f"edge-{uuid.uuid4().hex[:8]}" edge = Edge( id=eid, source_id=source_id, target_id=target_id, edge_type=edge_type, metadata=metadata or {}, created_by=created_by, created_at=datetime.now().isoformat(), ) self.edges.append(edge) self._adj.setdefault(source_id, []).append(edge) self._adj_rev.setdefault(target_id, []).append(edge) self._auto_save() return eid async def update_hypothesis_confidence( self, hyp_id: str, phenomenon_id: str, edge_type: str, reason: str = "", ) -> float: """Apply one phenomenon→hypothesis edge as an additive log_odds update. DESIGN.md §4.5: edge_type → log₁₀(LR) is looked up in ``self.edge_log_lr`` (LLM never emits the number). The update is ``L_post = L_prior + log_lr`` and ``confidence = sigmoid(L_post)`` — commutative and order-independent, fixing the pre-S3 ordering bug. Status flips at ≥0.8 → supported / ≤0.2 → refuted. **Idempotency**: if a ``(phenomenon, hypothesis, edge_type)`` edge already exists, this is a no-op — the same agent re-recording the same link (or two agents linking via the orchestrator's batch judge and a manual override) does not double-count. Independent evidence — *different* phenomena pointing the same way — still accumulates fully. """ if edge_type not in self.edge_log_lr: raise ValueError( f"Invalid hypothesis edge type: {edge_type}. " f"Must be one of: {list(self.edge_log_lr.keys())}" ) async with self._lock: if not self._node_exists(phenomenon_id): raise ValueError(f"Phenomenon not found: {phenomenon_id}") hyp = self.hypotheses.get(hyp_id) if hyp is None: raise ValueError(f"Hypothesis not found: {hyp_id}") # Idempotency check — same (ph, hyp, edge_type) already on graph. for existing in self._adj.get(phenomenon_id, []): if ( existing.target_id == hyp_id and existing.edge_type == edge_type ): return hyp.confidence log_lr = self.edge_log_lr[edge_type] old_log_odds = hyp.log_odds old_conf = hyp.confidence new_log_odds = old_log_odds + log_lr new_conf = log_odds_to_prob(new_log_odds) hyp.log_odds = new_log_odds hyp.confidence = new_conf if new_conf >= 0.8: hyp.status = "supported" elif new_conf <= 0.2: hyp.status = "refuted" else: hyp.status = "active" hyp.confidence_log.append({ "timestamp": datetime.now().isoformat(), "phenomenon_id": phenomenon_id, "edge_type": edge_type, "log_lr": log_lr, "old_log_odds": round(old_log_odds, 4), "new_log_odds": round(new_log_odds, 4), "old_confidence": round(old_conf, 4), "new_confidence": round(new_conf, 4), "reason": reason, }) # Also create the edge in the graph eid = f"edge-{uuid.uuid4().hex[:8]}" edge = Edge( id=eid, source_id=phenomenon_id, target_id=hyp_id, edge_type=edge_type, metadata={"reason": reason, "log_lr": log_lr}, created_by="hypothesis_engine", created_at=datetime.now().isoformat(), ) self.edges.append(edge) self._adj.setdefault(phenomenon_id, []).append(edge) self._adj_rev.setdefault(hyp_id, []).append(edge) self._auto_save() # If this is a coref hypothesis, mirror the new confidence into the # entity-level same_as edge. Done OUTSIDE the lock — _sync_same_as_edge # re-acquires it internally — so we avoid reentrant locking. if hyp.coref_entity_pair and len(hyp.coref_entity_pair) == 2: await self._sync_same_as_edge( hyp.coref_entity_pair[0], hyp.coref_entity_pair[1], hyp_id, ) return new_conf # ---- Cross-source entity coreference (DESIGN.md §4.6) ------------------- @staticmethod def _coref_hypothesis_id(eid_a: str, eid_b: str) -> str: """Deterministic id for the coref hypothesis between an entity pair. Same pair (regardless of arg order) always maps to the same id so repeated observations augment the existing hypothesis rather than spawning duplicates. """ pair = "|".join(sorted([eid_a, eid_b])) return f"hyp-coref-{hashlib.sha256(pair.encode()).hexdigest()[:10]}" async def get_or_create_coref_hypothesis( self, eid_a: str, eid_b: str, ) -> tuple[str, bool]: """Look up (or insert) the coreference hypothesis for an entity pair. Uses a low prior (``prior_prob=0.1``) — saying any two entities are the same actor is a strong claim, so the default should be skeptical and let evidence move the needle. """ hid = self._coref_hypothesis_id(eid_a, eid_b) async with self._lock: if hid in self.hypotheses: return hid, False ea = self.entities.get(eid_a) eb = self.entities.get(eid_b) if ea is None or eb is None: raise ValueError(f"Unknown entity in coref pair: {eid_a}, {eid_b}") l_prior = prob_to_log_odds(0.1) self.hypotheses[hid] = Hypothesis( id=hid, title=f"Coreference: {ea.name} ≡ {eb.name}", description=( f"Hypothesis that {ea.id} ({ea.name}, {ea.entity_type}) " f"and {eb.id} ({eb.name}, {eb.entity_type}) refer to " f"the same actor across evidence sources." ), prior_prob=0.1, log_odds=l_prior, confidence=log_odds_to_prob(l_prior), status="active", created_by="coref_engine", created_at=datetime.now().isoformat(), coref_entity_pair=sorted([eid_a, eid_b]), ) self._auto_save() return hid, True async def _sync_same_as_edge( self, eid_a: str, eid_b: str, hyp_id: str, ) -> None: """Mirror coref hypothesis confidence into a ``same_as`` entity edge. - Confidence ≥ 0.8 → ensure an active ``same_as`` edge exists. - Confidence < 0.8 → mark any existing edge inactive (audit, not delete). Idempotent on both transitions. """ hyp = self.hypotheses.get(hyp_id) if hyp is None: return active = hyp.confidence >= 0.8 async with self._lock: existing = None for edge in self.edges: if (edge.edge_type == "same_as" and {edge.source_id, edge.target_id} == {eid_a, eid_b}): existing = edge break if active: if existing is None: eid = f"edge-{uuid.uuid4().hex[:8]}" edge = Edge( id=eid, source_id=eid_a, target_id=eid_b, edge_type="same_as", metadata={ "backed_by": hyp_id, "active": True, "confidence_at_creation": hyp.confidence, }, created_by="coref_engine", created_at=datetime.now().isoformat(), ) self.edges.append(edge) self._adj.setdefault(eid_a, []).append(edge) self._adj_rev.setdefault(eid_b, []).append(edge) elif not existing.metadata.get("active"): existing.metadata["active"] = True existing.metadata["reactivated_at"] = datetime.now().isoformat() else: if existing is not None and existing.metadata.get("active"): existing.metadata["active"] = False existing.metadata["deactivated_at"] = datetime.now().isoformat() self._auto_save() async def observe_identity( self, entity_name: str, entity_type: str, identifier_type: str, value: str, source_agent: str, invocation_id: str, source_tool: str = "", task_id: str | None = None, ) -> dict: """Record a typed identifier for an entity through the grounding gateway. DESIGN.md §4.6. Steps: 1. Validate ``invocation_id`` + ``value`` via the same gateway ``add_phenomenon`` uses (raises :class:`GroundingError` on failure). 2. Get-or-create the entity. 3. Record an ``identity_observation`` phenomenon carrying the identifier as its sole verified fact. 4. Attach the identifier to the entity (idempotent by ``(type, normalized_value)``). 5. If the attachment is new, scan other entities for shared identifiers (strong / weak) and any conflicting strong identifiers, then propose / strengthen / weaken the coref hypothesis between each candidate pair. ``same_as`` edges are kept in sync with the hypothesis confidence. Returns a dict summarising the entity id, observation phenomenon, whether the identifier was new, and any coref proposals fired. """ if identifier_type not in (STRONG_IDENTIFIER_TYPES | WEAK_IDENTIFIER_TYPES): raise ValueError( f"Unknown identifier_type: {identifier_type}. " f"Strong: {sorted(STRONG_IDENTIFIER_TYPES)}; " f"Weak: {sorted(WEAK_IDENTIFIER_TYPES)}." ) if not value: raise ValueError("identifier value must be non-empty") # add_phenomenon enforces the grounding contract for the fact below. active_task = task_id if task_id is not None else self._current_task_id fact = {"type": identifier_type, "value": value, "invocation_id": invocation_id} # Get-or-create entity first so we can attribute the observation. eid, _existed = await self.add_entity(entity_name, entity_type) norm = _normalize_identifier(identifier_type, value) title = f"{identifier_type}={value} on {entity_name}" pid, _merged = await self.add_phenomenon( source_agent=source_agent, category="identity_observation", title=title, interpretation=( f"Agent attributed identifier {identifier_type}={value} " f"(normalized={norm}) to entity {entity_name} ({entity_type})." ), verified_facts=[fact], source_tool=source_tool, task_id=active_task, ) # Attach identifier to entity (idempotent on type + normalized value). new_identifier = False async with self._lock: ent = self.entities[eid] if not ent.has_identifier(identifier_type, norm): ent.identifiers.append({ "type": identifier_type, "value": value, "normalized": norm, "strong": is_strong_identifier(identifier_type), "invocation_id": invocation_id, "phenomenon_id": pid, "observed_at": datetime.now().isoformat(), }) new_identifier = True self._auto_save() coref_proposals: list[dict] = [] if new_identifier: coref_proposals = await self._propose_coref_for_new_identifier( new_eid=eid, new_type=identifier_type, new_norm=norm, new_phenomenon_id=pid, ) return { "entity_id": eid, "phenomenon_id": pid, "new_identifier": new_identifier, "coref_proposals": coref_proposals, } async def _propose_coref_for_new_identifier( self, new_eid: str, new_type: str, new_norm: str, new_phenomenon_id: str, ) -> list[dict]: """Blocking + propose: find candidate entities that share this identifier with ``new_eid``, register / strengthen the coref hypothesis for each pair, and emit conflicting-identifier edges where the two entities have *different* values for the same strong identifier type. O(|entities| × identifiers) — blocking is implicit in the fact that the new identifier is fixed. """ new_ent = self.entities.get(new_eid) if new_ent is None: return [] is_strong_new = is_strong_identifier(new_type) match_edge = "shared_strong_identifier" if is_strong_new else "shared_weak_identifier" proposals: list[dict] = [] for other_eid, other_ent in list(self.entities.items()): if other_eid == new_eid: continue # Match: other entity carries the same (type, normalized). if not other_ent.has_identifier(new_type, new_norm): continue # Collect conflicting strong identifiers between the pair — # they'll fire negative-LR edges on the same coref hypothesis. conflicts: list[dict] = [] for a_ident in new_ent.identifiers: if not a_ident.get("strong"): continue for b_ident in other_ent.identifiers: if (b_ident.get("type") == a_ident.get("type") and b_ident.get("strong") and b_ident.get("normalized") != a_ident.get("normalized")): conflicts.append({ "type": a_ident.get("type"), "new_value": a_ident.get("value"), "other_value": b_ident.get("value"), "new_phenomenon_id": a_ident.get("phenomenon_id"), }) hid, _created = await self.get_or_create_coref_hypothesis( new_eid, other_eid, ) # +shared identifier edge (one per identifier, anchored to the # newly recorded observation phenomenon). update_hypothesis_ # confidence is idempotent on (ph, hyp, edge_type), so re-running # the same observation does not double-count. await self.update_hypothesis_confidence( hid, new_phenomenon_id, match_edge, reason=f"shared {new_type}={new_norm}", ) # −conflicting strong identifier edges — one per conflict, anchored # to the *new* entity's observation phenomenon for that identifier. for c in conflicts: ph_src = c["new_phenomenon_id"] if not ph_src: continue await self.update_hypothesis_confidence( hid, ph_src, "conflicting_strong_identifier", reason=( f"conflict {c['type']}: " f"{c['new_value']} vs {c['other_value']}" ), ) await self._sync_same_as_edge(new_eid, other_eid, hid) proposals.append({ "hypothesis_id": hid, "other_entity_id": other_eid, "match": {"type": new_type, "normalized": new_norm, "edge_type": match_edge}, "conflicts": conflicts, "confidence": self.hypotheses[hid].confidence, }) return proposals # ---- Cross-source entity cluster queries (DESIGN.md §4.6) ---------------- def _active_same_as_neighbors(self, entity_id: str) -> set[str]: """Neighbours of *entity_id* via ``same_as`` edges that are still active. ``same_as`` edges are non-destructive: a coref hypothesis that drops below threshold marks ``metadata['active']=False`` rather than deleting, so the audit trail survives. Cluster queries respect that. """ out: set[str] = set() for edge in self.edges: if edge.edge_type != "same_as": continue if not edge.metadata.get("active", True): continue if edge.source_id == entity_id: out.add(edge.target_id) elif edge.target_id == entity_id: out.add(edge.source_id) return out def resolve_actor_cluster(self, entity_id: str) -> set[str]: """Return the connected component containing *entity_id* via active ``same_as`` edges — the set of entity ids that current coref evidence treats as the same actor. Reversible: deactivating a ``same_as`` edge (because the backing coref hypothesis drops below 0.8) breaks the component, so this always reflects the *current* state of the graph. """ if entity_id not in self.entities: return set() seen: set[str] = {entity_id} frontier: list[str] = [entity_id] while frontier: cur = frontier.pop() for nbr in self._active_same_as_neighbors(cur): if nbr not in seen: seen.add(nbr) frontier.append(nbr) return seen def actor_clusters(self) -> list[dict]: """Group all entities into actor clusters via active ``same_as``. Returns a list of ``{members: [...], identifiers: [...], coref_hypotheses: [...]}`` for the report agent and the orchestrator's cross-source views. """ unseen = set(self.entities.keys()) clusters: list[dict] = [] while unseen: start = next(iter(unseen)) members = self.resolve_actor_cluster(start) unseen -= members # Aggregate identifiers across the cluster (deduped on type+normalized). ident_seen: set[tuple[str, str]] = set() idents: list[dict] = [] for eid in members: for ident in self.entities[eid].identifiers: key = (ident.get("type"), ident.get("normalized")) if key in ident_seen: continue ident_seen.add(key) idents.append({ "type": ident.get("type"), "value": ident.get("value"), "strong": ident.get("strong"), "on_entity": eid, }) coref_hyps = sorted({ e.metadata.get("backed_by", "") for e in self.edges if e.edge_type == "same_as" and e.metadata.get("active", True) and (e.source_id in members or e.target_id in members) } - {""}) clusters.append({ "members": sorted(members), "identifiers": idents, "coref_hypotheses": coref_hyps, }) return clusters # ---- Lead management (same as old Blackboard) ---------------------------- async def add_lead( self, target_agent: str, description: str, priority: int = 5, context: dict | None = None, hypothesis_id: str | None = None, ) -> str: async with self._lock: lid = f"lead-{uuid.uuid4().hex[:8]}" self.leads.append(Lead( id=lid, target_agent=target_agent, description=description, priority=priority, context=context or {}, hypothesis_id=hypothesis_id, )) self._auto_save() return lid async def get_pending_leads(self, agent_type: str | None = None) -> list[Lead]: async with self._lock: leads = [l for l in self.leads if l.status == "pending"] if agent_type: leads = [l for l in leads if l.target_agent == agent_type] return sorted(leads, key=lambda l: l.priority) async def mark_lead_completed(self, lead_id: str) -> None: async with self._lock: for lead in self.leads: if lead.id == lead_id: lead.status = "completed" break self._auto_save() async def mark_lead_failed(self, lead_id: str, error: str = "") -> None: async with self._lock: for lead in self.leads: if lead.id == lead_id: lead.status = "failed" lead.context["failure_reason"] = error break self._auto_save() # ---- Investigation areas ------------------------------------------------- async def add_investigation_area( self, area: str, description: str, suggested_agent: str, expected_keywords: list[str] | None = None, expected_tools: list[str] | None = None, priority: int = 5, motivating_hypothesis_ids: list[str] | None = None, created_by: str = "", ) -> tuple[str, bool]: """Add or merge an investigation area. Dedupe key is the `area` slug. On collision, union the three list fields (keywords / tools / motivating_hypothesis_ids); description / suggested_agent / priority are preserved from the first writer (manual seed wins over LLM derive). Returns (id, was_existing). """ async with self._lock: for existing in self.investigation_areas.values(): if existing.area == area: for kw in (expected_keywords or []): if kw not in existing.expected_keywords: existing.expected_keywords.append(kw) for t in (expected_tools or []): if t not in existing.expected_tools: existing.expected_tools.append(t) for hid in (motivating_hypothesis_ids or []): if hid not in existing.motivating_hypothesis_ids: existing.motivating_hypothesis_ids.append(hid) self._auto_save() return existing.id, True aid = f"area-{area}" self.investigation_areas[aid] = InvestigationArea( id=aid, area=area, description=description, suggested_agent=suggested_agent, expected_keywords=list(expected_keywords or []), expected_tools=list(expected_tools or []), priority=priority, motivating_hypothesis_ids=list(motivating_hypothesis_ids or []), created_by=created_by, created_at=datetime.now().isoformat(), ) self._auto_save() return aid, False # ---- Tool invocation log ------------------------------------------------- async def record_tool_invocation( self, tool: str, args: dict, output: str, cached: bool = False, ) -> str: """Record one tool call. Returns the invocation_id. Source / agent / task_id are read from the graph's current run context (set by BaseAgent.run and set_active_source) so executors can stay stateless. """ iid = f"inv-{uuid.uuid4().hex[:8]}" src_id = self.active_source.id if self.active_source else "" inv = ToolInvocation( id=iid, tool=tool, args=dict(args), output=output, output_sha256=hashlib.sha256(output.encode("utf-8", errors="replace")).hexdigest(), agent=self._current_agent or "unknown", task_id=self._current_task_id or "", source_id=src_id, created_at=datetime.now().isoformat(), cached=cached, ) async with self._lock: self.tool_invocations[iid] = inv # Cheap on cache hit; expensive but bounded otherwise. Skip # auto-save here — too noisy if every tool call rewrites the # state file; the next phenomenon write will flush. return iid def validate_fact_grounding( self, fact: dict, agent: str, task_id: str, ) -> tuple[bool, str]: """Check a single verified_fact's grounding. Returns (ok, reason). Rules (DESIGN.md §4.4, refined after first end-to-end run): 1. invocation_id must exist in self.tool_invocations. 2. The invocation must have been made by `agent` within `task_id`. 3. fact.value must appear in invocation.output — either as a strict substring, OR (loose-match fallback) once both sides are normalised via :func:`_normalize_for_grounding` (case-folded, whitespace-collapsed, path-sep unified). The loose match catches the LLM's routine presentation normalisations (case-folded hex, slash-flipped paths, collapsed multi-line labels) without enabling fabrication: a string that isn't present in ANY form still fails the normalised check. """ inv_id = fact.get("invocation_id", "") value = fact.get("value", "") if not inv_id: return False, "missing invocation_id" inv = self.tool_invocations.get(inv_id) if inv is None: return False, f"invocation_id {inv_id} not found in invocation log" if inv.agent != agent: return False, ( f"invocation {inv_id} was made by agent '{inv.agent}', " f"not '{agent}' — cannot be cited by a different agent" ) if task_id and inv.task_id and inv.task_id != task_id: return False, ( f"invocation {inv_id} was made in a different task scope " f"({inv.task_id}) — cite only invocations from your current task" ) if not isinstance(value, str) or not value: return False, "fact.value must be a non-empty string" if value in inv.output: return True, "ok" # Loose fallback: normalised comparison absorbs case / whitespace / # path-sep differences but a genuinely absent value still fails. if _normalize_for_grounding(value) in _normalize_for_grounding(inv.output): return True, "ok-normalized" return False, ( f"fact.value not found in invocation {inv_id} output — even after " f"case/whitespace/path-sep normalisation. Copy a literal substring " f"from that tool's result; if the content is a guess (device model, " f"constructed path, label-joined value), move it into `interpretation` " f"instead of `verified_facts`." ) # ---- Asset library ------------------------------------------------------- async def register_asset( self, inode: str, original_path: str, local_path: str, category: str, filename: str, size_bytes: int, extracted_by: str, ) -> tuple[str, bool]: """Register an extracted file. Deduplicates by inode. Returns (id, already_existed).""" async with self._lock: if inode in self._inode_index: return self._inode_index[inode], True aid = f"asset-{uuid.uuid4().hex[:8]}" asset = ExtractedAsset( id=aid, inode=inode, original_path=original_path, local_path=local_path, category=category, filename=filename, size_bytes=size_bytes, extracted_by=extracted_by, extracted_at=datetime.now().isoformat(), ) self.asset_library[aid] = asset self._inode_index[inode] = aid self._auto_save() return aid, False def lookup_asset_by_inode(self, inode: str) -> ExtractedAsset | None: """Look up an extracted asset by inode (synchronous, no lock needed for reads).""" aid = self._inode_index.get(inode) return self.asset_library.get(aid) if aid else None def list_assets(self, category: str | None = None) -> list[str]: """Return one-line summaries of all assets, optionally filtered.""" results = [] for asset in self.asset_library.values(): if category and asset.category != category: continue results.append(asset.summary()) return results def query_assets( self, category: str | None = None, filename_pattern: str | None = None, ) -> list[ExtractedAsset]: """Query the asset library with optional filters.""" results = [] for asset in self.asset_library.values(): if category and asset.category != category: continue if filename_pattern and filename_pattern.lower() not in asset.filename.lower(): continue results.append(asset) return results # ---- Query methods (for agent tools) ------------------------------------ def list_phenomena(self, category: str | None = None) -> list[str]: """Return one-line summaries of all phenomena, optionally filtered.""" results = [] for ph in self.phenomena.values(): if category and ph.category != category: continue results.append(ph.summary()) return results def get_phenomenon(self, ph_id: str) -> dict | None: """Return full phenomenon details as dict, or None.""" ph = self.phenomena.get(ph_id) return ph.to_dict() if ph else None def search_graph(self, keyword: str) -> list[str]: """Search across all node types by keyword. Returns summaries.""" kw = keyword.lower() results = [] for ph in self.phenomena.values(): haystack = ( ph.title.lower() + " " + ph.interpretation.lower() + " " + " ".join(str(f.get("value", "")).lower() for f in ph.verified_facts) ) if kw in haystack: results.append(ph.summary()) for hyp in self.hypotheses.values(): if kw in hyp.title.lower() or kw in hyp.description.lower(): results.append(hyp.summary()) for ent in self.entities.values(): if kw in ent.name.lower() or kw in ent.description.lower(): results.append(ent.summary()) return results def get_related( self, node_id: str, edge_type: str | None = None, direction: str = "both", ) -> list[dict]: """Get nodes connected to the given node. Returns list of {node_summary, edge_type, direction}.""" results = [] if direction in ("out", "both"): for edge in self._adj.get(node_id, []): if edge_type and edge.edge_type != edge_type: continue node = self.get_node(edge.target_id) if node: results.append({ "node": node.summary(), "edge_type": edge.edge_type, "direction": "outgoing", "metadata": edge.metadata, }) if direction in ("in", "both"): for edge in self._adj_rev.get(node_id, []): if edge_type and edge.edge_type != edge_type: continue node = self.get_node(edge.source_id) if node: results.append({ "node": node.summary(), "edge_type": edge.edge_type, "direction": "incoming", "metadata": edge.metadata, }) return results def get_hypothesis_status(self) -> list[str]: """Return summaries of all hypotheses.""" return [h.summary() for h in self.hypotheses.values()] def get_phenomena_by_category(self, category: str) -> list[Phenomenon]: return [p for p in self.phenomena.values() if p.category == category] def hypotheses_converged(self) -> bool: """True if no hypotheses are still active.""" return all(h.status != "active" for h in self.hypotheses.values()) def mark_remaining_inconclusive(self) -> None: """Mark all still-active hypotheses as inconclusive.""" for h in self.hypotheses.values(): if h.status == "active": h.status = "inconclusive" # ---- Hypothesis × Evidence matrix (DESIGN.md §4.5) ----------------------- def hypothesis_evidence_matrix(self) -> dict: """Structured pivot of every Phenomenon→Hypothesis edge. Returns ``{"hypotheses": [...], "phenomena": [...], "cells": {...}, "counts_by_edge_type": {hyp_id: {edge_type: count}}}`` — the cells map ``(hyp_id, ph_id)`` to a *list* of edge_type strings (a single phenomenon may link via several edge_types after a manual override plus an LLM judge call). Drives report rendering and gap selection. """ cells: dict[tuple[str, str], list[str]] = {} counts: dict[str, dict[str, int]] = {h: {} for h in self.hypotheses} for edge in self.edges: if not ( edge.source_id.startswith("ph-") and edge.target_id.startswith("hyp-") and edge.edge_type in self.edge_log_lr ): continue key = (edge.target_id, edge.source_id) cells.setdefault(key, []).append(edge.edge_type) counts.setdefault(edge.target_id, {})[edge.edge_type] = ( counts.setdefault(edge.target_id, {}).get(edge.edge_type, 0) + 1 ) hypotheses = [ { "id": h.id, "title": h.title, "confidence": h.confidence, "log_odds": h.log_odds, "status": h.status, } for h in self.hypotheses.values() ] referenced = {ph_id for (_, ph_id) in cells} phenomena = [ {"id": ph.id, "title": ph.title, "category": ph.category} for ph in self.phenomena.values() if ph.id in referenced ] return { "hypotheses": hypotheses, "phenomena": phenomena, "cells": {f"{h}|{p}": types for (h, p), types in cells.items()}, "counts_by_edge_type": counts, } def hypothesis_evidence_matrix_markdown(self) -> str: """Render the matrix as a compact markdown pivot. Columns are the edge types (counts), plus log_odds, confidence, status — enough for the report agent to ground every hypothesis in its supporting and contradicting evidence at a glance. """ if not self.hypotheses: return "(no hypotheses)" matrix = self.hypothesis_evidence_matrix() edge_types = sorted(self.edge_log_lr.keys()) header = ( "| Hypothesis | " + " | ".join(edge_types) + " | log_odds | conf | status |" ) sep = ( "|---|" + "|".join(["---:"] * len(edge_types)) + "|---:|---:|---|" ) rows = [header, sep] for h in matrix["hypotheses"]: counts = matrix["counts_by_edge_type"].get(h["id"], {}) cells = [str(counts.get(et, 0)) for et in edge_types] rows.append( f"| {h['title']} | " + " | ".join(cells) + f" | {h['log_odds']:+.2f} | {h['confidence']:.2f} | {h['status']} |" ) return "\n".join(rows) # ---- Summary (lightweight, for system prompt) ---------------------------- def stats_summary(self) -> str: """Ultra-compact stats for inclusion in system prompt.""" active_hyp = sum(1 for h in self.hypotheses.values() if h.status == "active") return ( f"Graph: {len(self.phenomena)} phenomena, " f"{len(self.hypotheses)} hypotheses ({active_hyp} active), " f"{len(self.entities)} entities, {len(self.edges)} edges. " f"Asset library: {len(self.asset_library)} extracted files. " f"Pending leads: {sum(1 for l in self.leads if l.status == 'pending')}." )