feat(refit): complete S1-S6 — case abstraction, grounding, log-odds, plugins, coref, multi-source

Consolidates the long-running refit work (DESIGN.md as authoritative spec) into a single baseline commit. Six stages landed together. S1 — Case + EvidenceSource abstraction: tools parameterised by source_id (case.py, main.py multi-source bootstrap, .bin extension support). S2 — Grounding gateway in add_phenomenon: verified_facts cite real ToolInvocation ids; substring / normalised match enforced; agent + task scope checked. Phenomenon.description split into verified_facts (grounded) + interpretation (free text). [invocation: inv-xxx] prefix on every wrapped tool result so the LLM can cite. S3 — Confidence as additive log-odds: edge_type → log10(LR) calibration table; commutative updates; supported / refuted thresholds derived from log_odds; hypothesis × evidence matrix view. S4 — iOS plugin: unzip_archive + parse_plist / sqlite_tables / sqlite_query / parse_ios_keychain / read_idevice_info; IOSArtifactAgent; SOURCE_TYPE_AGENTS routing. S5 — Cross-source entity resolution: typed identifiers on Entity, observe_identity gateway, auto coref hypothesis with shared / conflicting strong/weak LR edges, reversible same_as edges, actor_clusters() view. S6 — Android partition probe + AndroidArtifactAgent; MediaAgent with OCR fallback; orchestrator Phase 1 iterates every analysable source; platform-aware get_triage_agent_type; ReportAgent renders actor clusters + per-source breakdown. Tests: 142 unit tests / 1 skipped — full coverage of the new gateway, log-odds math, coref hypothesis fall-out, and orchestrator multi-source dispatch. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 02:12:10 -10:00
parent 444d58726a
commit 81ade8f7ac
24 changed files with 5137 additions and 244 deletions
--- a/agents/android_artifact.py
+++ b/agents/android_artifact.py
@@ -0,0 +1,58 @@
+"""Android Artifact Agent — multi-partition analysis of raw Android dumps.
+
+DESIGN.md §4.7 (Android): ``mmls`` slices the dump into partitions; each one is
+its own analysable surface. Ext4-backed partitions (typically SYSTEM,
+USERDATA when not FBE-encrypted, EFS in some variants) yield to TSK; raw
+partitions (BOOT, RECOVERY, RADIO, MODEM blobs) are best mined with
+``search_strings``. Userdata is the prize and is often FBE-encrypted on
+modern devices — the agent must check fsstat before assuming readability
+(see ``probe_android_partitions`` for the survey).
+"""
+
+from __future__ import annotations
+
+from base_agent import BaseAgent
+from evidence_graph import EvidenceGraph
+from llm_client import LLMClient
+from tool_registry import TOOL_CATALOG
+
+
+class AndroidArtifactAgent(BaseAgent):
+    name = "android_artifact"
+    role = (
+        "Android forensic analyst. You navigate raw Android disk dumps "
+        "(blk0_sda-style images) partition by partition. Workflow: call "
+        "probe_android_partitions ONCE to map the disk; pick the partitions "
+        "with fs_type=Ext4 or fs_type=F2FS (SYSTEM, USERDATA if readable, "
+        "EFS); for each, call set_active_partition(offset_from_512_sector_column) "
+        "and then list_directory / extract_file / search_strings as usual. "
+        "For raw partitions (BOOT, RECOVERY, RADIO, TOMBSTONES) skip directly "
+        "to search_strings — they have no filesystem. If USERDATA shows "
+        "fs_type=unknown it is almost certainly FBE-encrypted: record that "
+        "as a negative finding (the absence IS evidence) and move on to "
+        "what's reachable."
+    )
+
+    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
+        super().__init__(llm, graph)
+        self._register_tools()
+
+    def _register_tools(self) -> None:
+        tool_names = [
+            # Android-specific
+            "probe_android_partitions",
+            "set_active_partition",
+            # Reused TSK toolset — partition_offset comes from active_source
+            "partition_info", "filesystem_info", "list_directory",
+            "extract_file", "find_file", "search_strings",
+            "count_deleted_files", "build_filesystem_timeline",
+            # Generic parsers
+            "read_text_file", "read_binary_preview", "search_text_file",
+            "read_text_file_section", "list_extracted_dir", "find_files",
+            # SQLite — Android apps store data in sqlite too (WhatsApp, etc.)
+            "sqlite_tables", "sqlite_query",
+        ]
+        for name in tool_names:
+            td = TOOL_CATALOG.get(name)
+            if td:
+                self.register_tool(td.name, td.description, td.input_schema, td.executor)
--- a/agents/ios_artifact.py
+++ b/agents/ios_artifact.py
@@ -0,0 +1,49 @@
+"""iOS Artifact Agent — analyses unpacked iOS extractions.
+
+DESIGN.md §4.7/§4.8: tree-mode iOS sources are the third evidence family
+the system handles (alongside disk images and pcaps). This agent owns the
+iOS-specific toolset; the grounded ``add_phenomenon`` contract from
+BaseAgent applies unchanged — every fact must cite a tool invocation.
+"""
+
+from __future__ import annotations
+
+from base_agent import BaseAgent
+from evidence_graph import EvidenceGraph
+from llm_client import LLMClient
+from tool_registry import TOOL_CATALOG
+
+
+class IOSArtifactAgent(BaseAgent):
+    name = "ios_artifact"
+    role = (
+        "iOS forensic analyst. You analyse unpacked iOS extractions — "
+        "binary/XML plists, SQLite databases (sms.db, ChatStorage.sqlite, "
+        "AddressBook.sqlitedb), the keychain (keychain-2.db), and the "
+        "iDevice_info.txt summary — to extract device identity, accounts, "
+        "messaging, contacts, and credential metadata. Domain-rooted iOS "
+        "trees (HomeDomain, AppDomain*, ProtectedDomain, NetworkDomain) "
+        "are your map; navigate by path, not by inode."
+    )
+
+    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
+        super().__init__(llm, graph)
+        self._register_tools()
+
+    def _register_tools(self) -> None:
+        tool_names = [
+            # navigation — find_files is the workhorse on 10k+-file iOS trees;
+            # list_extracted_dir is for initial layout summary only.
+            "list_extracted_dir", "find_files",
+            "read_text_file", "read_text_file_section", "read_binary_preview",
+            "search_text_file",
+            # iOS-specific parsers
+            "parse_plist",
+            "sqlite_tables", "sqlite_query",
+            "parse_ios_keychain",
+            "read_idevice_info",
+        ]
+        for name in tool_names:
+            td = TOOL_CATALOG.get(name)
+            if td:
+                self.register_tool(td.name, td.description, td.input_schema, td.executor)
--- a/agents/media.py
+++ b/agents/media.py
@@ -0,0 +1,52 @@
+"""Media Agent — OCR-based analysis of screenshot/photo evidence.
+
+DESIGN.md §4.7: the LLM backend has no vision capability, so JPEG/PNG
+evidence must go through tesseract first. The agent runs OCR, then
+records extracted strings — especially identifiers (wallet addresses,
+phone numbers, usernames) — via the grounded observe_identity gateway so
+they participate in cross-source coref the same way iOS keychain entries
+or Windows account names do.
+
+If the OCR runtime is missing on the host, ocr_image returns an explicit
+install hint; the agent should record that as a negative finding ("no
+text extracted — tesseract not installed") rather than guessing.
+"""
+
+from __future__ import annotations
+
+from base_agent import BaseAgent
+from evidence_graph import EvidenceGraph
+from llm_client import LLMClient
+from tool_registry import TOOL_CATALOG
+
+
+class MediaAgent(BaseAgent):
+    name = "media"
+    role = (
+        "Media / OCR forensic analyst. You analyse screenshots, photos, and "
+        "scanned documents — any pixel-based evidence the LLM cannot read "
+        "directly. Workflow: list_extracted_dir to enumerate images, "
+        "ocr_image on each promising one, then add_phenomenon (with the "
+        "OCR'd text as the verified_fact value) and observe_identity for "
+        "any wallet addresses, phone numbers, email addresses, or "
+        "usernames the text contains. If OCR fails because tesseract is "
+        "missing, RECORD that as a negative finding instead of fabricating "
+        "image content — the absence is a real fact about this run."
+    )
+
+    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
+        super().__init__(llm, graph)
+        self._register_tools()
+
+    def _register_tools(self) -> None:
+        tool_names = [
+            "ocr_image",
+            "list_extracted_dir", "find_files",
+            "read_binary_preview",
+            "read_text_file",
+            "search_text_file",
+        ]
+        for name in tool_names:
+            td = TOOL_CATALOG.get(name)
+            if td:
+                self.register_tool(td.name, td.description, td.input_schema, td.executor)
--- a/agents/report.py
+++ b/agents/report.py
@@ -12,9 +12,20 @@ class ReportAgent(BaseAgent):
    role = (
        "Forensic report writer. You synthesize all findings from the investigation "
        "into a structured, professional forensic analysis report organized by hypotheses.\n\n"
-        "Only include findings that have a source_tool attribution (marked VERIFIED). "
-        "If evidence lacks source attribution, mark it as UNVERIFIED. "
-        "Do NOT invent or fabricate any data, timestamps, or findings not present in the evidence."
+        "Phenomena are marked GROUNDED (verified_facts cite a real tool invocation), "
+        "TOOL-ONLY (source_tool set but no facts), or UNVERIFIED (neither). When "
+        "writing the report, render verified_facts as primary evidence with their "
+        "invocation citations, and render interpretation as 'agent analysis' so the "
+        "reader can tell ground truth from inference. Do NOT invent or fabricate any "
+        "data, timestamps, or findings not present in the evidence.\n\n"
+        "This is a cross-source case: phenomena come from multiple evidence "
+        "sources, and entities discovered on different sources may refer to the "
+        "same real-world actor. ALWAYS include:\n"
+        "  - 'Findings by Source' section sourced from get_phenomena_by_source\n"
+        "  - 'Actor Clusters' section sourced from get_actor_clusters (the "
+        "cross-source attribution view — multi-source clusters answer "
+        "'which findings on different devices belong to the same person')\n"
+        "  - 'Hypothesis × Evidence Matrix' from get_hypothesis_evidence_matrix"
    )
    # Calling save_report is BOTH the recording action and the completion
    # signal. tool_call_loop returns the moment save_report executes; the
@@ -38,9 +49,12 @@ class ReportAgent(BaseAgent):
            f"Investigation state:\n{self.graph.stats_summary()}\n\n"
            f"Your task: {task}\n\n"
            f"WORKFLOW:\n"
-            f"1. Call get_hypotheses_with_evidence, get_all_phenomena, get_entities, get_case_info "
-            f"   to gather all the data needed for the report. Make these calls in parallel.\n"
-            f"2. Assemble the complete markdown forensic report.\n"
+            f"1. Call get_hypotheses_with_evidence, get_all_phenomena, get_entities,\n"
+            f"   get_case_info, get_hypothesis_evidence_matrix, get_actor_clusters,\n"
+            f"   and get_phenomena_by_source in parallel — these are the seven data\n"
+            f"   sources you assemble the report from.\n"
+            f"2. Assemble the complete markdown forensic report. Cross-source\n"
+            f"   actor clusters and per-source breakdown are MANDATORY sections.\n"
            f"3. Call save_report(content=<full markdown>, output_path=\"report.md\").\n"
            f"   This single call is the completion signal — the run ENDS the moment it executes.\n"
            f"   Do NOT call any read tools after this point; they will not run.\n"
@@ -83,6 +97,45 @@ class ReportAgent(BaseAgent):
            executor=self._get_entities,
        )

+        self.register_tool(
+            name="get_hypothesis_evidence_matrix",
+            description=(
+                "Render the hypothesis × evidence pivot as a markdown table. "
+                "Columns: per edge_type counts, log_odds, confidence, status. "
+                "Embed this directly in the report to show how each hypothesis "
+                "stands relative to the others on a single screen."
+            ),
+            input_schema={"type": "object", "properties": {}},
+            executor=self._get_hypothesis_evidence_matrix,
+        )
+
+        self.register_tool(
+            name="get_actor_clusters",
+            description=(
+                "Render the cross-source actor clusters: each cluster is the "
+                "set of Entity nodes the system currently treats as the same "
+                "actor (via active same_as edges backed by coref hypotheses "
+                "≥ 0.8). Includes the aggregated identifier evidence per "
+                "cluster. Use this in the report's 'Actor Clusters' "
+                "section so readers see who-is-who across devices, not just "
+                "raw entity rows."
+            ),
+            input_schema={"type": "object", "properties": {}},
+            executor=self._get_actor_clusters,
+        )
+
+        self.register_tool(
+            name="get_phenomena_by_source",
+            description=(
+                "Group every phenomenon by its originating evidence source "
+                "(source_id). Use this to drive the report's 'Findings by "
+                "Source' section so each evidence item's per-device "
+                "contribution is auditable."
+            ),
+            input_schema={"type": "object", "properties": {}},
+            executor=self._get_phenomena_by_source,
+        )
+
        self.register_tool(
            name="save_report",
            description="Save the final report to a file.",
@@ -115,12 +168,24 @@ class ReportAgent(BaseAgent):
            items = [ph for ph in phenomena.values() if ph.category == cat]
            lines.append(f"\n--- {cat.upper()} ({len(items)} entries) ---")
            for ph in items:
-                verified = "VERIFIED" if ph.source_tool else "UNVERIFIED"
-                lines.append(f"\n[{verified}] {ph.title} ({ph.id})")
+                # Grounded = at least one verified fact AND a source_tool.
+                grounded = bool(ph.verified_facts) and bool(ph.source_tool)
+                marker = "GROUNDED" if grounded else (
+                    "TOOL-ONLY" if ph.source_tool else "UNVERIFIED"
+                )
+                lines.append(f"\n[{marker}] {ph.title} ({ph.id})")
                lines.append(f"  Source: {ph.source_agent} | Tool: {ph.source_tool or 'N/A'}")
                if ph.timestamp:
                    lines.append(f"  Timestamp: {ph.timestamp}")
-                lines.append(f"  {ph.description[:500]}")
+                if ph.verified_facts:
+                    lines.append(f"  Verified facts ({len(ph.verified_facts)}):")
+                    for f in ph.verified_facts:
+                        lines.append(
+                            f"    - [{f.get('type','?')}] {str(f.get('value',''))[:200]} "
+                            f"(cite: {f.get('invocation_id','?')})"
+                        )
+                if ph.interpretation:
+                    lines.append(f"  Analysis: {ph.interpretation[:500]}")
        return "\n".join(lines)

    async def _get_hypotheses_with_evidence(self) -> str:
@@ -150,12 +215,87 @@ class ReportAgent(BaseAgent):
        return "\n".join(lines)

    async def _get_case_info(self) -> str:
-        info = self.graph.case_info
        lines = ["=== Case Information ==="]
-        for k, v in info.items():
-            lines.append(f"  {k}: {v}")
-        lines.append(f"  Image path: {self.graph.image_path}")
-        lines.append(f"  Partition offset: {self.graph.partition_offset}")
+        case = self.graph.case
+        if case is not None:
+            lines.append(f"  case_id: {case.case_id}")
+            lines.append(f"  name: {case.name}")
+            for k, v in (case.meta or {}).items():
+                lines.append(f"  {k}: {v}")
+            lines.append(f"  sources: {len(case.sources)}")
+            for s in case.sources:
+                owner = f", owner={s.owner}" if s.owner else ""
+                platform = s.meta.get("platform") if s.meta else None
+                plat = f", platform={platform}" if platform else ""
+                lines.append(
+                    f"    - {s.id}: {s.label} "
+                    f"(type={s.type}, mode={s.access_mode}{plat}{owner})"
+                )
+        else:
+            # Legacy single-image fallback — surface whatever case_info dict
+            # was passed in (e.g. the old CFReDS MD5 block).
+            for k, v in (self.graph.case_info or {}).items():
+                lines.append(f"  {k}: {v}")
+            lines.append(f"  Image path: {self.graph.image_path}")
+            lines.append(f"  Partition offset: {self.graph.partition_offset}")
+        return "\n".join(lines)
+
+    async def _get_hypothesis_evidence_matrix(self) -> str:
+        return self.graph.hypothesis_evidence_matrix_markdown()
+
+    async def _get_actor_clusters(self) -> str:
+        clusters = self.graph.actor_clusters()
+        if not clusters:
+            return "(no entities recorded)"
+        # Show multi-member clusters first — they're the cross-source links
+        # the human reader most needs to see.
+        clusters.sort(key=lambda c: (-len(c["members"]), c["members"]))
+        lines = [f"=== Actor Clusters ({len(clusters)}) ==="]
+        for i, c in enumerate(clusters, 1):
+            members = c["members"]
+            label = "MULTI-SOURCE CLUSTER" if len(members) > 1 else "Single entity"
+            lines.append(f"\n[{label} #{i}] {len(members)} member(s):")
+            for eid in members:
+                ent = self.graph.entities.get(eid)
+                if ent:
+                    lines.append(f"  - {ent.summary()}")
+            if c["identifiers"]:
+                lines.append("  Aggregated identifiers:")
+                for ident in c["identifiers"]:
+                    strong_tag = "strong" if ident.get("strong") else "weak"
+                    lines.append(
+                        f"    [{strong_tag}] {ident.get('type')}={ident.get('value')} "
+                        f"(on {ident.get('on_entity')})"
+                    )
+            if c["coref_hypotheses"]:
+                lines.append("  Backing coref hypotheses (≥0.8 active):")
+                for hid in c["coref_hypotheses"]:
+                    hyp = self.graph.hypotheses.get(hid)
+                    if hyp:
+                        lines.append(f"    - {hid}: conf={hyp.confidence:.2f}, L={hyp.log_odds:+.2f}")
+        return "\n".join(lines)
+
+    async def _get_phenomena_by_source(self) -> str:
+        by_src: dict[str, list] = {}
+        for ph in self.graph.phenomena.values():
+            by_src.setdefault(ph.source_id or "(unbound)", []).append(ph)
+        if not by_src:
+            return "(no phenomena recorded)"
+        # Resolve source labels via graph.case when possible.
+        def _label(src_id: str) -> str:
+            if self.graph.case:
+                src = self.graph.case.get_source(src_id)
+                if src:
+                    return f"{src_id} — {src.label} ({src.type})"
+            return src_id
+
+        lines = [f"=== Phenomena by Source ({len(by_src)} source(s)) ==="]
+        for src_id in sorted(by_src):
+            phs = by_src[src_id]
+            lines.append(f"\n--- {_label(src_id)} ({len(phs)} phenomena) ---")
+            for ph in phs:
+                grounded = "G" if ph.verified_facts and ph.source_tool else "·"
+                lines.append(f"  [{grounded}] {ph.summary()}")
        return "\n".join(lines)

    async def _get_entities(self) -> str:
@@ -174,18 +314,27 @@ class ReportAgent(BaseAgent):
        return "\n".join(lines)

    async def _verify_phenomena(self) -> str:
-        verified = []
-        unverified = []
+        grounded: list[str] = []
+        tool_only: list[str] = []
+        unverified: list[str] = []
        for ph in self.graph.phenomena.values():
-            entry = f"  [{ph.category}] {ph.title} (agent: {ph.source_agent}, tool: {ph.source_tool or 'N/A'})"
-            if ph.source_tool:
-                verified.append(entry)
+            nf = len(ph.verified_facts)
+            entry = (
+                f"  [{ph.category}] {ph.title} "
+                f"(agent: {ph.source_agent}, tool: {ph.source_tool or 'N/A'}, facts: {nf})"
+            )
+            if ph.verified_facts and ph.source_tool:
+                grounded.append(entry)
+            elif ph.source_tool:
+                tool_only.append(entry)
            else:
                unverified.append(entry)

        lines = ["=== Phenomena Verification Report ==="]
-        lines.append(f"\nVERIFIED ({len(verified)} — have source_tool):")
-        lines.extend(verified)
+        lines.append(f"\nGROUNDED ({len(grounded)} — facts + source_tool):")
+        lines.extend(grounded)
+        lines.append(f"\nTOOL-ONLY ({len(tool_only)} — source_tool, no facts):")
+        lines.extend(tool_only)
        lines.append(f"\nUNVERIFIED ({len(unverified)} — no source_tool):")
        lines.extend(unverified)
        return "\n".join(lines)
--- a/agents/timeline.py
+++ b/agents/timeline.py
@@ -122,7 +122,15 @@ class TimelineAgent(BaseAgent):
        lines = []
        for ph in items:
            lines.append(f"{ph.timestamp} | [{ph.category}] {ph.title} ({ph.id})")
-            lines.append(f"  {ph.description[:150]}")
+            preview = ph.interpretation[:150] if ph.interpretation else ""
+            if ph.verified_facts:
+                fact_preview = ", ".join(
+                    f"{f.get('type','?')}={str(f.get('value',''))[:40]}"
+                    for f in ph.verified_facts[:3]
+                )
+                preview = f"{preview} [facts: {fact_preview}]" if preview else f"[facts: {fact_preview}]"
+            if preview:
+                lines.append(f"  {preview}")
        return "\n".join(lines)

    async def _add_temporal_edge(