feat(refit): complete S1-S6 — case abstraction, grounding, log-odds, plugins, coref, multi-source

Consolidates the long-running refit work (DESIGN.md as authoritative spec)
into a single baseline commit. Six stages landed together:

  S1  Case + EvidenceSource abstraction; tools parameterised by source_id
      (case.py, main.py multi-source bootstrap, .bin extension support)
  S2  Grounding gateway in add_phenomenon: verified_facts cite real
      ToolInvocation ids; substring / normalised match enforced; agent +
      task scope checked. Phenomenon.description split into verified_facts
      (grounded) + interpretation (free text). [invocation: inv-xxx]
      prefix on every wrapped tool result so the LLM can cite.
  S3  Confidence as additive log-odds: edge_type → log10(LR) calibration
      table; commutative updates; supported / refuted thresholds derived
      from log_odds; hypothesis × evidence matrix view.
  S4  iOS plugin: unzip_archive + parse_plist / sqlite_tables /
      sqlite_query / parse_ios_keychain / read_idevice_info;
      IOSArtifactAgent; SOURCE_TYPE_AGENTS routing.
  S5  Cross-source entity resolution: typed identifiers on Entity,
      observe_identity gateway, auto coref hypothesis with shared /
      conflicting strong/weak LR edges, reversible same_as edges,
      actor_clusters() view.
  S6  Android partition probe + AndroidArtifactAgent; MediaAgent with
      OCR fallback; orchestrator Phase 1 iterates every analysable
      source; platform-aware get_triage_agent_type; ReportAgent renders
      actor clusters + per-source breakdown.

142 unit tests / 1 skipped — full coverage of the new gateway, log-odds
math, coref hypothesis fall-out, and orchestrator multi-source dispatch.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
BattleTag
2026-05-21 02:12:10 -10:00
parent 444d58726a
commit 81ade8f7ac
24 changed files with 5137 additions and 244 deletions

View File

@@ -0,0 +1,58 @@
"""Android Artifact Agent — multi-partition analysis of raw Android dumps.
DESIGN.md §4.7 安卓 (Android): ``mmls`` slices the dump into partitions; each one is
its own analysable surface. Ext4-backed partitions (typically SYSTEM,
USERDATA when not FBE-encrypted, EFS in some variants) yield to TSK; raw
partitions (BOOT, RECOVERY, RADIO, MODEM blobs) are best mined with
``search_strings``. Userdata is the prize and is often FBE-encrypted on
modern devices — the agent must check fsstat before assuming readability
(see ``probe_android_partitions`` for the survey).
"""
from __future__ import annotations
from base_agent import BaseAgent
from evidence_graph import EvidenceGraph
from llm_client import LLMClient
from tool_registry import TOOL_CATALOG
class AndroidArtifactAgent(BaseAgent):
    """Agent that works through a raw Android dump one partition at a time.

    The toolset is pulled from ``TOOL_CATALOG`` at construction time; any
    requested name that the catalogue does not provide is skipped.
    """

    name = "android_artifact"
    role = (
        "Android forensic analyst. You navigate raw Android disk dumps "
        "(blk0_sda-style images) partition by partition. Workflow: call "
        "probe_android_partitions ONCE to map the disk; pick the partitions "
        "with fs_type=Ext4 or fs_type=F2FS (SYSTEM, USERDATA if readable, "
        "EFS); for each, call set_active_partition(offset_from_512_sector_column) "
        "and then list_directory / extract_file / search_strings as usual. "
        "For raw partitions (BOOT, RECOVERY, RADIO, TOMBSTONES) skip directly "
        "to search_strings — they have no filesystem. If USERDATA shows "
        "fs_type=unknown it is almost certainly FBE-encrypted: record that "
        "as a negative finding (the absence IS evidence) and move on to "
        "what's reachable."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Set up the base agent, then attach the Android toolset."""
        super().__init__(llm, graph)
        self._register_tools()

    def _register_tools(self) -> None:
        """Register each catalogue tool this agent is allowed to call."""
        android_specific = (
            "probe_android_partitions",
            "set_active_partition",
        )
        # Reused TSK toolset — partition_offset comes from active_source.
        tsk_tools = (
            "partition_info", "filesystem_info", "list_directory",
            "extract_file", "find_file", "search_strings",
            "count_deleted_files", "build_filesystem_timeline",
        )
        generic_parsers = (
            "read_text_file", "read_binary_preview", "search_text_file",
            "read_text_file_section", "list_extracted_dir", "find_files",
        )
        # Android apps store data in sqlite too (WhatsApp, etc.).
        sqlite_tools = ("sqlite_tables", "sqlite_query")

        wanted = (*android_specific, *tsk_tools, *generic_parsers, *sqlite_tools)
        for tool_name in wanted:
            entry = TOOL_CATALOG.get(tool_name)
            if not entry:
                # Not in the catalogue: leave it unregistered.
                continue
            self.register_tool(
                entry.name, entry.description, entry.input_schema, entry.executor
            )

49
agents/ios_artifact.py Normal file
View File

@@ -0,0 +1,49 @@
"""iOS Artifact Agent — analyses unpacked iOS extractions.
DESIGN.md §4.7/§4.8: tree-mode iOS sources are the third evidence family
the system handles (alongside disk images and pcaps). This agent owns the
iOS-specific toolset; the grounded ``add_phenomenon`` contract from
BaseAgent applies unchanged — every fact must cite a tool invocation.
"""
from __future__ import annotations
from base_agent import BaseAgent
from evidence_graph import EvidenceGraph
from llm_client import LLMClient
from tool_registry import TOOL_CATALOG
class IOSArtifactAgent(BaseAgent):
    """Agent that analyses unpacked iOS extractions by path.

    The toolset is pulled from ``TOOL_CATALOG`` at construction time; any
    requested name that the catalogue does not provide is skipped.
    """

    name = "ios_artifact"
    role = (
        "iOS forensic analyst. You analyse unpacked iOS extractions — "
        "binary/XML plists, SQLite databases (sms.db, ChatStorage.sqlite, "
        "AddressBook.sqlitedb), the keychain (keychain-2.db), and the "
        "iDevice_info.txt summary — to extract device identity, accounts, "
        "messaging, contacts, and credential metadata. Domain-rooted iOS "
        "trees (HomeDomain, AppDomain*, ProtectedDomain, NetworkDomain) "
        "are your map; navigate by path, not by inode."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Set up the base agent, then attach the iOS toolset."""
        super().__init__(llm, graph)
        self._register_tools()

    def _register_tools(self) -> None:
        """Register each catalogue tool this agent is allowed to call."""
        # Navigation — find_files is the workhorse on 10k+-file iOS trees;
        # list_extracted_dir is for the initial layout summary only.
        navigation = (
            "list_extracted_dir", "find_files",
            "read_text_file", "read_text_file_section", "read_binary_preview",
            "search_text_file",
        )
        ios_parsers = (
            "parse_plist",
            "sqlite_tables", "sqlite_query",
            "parse_ios_keychain",
            "read_idevice_info",
        )

        for tool_name in (*navigation, *ios_parsers):
            entry = TOOL_CATALOG.get(tool_name)
            if not entry:
                # Not in the catalogue: leave it unregistered.
                continue
            self.register_tool(
                entry.name, entry.description, entry.input_schema, entry.executor
            )

52
agents/media.py Normal file
View File

@@ -0,0 +1,52 @@
"""Media Agent — OCR-based analysis of screenshot/photo evidence.
DESIGN.md §4.7: the LLM backend has no vision capability, so JPEG/PNG
evidence must go through tesseract first. The agent runs OCR, then
records extracted strings — especially identifiers (wallet addresses,
phone numbers, usernames) — via the grounded observe_identity gateway so
they participate in cross-source coref the same way iOS keychain entries
or Windows account names do.
If the OCR runtime is missing on the host, ocr_image returns an explicit
install hint; the agent should record that as a negative finding ("no
text extracted — tesseract not installed") rather than guessing.
"""
from __future__ import annotations
from base_agent import BaseAgent
from evidence_graph import EvidenceGraph
from llm_client import LLMClient
from tool_registry import TOOL_CATALOG
class MediaAgent(BaseAgent):
    """Agent that analyses image evidence through OCR output.

    The toolset is pulled from ``TOOL_CATALOG`` at construction time; any
    requested name that the catalogue does not provide is skipped.
    """

    name = "media"
    role = (
        "Media / OCR forensic analyst. You analyse screenshots, photos, and "
        "scanned documents — any pixel-based evidence the LLM cannot read "
        "directly. Workflow: list_extracted_dir to enumerate images, "
        "ocr_image on each promising one, then add_phenomenon (with the "
        "OCR'd text as the verified_fact value) and observe_identity for "
        "any wallet addresses, phone numbers, email addresses, or "
        "usernames the text contains. If OCR fails because tesseract is "
        "missing, RECORD that as a negative finding instead of fabricating "
        "image content — the absence is a real fact about this run."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Set up the base agent, then attach the media toolset."""
        super().__init__(llm, graph)
        self._register_tools()

    def _register_tools(self) -> None:
        """Register each catalogue tool this agent is allowed to call."""
        wanted = (
            "ocr_image",
            "list_extracted_dir", "find_files",
            "read_binary_preview",
            "read_text_file",
            "search_text_file",
        )
        for tool_name in wanted:
            entry = TOOL_CATALOG.get(tool_name)
            if not entry:
                # Not in the catalogue: leave it unregistered.
                continue
            self.register_tool(
                entry.name, entry.description, entry.input_schema, entry.executor
            )

View File

@@ -12,9 +12,20 @@ class ReportAgent(BaseAgent):
role = (
"Forensic report writer. You synthesize all findings from the investigation "
"into a structured, professional forensic analysis report organized by hypotheses.\n\n"
"Only include findings that have a source_tool attribution (marked VERIFIED). "
"If evidence lacks source attribution, mark it as UNVERIFIED. "
"Do NOT invent or fabricate any data, timestamps, or findings not present in the evidence."
"Phenomena are marked GROUNDED (verified_facts cite a real tool invocation), "
"TOOL-ONLY (source_tool set but no facts), or UNVERIFIED (neither). When "
"writing the report, render verified_facts as primary evidence with their "
"invocation citations, and render interpretation as 'agent analysis' so the "
"reader can tell ground truth from inference. Do NOT invent or fabricate any "
"data, timestamps, or findings not present in the evidence.\n\n"
"This is a cross-source case: phenomena come from multiple evidence "
"sources, and entities discovered on different sources may refer to the "
"same real-world actor. ALWAYS include:\n"
" - 'Findings by Source' section sourced from get_phenomena_by_source\n"
" - 'Actor Clusters' section sourced from get_actor_clusters (the "
"cross-source attribution view — multi-source clusters answer "
"'which findings on different devices belong to the same person')\n"
" - 'Hypothesis × Evidence Matrix' from get_hypothesis_evidence_matrix"
)
# Calling save_report is BOTH the recording action and the completion
# signal. tool_call_loop returns the moment save_report executes; the
@@ -38,9 +49,12 @@ class ReportAgent(BaseAgent):
f"Investigation state:\n{self.graph.stats_summary()}\n\n"
f"Your task: {task}\n\n"
f"WORKFLOW:\n"
f"1. Call get_hypotheses_with_evidence, get_all_phenomena, get_entities, get_case_info "
f" to gather all the data needed for the report. Make these calls in parallel.\n"
f"2. Assemble the complete markdown forensic report.\n"
f"1. Call get_hypotheses_with_evidence, get_all_phenomena, get_entities,\n"
f" get_case_info, get_hypothesis_evidence_matrix, get_actor_clusters,\n"
f" and get_phenomena_by_source in parallel — these are the seven data\n"
f" sources you assemble the report from.\n"
f"2. Assemble the complete markdown forensic report. Cross-source\n"
f" actor clusters and per-source breakdown are MANDATORY sections.\n"
f"3. Call save_report(content=<full markdown>, output_path=\"report.md\").\n"
f" This single call is the completion signal — the run ENDS the moment it executes.\n"
f" Do NOT call any read tools after this point; they will not run.\n"
@@ -83,6 +97,45 @@ class ReportAgent(BaseAgent):
executor=self._get_entities,
)
self.register_tool(
name="get_hypothesis_evidence_matrix",
description=(
"Render the hypothesis × evidence pivot as a markdown table. "
"Columns: per edge_type counts, log_odds, confidence, status. "
"Embed this directly in the report to show how each hypothesis "
"stands relative to the others on a single screen."
),
input_schema={"type": "object", "properties": {}},
executor=self._get_hypothesis_evidence_matrix,
)
self.register_tool(
name="get_actor_clusters",
description=(
"Render the cross-source actor clusters: each cluster is the "
"set of Entity nodes the system currently treats as the same "
"actor (via active same_as edges backed by coref hypotheses "
"≥ 0.8). Includes the aggregated identifier evidence per "
"cluster. Use this in the report's 'Entities / Actors' "
"section so readers see who-is-who across devices, not just "
"raw entity rows."
),
input_schema={"type": "object", "properties": {}},
executor=self._get_actor_clusters,
)
self.register_tool(
name="get_phenomena_by_source",
description=(
"Group every phenomenon by its originating evidence source "
"(source_id). Use this to drive the report's 'Findings by "
"Source' section so each evidence item's per-device "
"contribution is auditable."
),
input_schema={"type": "object", "properties": {}},
executor=self._get_phenomena_by_source,
)
self.register_tool(
name="save_report",
description="Save the final report to a file.",
@@ -115,12 +168,24 @@ class ReportAgent(BaseAgent):
items = [ph for ph in phenomena.values() if ph.category == cat]
lines.append(f"\n--- {cat.upper()} ({len(items)} entries) ---")
for ph in items:
verified = "VERIFIED" if ph.source_tool else "UNVERIFIED"
lines.append(f"\n[{verified}] {ph.title} ({ph.id})")
# Grounded = at least one verified fact AND a source_tool.
grounded = bool(ph.verified_facts) and bool(ph.source_tool)
marker = "GROUNDED" if grounded else (
"TOOL-ONLY" if ph.source_tool else "UNVERIFIED"
)
lines.append(f"\n[{marker}] {ph.title} ({ph.id})")
lines.append(f" Source: {ph.source_agent} | Tool: {ph.source_tool or 'N/A'}")
if ph.timestamp:
lines.append(f" Timestamp: {ph.timestamp}")
lines.append(f" {ph.description[:500]}")
if ph.verified_facts:
lines.append(f" Verified facts ({len(ph.verified_facts)}):")
for f in ph.verified_facts:
lines.append(
f" - [{f.get('type','?')}] {str(f.get('value',''))[:200]} "
f"(cite: {f.get('invocation_id','?')})"
)
if ph.interpretation:
lines.append(f" Analysis: {ph.interpretation[:500]}")
return "\n".join(lines)
async def _get_hypotheses_with_evidence(self) -> str:
@@ -150,12 +215,87 @@ class ReportAgent(BaseAgent):
return "\n".join(lines)
async def _get_case_info(self) -> str:
    """Render case-level metadata as a text listing for the report.

    When the graph carries a ``case`` object, its id, name, meta entries and
    one line per evidence source are listed. Otherwise the legacy
    single-image fields (``case_info``, ``image_path``, ``partition_offset``)
    are listed instead. The two layouts are mutually exclusive, so the
    legacy fields are never emitted alongside the case view.

    Returns:
        The newline-joined listing, starting with a fixed header line.
    """
    lines = ["=== Case Information ==="]
    case = self.graph.case

    if case is None:
        # Legacy single-image fallback — surface whatever case_info dict
        # was passed in (e.g. the old CFReDS MD5 block).
        for k, v in (self.graph.case_info or {}).items():
            lines.append(f" {k}: {v}")
        lines.append(f" Image path: {self.graph.image_path}")
        lines.append(f" Partition offset: {self.graph.partition_offset}")
        return "\n".join(lines)

    lines.append(f" case_id: {case.case_id}")
    lines.append(f" name: {case.name}")
    for k, v in (case.meta or {}).items():
        lines.append(f" {k}: {v}")

    lines.append(f" sources: {len(case.sources)}")
    for s in case.sources:
        # Owner and platform are optional; omit the fragment when absent.
        owner = f", owner={s.owner}" if s.owner else ""
        platform = s.meta.get("platform") if s.meta else None
        plat = f", platform={platform}" if platform else ""
        lines.append(
            f" - {s.id}: {s.label} "
            f"(type={s.type}, mode={s.access_mode}{plat}{owner})"
        )
    return "\n".join(lines)
async def _get_hypothesis_evidence_matrix(self) -> str:
    """Return the graph's hypothesis × evidence matrix as markdown text."""
    matrix_markdown = self.graph.hypothesis_evidence_matrix_markdown()
    return matrix_markdown
async def _get_actor_clusters(self) -> str:
    """Render the graph's actor clusters as a text listing.

    Each cluster lists its member entities, then its aggregated
    identifiers and backing coref hypotheses when it has any. Clusters are
    ordered largest first, with the member list as tie-break.

    Returns:
        The newline-joined listing, or ``"(no entities recorded)"`` when
        the graph reports no clusters.
    """
    clusters = self.graph.actor_clusters()
    if not clusters:
        return "(no entities recorded)"

    # Show multi-member clusters first — they're the cross-source links
    # the human reader most needs to see. sorted() builds a new list, so
    # the sequence handed back by the graph is not reordered in place.
    ordered = sorted(clusters, key=lambda c: (-len(c["members"]), c["members"]))

    lines = [f"=== Actor Clusters ({len(ordered)}) ==="]
    for i, c in enumerate(ordered, 1):
        members = c["members"]
        label = "MULTI-SOURCE CLUSTER" if len(members) > 1 else "Single entity"
        lines.append(f"\n[{label} #{i}] {len(members)} member(s):")

        for eid in members:
            ent = self.graph.entities.get(eid)
            if ent:
                lines.append(f" - {ent.summary()}")

        if c["identifiers"]:
            lines.append(" Aggregated identifiers:")
            for ident in c["identifiers"]:
                strong_tag = "strong" if ident.get("strong") else "weak"
                lines.append(
                    f" [{strong_tag}] {ident.get('type')}={ident.get('value')} "
                    f"(on {ident.get('on_entity')})"
                )

        if c["coref_hypotheses"]:
            lines.append(" Backing coref hypotheses (≥0.8 active):")
            for hid in c["coref_hypotheses"]:
                hyp = self.graph.hypotheses.get(hid)
                if hyp:
                    lines.append(
                        f" - {hid}: conf={hyp.confidence:.2f}, L={hyp.log_odds:+.2f}"
                    )
    return "\n".join(lines)
async def _get_phenomena_by_source(self) -> str:
    """Group every phenomenon by ``source_id`` and render one section each.

    Phenomena without a ``source_id`` are collected under ``"(unbound)"``.
    Sections are emitted in sorted source-id order. Each row is flagged
    ``G`` when the phenomenon has both verified facts and a source tool,
    and ``·`` otherwise.

    Returns:
        The newline-joined listing, or ``"(no phenomena recorded)"`` when
        the graph holds no phenomena.
    """
    by_src: dict[str, list] = {}
    for ph in self.graph.phenomena.values():
        by_src.setdefault(ph.source_id or "(unbound)", []).append(ph)
    if not by_src:
        return "(no phenomena recorded)"

    case = self.graph.case

    def _label(src_id: str) -> str:
        # Resolve source labels via graph.case when possible; fall back to
        # the bare id when there is no case or the id is not a known source.
        src = case.get_source(src_id) if case else None
        if src:
            # Separate id from label so the two do not run together.
            return f"{src_id} — {src.label} ({src.type})"
        return src_id

    lines = [f"=== Phenomena by Source ({len(by_src)} source(s)) ==="]
    for src_id in sorted(by_src):
        phs = by_src[src_id]
        lines.append(f"\n--- {_label(src_id)} ({len(phs)} phenomena) ---")
        for ph in phs:
            grounded = "G" if ph.verified_facts and ph.source_tool else "·"
            lines.append(f" [{grounded}] {ph.summary()}")
    return "\n".join(lines)
async def _get_entities(self) -> str:
@@ -174,18 +314,27 @@ class ReportAgent(BaseAgent):
return "\n".join(lines)
async def _verify_phenomena(self) -> str:
    """Classify every phenomenon by how well it is backed and list the result.

    Three buckets are reported, in this order:

    * GROUNDED — has verified facts and a source tool.
    * TOOL-ONLY — has a source tool but no verified facts.
    * UNVERIFIED — has no source tool.

    Returns:
        The newline-joined report; each bucket header carries its count.
    """
    grounded: list[str] = []
    tool_only: list[str] = []
    unverified: list[str] = []

    for ph in self.graph.phenomena.values():
        nf = len(ph.verified_facts)
        entry = (
            f" [{ph.category}] {ph.title} "
            f"(agent: {ph.source_agent}, tool: {ph.source_tool or 'N/A'}, facts: {nf})"
        )
        if ph.verified_facts and ph.source_tool:
            grounded.append(entry)
        elif ph.source_tool:
            tool_only.append(entry)
        else:
            unverified.append(entry)

    lines = ["=== Phenomena Verification Report ==="]
    # All three headers share the "(count — criteria)" shape.
    lines.append(f"\nGROUNDED ({len(grounded)} — facts + source_tool):")
    lines.extend(grounded)
    lines.append(f"\nTOOL-ONLY ({len(tool_only)} — source_tool, no facts):")
    lines.extend(tool_only)
    lines.append(f"\nUNVERIFIED ({len(unverified)} — no source_tool):")
    lines.extend(unverified)
    return "\n".join(lines)

View File

@@ -122,7 +122,15 @@ class TimelineAgent(BaseAgent):
lines = []
for ph in items:
lines.append(f"{ph.timestamp} | [{ph.category}] {ph.title} ({ph.id})")
lines.append(f" {ph.description[:150]}")
preview = ph.interpretation[:150] if ph.interpretation else ""
if ph.verified_facts:
fact_preview = ", ".join(
f"{f.get('type','?')}={str(f.get('value',''))[:40]}"
for f in ph.verified_facts[:3]
)
preview = f"{preview} [facts: {fact_preview}]" if preview else f"[facts: {fact_preview}]"
if preview:
lines.append(f" {preview}")
return "\n".join(lines)
async def _add_temporal_edge(