feat(refit): complete S1-S6 — case abstraction, grounding, log-odds, plugins, coref, multi-source
Consolidates the long-running refit work (DESIGN.md as authoritative spec)
into a single baseline commit. Six stages landed together:
S1 Case + EvidenceSource abstraction; tools parameterised by source_id
(case.py, main.py multi-source bootstrap, .bin extension support)
S2 Grounding gateway in add_phenomenon: verified_facts cite real
ToolInvocation ids; substring / normalised match enforced; agent +
task scope checked. Phenomenon.description split into verified_facts
(grounded) + interpretation (free text). [invocation: inv-xxx]
prefix on every wrapped tool result so the LLM can cite.
S3 Confidence as additive log-odds: edge_type → log10(LR) calibration
table; commutative updates; supported / refuted thresholds derived
from log_odds; hypothesis × evidence matrix view.
S4 iOS plugin: unzip_archive + parse_plist / sqlite_tables /
sqlite_query / parse_ios_keychain / read_idevice_info;
IOSArtifactAgent; SOURCE_TYPE_AGENTS routing.
S5 Cross-source entity resolution: typed identifiers on Entity,
observe_identity gateway, auto coref hypothesis with shared /
conflicting strong/weak LR edges, reversible same_as edges,
actor_clusters() view.
S6 Android partition probe + AndroidArtifactAgent; MediaAgent with
OCR fallback; orchestrator Phase 1 iterates every analysable
source; platform-aware get_triage_agent_type; ReportAgent renders
actor clusters + per-source breakdown.
142 unit tests / 1 skipped — full coverage of the new gateway, log-odds
math, coref hypothesis fall-out, and orchestrator multi-source dispatch.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
58
agents/android_artifact.py
Normal file
58
agents/android_artifact.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""Android Artifact Agent — multi-partition analysis of raw Android dumps.
|
||||
|
||||
DESIGN.md §4.7 安卓: ``mmls`` slices the dump into partitions; each one is
|
||||
its own analysable surface. Ext4-backed partitions (typically SYSTEM,
|
||||
USERDATA when not FBE-encrypted, EFS in some variants) yield to TSK; raw
|
||||
partitions (BOOT, RECOVERY, RADIO, MODEM blobs) are best mined with
|
||||
``search_strings``. Userdata is the prize and is often FBE-encrypted on
|
||||
modern devices — the agent must check fsstat before assuming readability
|
||||
(see ``probe_android_partitions`` for the survey).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from base_agent import BaseAgent
|
||||
from evidence_graph import EvidenceGraph
|
||||
from llm_client import LLMClient
|
||||
from tool_registry import TOOL_CATALOG
|
||||
|
||||
|
||||
class AndroidArtifactAgent(BaseAgent):
    """Agent configured for raw Android disk dumps, worked partition by partition.

    The class only carries configuration: ``name`` and ``role`` are class
    attributes, and construction registers the tools named in
    ``_register_tools`` from ``TOOL_CATALOG``.
    """

    name = "android_artifact"
    role = (
        "Android forensic analyst. You navigate raw Android disk dumps "
        "(blk0_sda-style images) partition by partition. Workflow: call "
        "probe_android_partitions ONCE to map the disk; pick the partitions "
        "with fs_type=Ext4 or fs_type=F2FS (SYSTEM, USERDATA if readable, "
        "EFS); for each, call set_active_partition(offset_from_512_sector_column) "
        "and then list_directory / extract_file / search_strings as usual. "
        "For raw partitions (BOOT, RECOVERY, RADIO, TOMBSTONES) skip directly "
        "to search_strings — they have no filesystem. If USERDATA shows "
        "fs_type=unknown it is almost certainly FBE-encrypted: record that "
        "as a negative finding (the absence IS evidence) and move on to "
        "what's reachable."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Initialise the base agent, then attach the Android toolset."""
        super().__init__(llm, graph)
        self._register_tools()

    def _register_tools(self) -> None:
        """Register each wanted tool that ``TOOL_CATALOG`` can supply.

        A name whose catalog lookup is falsy (e.g. absent) is skipped
        without error, so the agent simply runs without that tool.
        """
        wanted = (
            # Android-specific
            "probe_android_partitions",
            "set_active_partition",
            # Reused TSK toolset — partition_offset comes from active_source
            "partition_info", "filesystem_info", "list_directory",
            "extract_file", "find_file", "search_strings",
            "count_deleted_files", "build_filesystem_timeline",
            # Generic parsers
            "read_text_file", "read_binary_preview", "search_text_file",
            "read_text_file_section", "list_extracted_dir", "find_files",
            # SQLite — Android apps store data in sqlite too (WhatsApp, etc.)
            "sqlite_tables", "sqlite_query",
        )
        for tool_name in wanted:
            spec = TOOL_CATALOG.get(tool_name)
            if not spec:
                continue
            self.register_tool(spec.name, spec.description, spec.input_schema, spec.executor)
|
||||
49
agents/ios_artifact.py
Normal file
49
agents/ios_artifact.py
Normal file
@@ -0,0 +1,49 @@
|
||||
"""iOS Artifact Agent — analyses unpacked iOS extractions.
|
||||
|
||||
DESIGN.md §4.7/§4.8: tree-mode iOS sources are the third evidence family
|
||||
the system handles (alongside disk images and pcaps). This agent owns the
|
||||
iOS-specific toolset; the grounded ``add_phenomenon`` contract from
|
||||
BaseAgent applies unchanged — every fact must cite a tool invocation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from base_agent import BaseAgent
|
||||
from evidence_graph import EvidenceGraph
|
||||
from llm_client import LLMClient
|
||||
from tool_registry import TOOL_CATALOG
|
||||
|
||||
|
||||
class IOSArtifactAgent(BaseAgent):
    """Agent configured for unpacked (tree-mode) iOS extractions.

    Holds the agent ``name`` and ``role`` text and, on construction,
    registers the navigation and iOS parser tools from ``TOOL_CATALOG``.
    """

    name = "ios_artifact"
    role = (
        "iOS forensic analyst. You analyse unpacked iOS extractions — "
        "binary/XML plists, SQLite databases (sms.db, ChatStorage.sqlite, "
        "AddressBook.sqlitedb), the keychain (keychain-2.db), and the "
        "iDevice_info.txt summary — to extract device identity, accounts, "
        "messaging, contacts, and credential metadata. Domain-rooted iOS "
        "trees (HomeDomain, AppDomain*, ProtectedDomain, NetworkDomain) "
        "are your map; navigate by path, not by inode."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Initialise the base agent, then attach the iOS toolset."""
        super().__init__(llm, graph)
        self._register_tools()

    def _register_tools(self) -> None:
        """Register each wanted tool that ``TOOL_CATALOG`` can supply.

        A name whose catalog lookup is falsy (e.g. absent) is skipped
        without error.
        """
        wanted = (
            # navigation — find_files is the workhorse on 10k+-file iOS trees;
            # list_extracted_dir is for initial layout summary only.
            "list_extracted_dir", "find_files",
            "read_text_file", "read_text_file_section", "read_binary_preview",
            "search_text_file",
            # iOS-specific parsers
            "parse_plist",
            "sqlite_tables", "sqlite_query",
            "parse_ios_keychain",
            "read_idevice_info",
        )
        for tool_name in wanted:
            spec = TOOL_CATALOG.get(tool_name)
            if not spec:
                continue
            self.register_tool(spec.name, spec.description, spec.input_schema, spec.executor)
|
||||
52
agents/media.py
Normal file
52
agents/media.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Media Agent — OCR-based analysis of screenshot/photo evidence.
|
||||
|
||||
DESIGN.md §4.7: the LLM backend has no vision capability, so JPEG/PNG
|
||||
evidence must go through tesseract first. The agent runs OCR, then
|
||||
records extracted strings — especially identifiers (wallet addresses,
|
||||
phone numbers, usernames) — via the grounded observe_identity gateway so
|
||||
they participate in cross-source coref the same way iOS keychain entries
|
||||
or Windows account names do.
|
||||
|
||||
If the OCR runtime is missing on the host, ocr_image returns an explicit
|
||||
install hint; the agent should record that as a negative finding ("no
|
||||
text extracted — tesseract not installed") rather than guessing.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from base_agent import BaseAgent
|
||||
from evidence_graph import EvidenceGraph
|
||||
from llm_client import LLMClient
|
||||
from tool_registry import TOOL_CATALOG
|
||||
|
||||
|
||||
class MediaAgent(BaseAgent):
    """Agent configured for image evidence that must be read through OCR.

    Holds the agent ``name`` and ``role`` text and, on construction,
    registers the OCR and file-inspection tools from ``TOOL_CATALOG``.
    """

    name = "media"
    role = (
        "Media / OCR forensic analyst. You analyse screenshots, photos, and "
        "scanned documents — any pixel-based evidence the LLM cannot read "
        "directly. Workflow: list_extracted_dir to enumerate images, "
        "ocr_image on each promising one, then add_phenomenon (with the "
        "OCR'd text as the verified_fact value) and observe_identity for "
        "any wallet addresses, phone numbers, email addresses, or "
        "usernames the text contains. If OCR fails because tesseract is "
        "missing, RECORD that as a negative finding instead of fabricating "
        "image content — the absence is a real fact about this run."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Initialise the base agent, then attach the media toolset."""
        super().__init__(llm, graph)
        self._register_tools()

    def _register_tools(self) -> None:
        """Register each wanted tool that ``TOOL_CATALOG`` can supply.

        A name whose catalog lookup is falsy (e.g. absent) is skipped
        without error.
        """
        wanted = (
            "ocr_image",
            "list_extracted_dir", "find_files",
            "read_binary_preview",
            "read_text_file",
            "search_text_file",
        )
        for tool_name in wanted:
            spec = TOOL_CATALOG.get(tool_name)
            if not spec:
                continue
            self.register_tool(spec.name, spec.description, spec.input_schema, spec.executor)
|
||||
191
agents/report.py
191
agents/report.py
@@ -12,9 +12,20 @@ class ReportAgent(BaseAgent):
|
||||
# Role text for the report agent. Three things it must convey:
#   1. the three grounding markers (GROUNDED / TOOL-ONLY / UNVERIFIED) and
#      how verified_facts versus interpretation are to be rendered;
#   2. the no-fabrication rule, stated exactly once;
#   3. the three sections every cross-source report must contain, each tied
#      to the tool that supplies its data.
# Paragraphs are separated by "\n\n" so the grounding rules and the
# cross-source requirements read as distinct instructions.
role = (
    "Forensic report writer. You synthesize all findings from the investigation "
    "into a structured, professional forensic analysis report organized by hypotheses.\n\n"
    "Phenomena are marked GROUNDED (verified_facts cite a real tool invocation), "
    "TOOL-ONLY (source_tool set but no facts), or UNVERIFIED (neither). When "
    "writing the report, render verified_facts as primary evidence with their "
    "invocation citations, and render interpretation as 'agent analysis' so the "
    "reader can tell ground truth from inference. Do NOT invent or fabricate any "
    "data, timestamps, or findings not present in the evidence.\n\n"
    "This is a cross-source case: phenomena come from multiple evidence "
    "sources, and entities discovered on different sources may refer to the "
    "same real-world actor. ALWAYS include:\n"
    " - 'Findings by Source' section sourced from get_phenomena_by_source\n"
    " - 'Actor Clusters' section sourced from get_actor_clusters (the "
    "cross-source attribution view — multi-source clusters answer "
    "'which findings on different devices belong to the same person')\n"
    " - 'Hypothesis × Evidence Matrix' from get_hypothesis_evidence_matrix"
)
|
||||
# Calling save_report is BOTH the recording action and the completion
|
||||
# signal. tool_call_loop returns the moment save_report executes; the
|
||||
@@ -38,9 +49,12 @@ class ReportAgent(BaseAgent):
|
||||
f"Investigation state:\n{self.graph.stats_summary()}\n\n"
|
||||
f"Your task: {task}\n\n"
|
||||
f"WORKFLOW:\n"
|
||||
f"1. Call get_hypotheses_with_evidence, get_all_phenomena, get_entities, get_case_info "
|
||||
f" to gather all the data needed for the report. Make these calls in parallel.\n"
|
||||
f"2. Assemble the complete markdown forensic report.\n"
|
||||
f"1. Call get_hypotheses_with_evidence, get_all_phenomena, get_entities,\n"
|
||||
f" get_case_info, get_hypothesis_evidence_matrix, get_actor_clusters,\n"
|
||||
f" and get_phenomena_by_source in parallel — these are the eight data\n"
|
||||
f" sources you assemble the report from.\n"
|
||||
f"2. Assemble the complete markdown forensic report. Cross-source\n"
|
||||
f" actor clusters and per-source breakdown are MANDATORY sections.\n"
|
||||
f"3. Call save_report(content=<full markdown>, output_path=\"report.md\").\n"
|
||||
f" This single call is the completion signal — the run ENDS the moment it executes.\n"
|
||||
f" Do NOT call any read tools after this point; they will not run.\n"
|
||||
@@ -83,6 +97,45 @@ class ReportAgent(BaseAgent):
|
||||
executor=self._get_entities,
|
||||
)
|
||||
|
||||
self.register_tool(
|
||||
name="get_hypothesis_evidence_matrix",
|
||||
description=(
|
||||
"Render the hypothesis × evidence pivot as a markdown table. "
|
||||
"Columns: per edge_type counts, log_odds, confidence, status. "
|
||||
"Embed this directly in the report to show how each hypothesis "
|
||||
"stands relative to the others on a single screen."
|
||||
),
|
||||
input_schema={"type": "object", "properties": {}},
|
||||
executor=self._get_hypothesis_evidence_matrix,
|
||||
)
|
||||
|
||||
self.register_tool(
|
||||
name="get_actor_clusters",
|
||||
description=(
|
||||
"Render the cross-source actor clusters: each cluster is the "
|
||||
"set of Entity nodes the system currently treats as the same "
|
||||
"actor (via active same_as edges backed by coref hypotheses "
|
||||
"≥ 0.8). Includes the aggregated identifier evidence per "
|
||||
"cluster. Use this in the report's 'Entities / Actors' "
|
||||
"section so readers see who-is-who across devices, not just "
|
||||
"raw entity rows."
|
||||
),
|
||||
input_schema={"type": "object", "properties": {}},
|
||||
executor=self._get_actor_clusters,
|
||||
)
|
||||
|
||||
self.register_tool(
|
||||
name="get_phenomena_by_source",
|
||||
description=(
|
||||
"Group every phenomenon by its originating evidence source "
|
||||
"(source_id). Use this to drive the report's 'Findings by "
|
||||
"Source' section so each evidence item's per-device "
|
||||
"contribution is auditable."
|
||||
),
|
||||
input_schema={"type": "object", "properties": {}},
|
||||
executor=self._get_phenomena_by_source,
|
||||
)
|
||||
|
||||
self.register_tool(
|
||||
name="save_report",
|
||||
description="Save the final report to a file.",
|
||||
@@ -115,12 +168,24 @@ class ReportAgent(BaseAgent):
|
||||
items = [ph for ph in phenomena.values() if ph.category == cat]
|
||||
lines.append(f"\n--- {cat.upper()} ({len(items)} entries) ---")
|
||||
for ph in items:
|
||||
verified = "VERIFIED" if ph.source_tool else "UNVERIFIED"
|
||||
lines.append(f"\n[{verified}] {ph.title} ({ph.id})")
|
||||
# Grounded = at least one verified fact AND a source_tool.
|
||||
grounded = bool(ph.verified_facts) and bool(ph.source_tool)
|
||||
marker = "GROUNDED" if grounded else (
|
||||
"TOOL-ONLY" if ph.source_tool else "UNVERIFIED"
|
||||
)
|
||||
lines.append(f"\n[{marker}] {ph.title} ({ph.id})")
|
||||
lines.append(f" Source: {ph.source_agent} | Tool: {ph.source_tool or 'N/A'}")
|
||||
if ph.timestamp:
|
||||
lines.append(f" Timestamp: {ph.timestamp}")
|
||||
lines.append(f" {ph.description[:500]}")
|
||||
if ph.verified_facts:
|
||||
lines.append(f" Verified facts ({len(ph.verified_facts)}):")
|
||||
for f in ph.verified_facts:
|
||||
lines.append(
|
||||
f" - [{f.get('type','?')}] {str(f.get('value',''))[:200]} "
|
||||
f"(cite: {f.get('invocation_id','?')})"
|
||||
)
|
||||
if ph.interpretation:
|
||||
lines.append(f" Analysis: {ph.interpretation[:500]}")
|
||||
return "\n".join(lines)
|
||||
|
||||
async def _get_hypotheses_with_evidence(self) -> str:
|
||||
@@ -150,12 +215,87 @@ class ReportAgent(BaseAgent):
|
||||
return "\n".join(lines)
|
||||
|
||||
async def _get_case_info(self) -> str:
    """Render case metadata as indented text lines for the report.

    When ``self.graph.case`` is set, the output lists the case id, name,
    every ``case.meta`` entry, and one line per evidence source (id, label,
    type, access mode, plus platform and owner when present).

    When ``self.graph.case`` is None, the output falls back to the legacy
    single-image view: the ``case_info`` dict followed by the image path and
    partition offset.

    Returns:
        The lines joined with newlines, starting with the section banner.
    """
    lines = ["=== Case Information ==="]
    case = self.graph.case
    if case is not None:
        lines.append(f" case_id: {case.case_id}")
        lines.append(f" name: {case.name}")
        # meta may be None/empty; treat that as "no extra fields".
        for k, v in (case.meta or {}).items():
            lines.append(f" {k}: {v}")
        lines.append(f" sources: {len(case.sources)}")
        for s in case.sources:
            # Optional suffixes stay empty strings so the line has no
            # dangling ", owner=" / ", platform=" when the value is unset.
            owner = f", owner={s.owner}" if s.owner else ""
            platform = s.meta.get("platform") if s.meta else None
            plat = f", platform={platform}" if platform else ""
            lines.append(
                f" - {s.id}: {s.label} "
                f"(type={s.type}, mode={s.access_mode}{plat}{owner})"
            )
    else:
        # Legacy single-image fallback — surface whatever case_info dict
        # was passed in (e.g. the old CFReDS MD5 block).
        for k, v in (self.graph.case_info or {}).items():
            lines.append(f" {k}: {v}")
        lines.append(f" Image path: {self.graph.image_path}")
        lines.append(f" Partition offset: {self.graph.partition_offset}")
    return "\n".join(lines)
|
||||
|
||||
async def _get_hypothesis_evidence_matrix(self) -> str:
    """Return the graph's hypothesis × evidence matrix, rendered as markdown."""
    matrix_markdown = self.graph.hypothesis_evidence_matrix_markdown()
    return matrix_markdown
|
||||
|
||||
async def _get_actor_clusters(self) -> str:
    """Render the graph's actor clusters as text for the report.

    Each cluster dict is read through its ``members``, ``identifiers`` and
    ``coref_hypotheses`` keys. Clusters are listed largest first (ties
    broken by the member list itself) and numbered from 1.

    Returns:
        ``"(no entities recorded)"`` when the graph reports no clusters,
        otherwise the rendered lines joined with newlines.
    """
    clusters = self.graph.actor_clusters()
    if not clusters:
        return "(no entities recorded)"
    # Show multi-member clusters first — they're the cross-source links
    # the human reader most needs to see. sorted() builds a new list, so a
    # read-only report tool never reorders the list the graph handed out.
    ordered = sorted(clusters, key=lambda c: (-len(c["members"]), c["members"]))
    lines = [f"=== Actor Clusters ({len(ordered)}) ==="]
    for i, c in enumerate(ordered, 1):
        members = c["members"]
        label = "MULTI-SOURCE CLUSTER" if len(members) > 1 else "Single entity"
        lines.append(f"\n[{label} #{i}] {len(members)} member(s):")
        for eid in members:
            ent = self.graph.entities.get(eid)
            if ent:
                lines.append(f" - {ent.summary()}")
            else:
                # Keep the listing consistent with the member count in the
                # header instead of silently dropping a dangling id.
                lines.append(f" - {eid} (entity record not found)")
        if c["identifiers"]:
            lines.append(" Aggregated identifiers:")
            for ident in c["identifiers"]:
                strong_tag = "strong" if ident.get("strong") else "weak"
                lines.append(
                    f" [{strong_tag}] {ident.get('type')}={ident.get('value')} "
                    f"(on {ident.get('on_entity')})"
                )
        if c["coref_hypotheses"]:
            lines.append(" Backing coref hypotheses (≥0.8 active):")
            for hid in c["coref_hypotheses"]:
                hyp = self.graph.hypotheses.get(hid)
                if hyp:
                    lines.append(f" - {hid}: conf={hyp.confidence:.2f}, L={hyp.log_odds:+.2f}")
                else:
                    # Same reasoning as for members: surface the dangling id.
                    lines.append(f" - {hid} (hypothesis record not found)")
    return "\n".join(lines)
|
||||
|
||||
async def _get_phenomena_by_source(self) -> str:
    """Render every phenomenon grouped under the evidence source it came from.

    Phenomena with a falsy ``source_id`` are grouped under ``"(unbound)"``.
    Groups are emitted in sorted key order; within a group phenomena keep
    the graph's iteration order. Each row is flagged ``G`` when the
    phenomenon has both verified facts and a source tool, ``·`` otherwise.

    Returns:
        ``"(no phenomena recorded)"`` when the graph holds none, otherwise
        the rendered lines joined with newlines.
    """
    grouped: dict[str, list] = {}
    for phenomenon in self.graph.phenomena.values():
        key = phenomenon.source_id or "(unbound)"
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(phenomenon)
    if not grouped:
        return "(no phenomena recorded)"

    out = [f"=== Phenomena by Source ({len(grouped)} source(s)) ==="]
    for key in sorted(grouped):
        # Resolve a human-readable heading via graph.case when possible;
        # fall back to the bare key when there is no case or no such source.
        heading = key
        if self.graph.case:
            src = self.graph.case.get_source(key)
            if src:
                heading = f"{key} — {src.label} ({src.type})"
        members = grouped[key]
        out.append(f"\n--- {heading} ({len(members)} phenomena) ---")
        for phenomenon in members:
            if phenomenon.verified_facts and phenomenon.source_tool:
                flag = "G"
            else:
                flag = "·"
            out.append(f" [{flag}] {phenomenon.summary()}")
    return "\n".join(out)
|
||||
|
||||
async def _get_entities(self) -> str:
|
||||
@@ -174,18 +314,27 @@ class ReportAgent(BaseAgent):
|
||||
return "\n".join(lines)
|
||||
|
||||
async def _verify_phenomena(self) -> str:
    """Classify every phenomenon by grounding level and render the tally.

    Buckets, checked in this order:
      * GROUNDED   — has verified facts AND a source tool;
      * TOOL-ONLY  — has a source tool but no verified facts;
      * UNVERIFIED — has no source tool.

    Returns:
        A banner followed by one headed section per bucket (with its
        count), each listing the phenomena that fell into it.
    """
    grounded: list[str] = []
    tool_only: list[str] = []
    unverified: list[str] = []
    for ph in self.graph.phenomena.values():
        nf = len(ph.verified_facts)
        entry = (
            f" [{ph.category}] {ph.title} "
            f"(agent: {ph.source_agent}, tool: {ph.source_tool or 'N/A'}, facts: {nf})"
        )
        if ph.verified_facts and ph.source_tool:
            grounded.append(entry)
        elif ph.source_tool:
            tool_only.append(entry)
        else:
            unverified.append(entry)

    lines = ["=== Phenomena Verification Report ==="]
    lines.append(f"\nGROUNDED ({len(grounded)} — facts + source_tool):")
    lines.extend(grounded)
    lines.append(f"\nTOOL-ONLY ({len(tool_only)} — source_tool, no facts):")
    lines.extend(tool_only)
    lines.append(f"\nUNVERIFIED ({len(unverified)} — no source_tool):")
    lines.extend(unverified)
    return "\n".join(lines)
|
||||
|
||||
@@ -122,7 +122,15 @@ class TimelineAgent(BaseAgent):
|
||||
lines = []
|
||||
for ph in items:
|
||||
lines.append(f"{ph.timestamp} | [{ph.category}] {ph.title} ({ph.id})")
|
||||
lines.append(f" {ph.description[:150]}")
|
||||
preview = ph.interpretation[:150] if ph.interpretation else ""
|
||||
if ph.verified_facts:
|
||||
fact_preview = ", ".join(
|
||||
f"{f.get('type','?')}={str(f.get('value',''))[:40]}"
|
||||
for f in ph.verified_facts[:3]
|
||||
)
|
||||
preview = f"{preview} [facts: {fact_preview}]" if preview else f"[facts: {fact_preview}]"
|
||||
if preview:
|
||||
lines.append(f" {preview}")
|
||||
return "\n".join(lines)
|
||||
|
||||
async def _add_temporal_edge(
|
||||
|
||||
Reference in New Issue
Block a user