feat(refit): complete S1-S6 — case abstraction, grounding, log-odds, plugins, coref, multi-source

Consolidates the long-running refit work (DESIGN.md as authoritative spec)
into a single baseline commit. Six stages landed together:

  S1  Case + EvidenceSource abstraction; tools parameterised by source_id
      (case.py, main.py multi-source bootstrap, .bin extension support)
  S2  Grounding gateway in add_phenomenon: verified_facts cite real
      ToolInvocation ids; substring / normalised match enforced; agent +
      task scope checked. Phenomenon.description split into verified_facts
      (grounded) + interpretation (free text). [invocation: inv-xxx]
      prefix on every wrapped tool result so the LLM can cite.
  S3  Confidence as additive log-odds: edge_type → log10(LR) calibration
      table; commutative updates; supported / refuted thresholds derived
      from log_odds; hypothesis × evidence matrix view.
  S4  iOS plugin: unzip_archive + parse_plist / sqlite_tables /
      sqlite_query / parse_ios_keychain / read_idevice_info;
      IOSArtifactAgent; SOURCE_TYPE_AGENTS routing.
  S5  Cross-source entity resolution: typed identifiers on Entity,
      observe_identity gateway, auto coref hypothesis with shared /
      conflicting strong/weak LR edges, reversible same_as edges,
      actor_clusters() view.
  S6  Android partition probe + AndroidArtifactAgent; MediaAgent with
      OCR fallback; orchestrator Phase 1 iterates every analysable
      source; platform-aware get_triage_agent_type; ReportAgent renders
      actor clusters + per-source breakdown.

142 unit tests / 1 skipped — full coverage of the new gateway, log-odds
math, coref hypothesis fall-out, and orchestrator multi-source dispatch.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
BattleTag
2026-05-21 02:12:10 -10:00
parent 444d58726a
commit 81ade8f7ac
24 changed files with 5137 additions and 244 deletions

162
main.py
View File

@@ -15,17 +15,21 @@ from pathlib import Path
import yaml
from agent_factory import AgentFactory
from case import (
DISK_IMAGE_EXTS, Case, EvidenceSource, load_case, single_source_case,
)
from evidence_graph import EvidenceGraph
from llm_client import LLMClient
from log_config import setup_logging
from orchestrator import AnalysisAborted, Orchestrator
from tool_registry import register_all_tools
from tools.archive import unzip_archive_sync
# Root directory for run output; find_resumable_run() looks here for
# earlier runs that left a saved graph state behind.
RUNS_DIR = Path("runs")
# Default directory searched for disk images (the default *search_dir*
# of _discover_images).
IMAGE_DIR = Path("image")
# Common forensic image extensions (only first segment / single-file formats),
# expressed as glob patterns.
# NOTE(review): _discover_images also carries an extension-set check against
# DISK_IMAGE_EXTS; confirm whether this glob list is still needed.
_IMAGE_GLOBS = ["*.001", "*.dd", "*.raw", "*.img", "*.E01", "*.iso"]
# Persistent unpack cache for tree-mode sources (zip extractions). Lives
# at project root so multiple runs can reuse the same unpacked tree.
# prepare_source() extracts each archive into a sub-directory named after
# the source id.
SOURCE_CACHE_DIR = Path(".cache/sources")
def load_config(path: str = "config.yaml") -> dict:
@@ -38,11 +42,13 @@ def load_config(path: str = "config.yaml") -> dict:
# ---------------------------------------------------------------------------
def _discover_images(search_dir: Path = IMAGE_DIR) -> list[Path]:
    """Find forensic disk image files under *search_dir* (case-insensitive ext).

    Only the immediate children of *search_dir* are inspected; there is no
    recursion into sub-directories.

    Args:
        search_dir: Directory to scan. Defaults to ``IMAGE_DIR``.

    Returns:
        A sorted list of regular files whose lower-cased suffix is a member
        of ``DISK_IMAGE_EXTS``. Empty when *search_dir* is missing or is not
        a directory.
    """
    # A missing directory means "no images", not an error for the caller.
    if not search_dir.is_dir():
        return []
    # The suffix is lower-cased before the membership test so that e.g.
    # ``.E01`` and ``.e01`` are treated alike.
    # NOTE(review): assumes DISK_IMAGE_EXTS holds lower-case, dot-prefixed
    # suffixes — confirm against case.py.
    return sorted(
        candidate
        for candidate in search_dir.iterdir()
        if candidate.is_file() and candidate.suffix.lower() in DISK_IMAGE_EXTS
    )
def _parse_mmls(output: str) -> list[dict]:
@@ -110,7 +116,7 @@ def select_image_interactive(image_dir: Path | None = None) -> tuple[str, int]:
images = _discover_images(image_dir)
if not images:
print(f"No disk images found in {image_dir}/")
print("Supported formats: " + ", ".join(_IMAGE_GLOBS))
print("Supported extensions: " + ", ".join(sorted(DISK_IMAGE_EXTS)))
sys.exit(1)
if len(images) == 1:
@@ -153,6 +159,118 @@ def select_image_interactive(image_dir: Path | None = None) -> tuple[str, int]:
print("Invalid choice.")
def resolve_case() -> Case:
    """Work out which Case this invocation should analyse.

    Lookup order:

    1. A case file named by the first CLI argument (``.yaml`` / ``.yml``,
       matched case-insensitively). Failing to load it is fatal: an error
       is printed and the process exits with status 1.
    2. The default case file, as located by ``load_case()`` with no argument.
    3. The legacy interactive single-image picker; the first CLI argument,
       when present, is passed to it as the directory to search.

    Returns:
        The resolved Case.
    """
    cli_arg = sys.argv[1] if len(sys.argv) > 1 else None
    names_case_file = cli_arg is not None and cli_arg.lower().endswith(
        (".yaml", ".yml")
    )

    if names_case_file:
        # An explicitly requested case file must load; there is no fallback.
        loaded = load_case(cli_arg)
        if loaded is None:
            print(f"Error: could not load case file {cli_arg}")
            sys.exit(1)
    else:
        # No explicit file: let load_case() look for its default.
        loaded = load_case()

    if loaded is not None:
        print(f"Loaded case: {loaded.name} ({len(loaded.sources)} sources)")
        return loaded

    # Nothing declarative available: fall back to picking a single image.
    search_dir = Path(cli_arg) if cli_arg is not None else None
    image_path, partition_offset = select_image_interactive(search_dir)
    return single_source_case(image_path, partition_offset)
def _is_analysable(src: EvidenceSource) -> bool:
    """Tell whether *src* can be handed to the analysis pipeline.

    A source qualifies when it has a non-empty path and either

    * its access mode is ``"image"``, or
    * its access mode is ``"tree"`` and its type is ``"mobile_extraction"``
      or ``"archive"``.

    Every other combination (media collections, for instance) is reported
    as not analysable.
    """
    if not src.path:
        return False

    mode = src.access_mode
    if mode == "image":
        return True
    # Tree-mode sources are only supported for the listed source types.
    return mode == "tree" and src.type in ("mobile_extraction", "archive")
def list_analysable_sources(case: Case) -> list[EvidenceSource]:
    """Collect the sources of *case* that pass ``_is_analysable``.

    Sources that fail the check are not returned, but they are listed on
    stdout so the operator can see what was left out. If no source passes,
    a message is printed and the process exits with status 1. Otherwise a
    summary line per selected source is printed.

    Returns:
        The analysable sources, in the order they appear in ``case.sources``.
    """
    usable = []
    rejected = []
    for source in case.sources:
        bucket = usable if _is_analysable(source) else rejected
        bucket.append(source)

    if rejected:
        rejected_labels = ", ".join(f"{s.label} ({s.type})" for s in rejected)
        print(
            f"Note: {len(rejected)} source(s) not analysable in this build: "
            + rejected_labels
        )

    if not usable:
        print("No analysable sources in this case.")
        sys.exit(1)

    print(f"Analysing {len(usable)} source(s) — orchestrator will triage each in Phase 1:")
    for source in usable:
        print(f" - {source.summary()}")
    return usable
def prepare_source(src: EvidenceSource) -> EvidenceSource:
    """Materialise a tree-mode source for analysis.

    A tree-mode source whose path is a ``.zip`` file is unpacked into the
    project-level cache (``SOURCE_CACHE_DIR / src.id``) and ``src.path`` is
    rewritten in place to point at the unpacked directory. Extraction is
    delegated to ``unzip_archive_sync``; an optional password is read from
    ``src.meta["password"]`` and is never printed.

    The source is returned unchanged when:

    * it is not tree-mode, or has no path at all;
    * its path is already a directory;
    * its path does not exist (a warning is printed);
    * its path is a file other than ``.zip`` (a warning is printed);
    * extraction reports an error (the error text is printed).

    Args:
        src: The evidence source to prepare. Mutated in place on a
            successful unpack.

    Returns:
        The same ``src`` object that was passed in.
    """
    # A path of None would make Path() raise; treat "no path" as nothing to do.
    if src.access_mode != "tree" or not src.path:
        return src

    archive = Path(src.path)
    if archive.is_dir():
        return src  # already a directory, nothing to do
    if not archive.is_file():
        print(f"Warning: source path {src.path} does not exist; leaving as-is.")
        return src
    if archive.suffix.lower() != ".zip":
        # Other archive types (tar, 7z, ...) — not handled yet.
        print(f"Warning: tree-mode source {src.id} is not a .zip "
              f"({archive.suffix}); leaving as-is.")
        return src

    dest = SOURCE_CACHE_DIR / src.id
    dest.mkdir(parents=True, exist_ok=True)

    # Password-protected zips carry their key in the source's meta mapping.
    # Only the fact that a password is present is reported, never its value.
    password = (src.meta or {}).get("password")
    pw_note = " (password from meta)" if password else ""
    print(f"Unpacking {archive.name} -> {dest}{pw_note} (idempotent) ...")

    result = unzip_archive_sync(str(archive), str(dest), password=password)
    report_lines = result.split("\n")
    first_line = report_lines[0]
    print(" " + first_line)

    if first_line.startswith("Error:"):
        # Surface any multi-line guidance from the extractor verbatim.
        for extra in report_lines[1:]:
            print(" " + extra)
        print(f" Source {src.id} stays unanalysable until this is resolved.")
        # src.path is deliberately left pointing at the archive rather than
        # at a cache directory whose extraction failed.
        return src

    # access_mode is already "tree" here (checked by the opening guard), so
    # only the path needs to be redirected to the unpacked directory.
    src.path = str(dest)
    return src
def find_resumable_run() -> Path | None:
"""Find the most recent incomplete run with a saved graph state."""
if not RUNS_DIR.exists():
@@ -225,22 +343,30 @@ async def async_main() -> None:
# Initialize evidence graph
if graph is None:
# CLI arg takes priority, otherwise interactive prompt
cli_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else None
image_path, partition_offset = select_image_interactive(cli_dir)
case = resolve_case()
# case_info derived from THIS case's meta (case.yaml), not from
# config.yaml's legacy `cfreds_hacking_case` block. Without this,
# the old CFReDS evidence MD5s would be embedded in reports for
# every subsequent unrelated case.
graph = EvidenceGraph(
case_info=config.get("cfreds_hacking_case", {}),
case_info=dict(case.meta or {}),
persist_path=run_dir / "graph_state.json",
edge_weights=config.get("hypothesis_edge_weights"),
edge_log_lr=config.get("hypothesis_log_lr"),
)
graph.image_path = image_path
graph.partition_offset = partition_offset
graph.case = case
graph.extracted_dir = str(run_dir / "extracted")
analysable = list_analysable_sources(case)
# Prepare every analysable source up front (unzip tree-mode zips,
# etc.). Idempotent on cache hits — second run is a no-op.
prepared = [prepare_source(s) for s in analysable]
# Seed the active source so tools that resolve lazily have a target
# before Phase 1 begins; the orchestrator resets it per source.
graph.set_active_source(prepared[0])
else:
graph._persist_path = run_dir / "graph_state.json"
# Register all tools with bound image path
register_all_tools(graph.image_path, graph.partition_offset, graph, graph.extracted_dir)
# Register all tools — they resolve the active evidence source at call time
register_all_tools(graph)
# Create agent factory
factory = AgentFactory(llm, graph)