"""Case and evidence-source model — the foundation for multi-evidence analysis.

A :class:`Case` is a collection of :class:`EvidenceSource` entries.  Each
source has a *type* (disk image, mobile extraction, archive, ...) and an
*access mode* that determines how forensic tools reach its contents:

- ``"image"`` — a block device / disk image, navigated by The Sleuth Kit via
  inode addressing (raw, E01, dd, ...).
- ``"tree"`` — an already-mounted filesystem or unpacked extraction,
  navigated by ordinary filesystem paths.

This module is pure data model + loading.  Partition probing and interactive
selection live in ``main.py``.
"""

from __future__ import annotations

import logging
import re
from dataclasses import asdict, dataclass, field
from pathlib import Path

logger = logging.getLogger(__name__)

# Recognised source types and access modes.
SOURCE_TYPES = {
    "disk_image",
    "mobile_extraction",
    "archive",
    "media_collection",
}
ACCESS_MODES = {"image", "tree"}

# Disk-image file extensions for interactive discovery.
# P6 fix: ``.bin`` (and vmdk/vhd) added — extension globbing previously missed
# raw block-device dumps such as ``blk0_sda.bin``.
DISK_IMAGE_EXTS = {
    ".001",
    ".dd",
    ".raw",
    ".img",
    ".bin",
    ".e01",
    ".iso",
    ".vmdk",
    ".vhd",
}

# Default access mode per source type.
_DEFAULT_ACCESS_MODE = {
    "disk_image": "image",
    "mobile_extraction": "tree",
    "archive": "tree",
    "media_collection": "tree",
}


def slugify(text: str) -> str:
    """Reduce *text* to a lowercase, hyphen-separated slug for use in IDs.

    Every run of characters outside ``a-z0-9`` (after lowercasing) collapses
    to a single hyphen, and leading/trailing hyphens are removed.  If nothing
    is left, the fallback slug ``"src"`` is returned so an ID is never empty.
    """
    slug = re.sub(r"[^a-z0-9]+", "-", text.lower()).strip("-")
    return slug or "src"


@dataclass
class EvidenceSource:
    """One piece of evidence within a :class:`Case`."""

    id: str  # unique identifier, conventionally "src-<slug>"
    label: str  # human-readable name
    type: str  # one of SOURCE_TYPES
    path: str  # filesystem path to the evidence
    access_mode: str  # "image" | "tree"
    owner: str = ""  # associated person, if known
    partition_offset: int = 0  # sector offset (image-mode sources only)
    meta: dict = field(default_factory=dict)  # free-form extra attributes

    def to_dict(self) -> dict:
        """Return a plain-dict copy of this source (nested values are copied)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: dict) -> EvidenceSource:
        """Reconstruct from a dict, ignoring unknown keys (forward-compatible).

        Raises:
            TypeError: if a required field (one without a default) is missing.
        """
        known = set(cls.__dataclass_fields__)
        kwargs = {key: value for key, value in d.items() if key in known}
        if "meta" in kwargs:
            # Copy so the instance never aliases the caller's dict (mirrors the
            # copy made by ``to_dict``); a null ``meta`` becomes an empty dict.
            kwargs["meta"] = dict(kwargs["meta"] or {})
        return cls(**kwargs)

    def summary(self) -> str:
        """Return a one-line description, e.g. ``[src-a] A (disk_image/image@2048)``."""
        # The offset is only meaningful (and only shown) for image-mode
        # sources with a non-zero partition offset.
        show_offset = self.access_mode == "image" and self.partition_offset
        loc = f"@{self.partition_offset}" if show_offset else ""
        owner = f" owner={self.owner}" if self.owner else ""
        return f"[{self.id}] {self.label} ({self.type}/{self.access_mode}{loc}){owner}"


@dataclass
class Case:
    """A forensic case: a set of evidence sources plus metadata."""

    case_id: str
    name: str
    sources: list[EvidenceSource] = field(default_factory=list)
    meta: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return a plain-dict representation; ``meta`` is shallow-copied."""
        return {
            "case_id": self.case_id,
            "name": self.name,
            "sources": [s.to_dict() for s in self.sources],
            "meta": dict(self.meta),
        }

    @classmethod
    def from_dict(cls, d: dict) -> Case:
        """Rebuild a case from the dict shape produced by :meth:`to_dict`.

        Missing ``case_id`` / ``name`` default to ``""``.  A missing or null
        ``sources`` / ``meta`` is treated as empty, and ``meta`` is copied so
        the new case does not share state with *d*.
        """
        return cls(
            case_id=d.get("case_id", ""),
            name=d.get("name", ""),
            sources=[EvidenceSource.from_dict(s) for s in d.get("sources") or []],
            meta=dict(d.get("meta") or {}),
        )

    def get_source(self, source_id: str) -> EvidenceSource | None:
        """Return the first source whose ``id`` equals *source_id*, or ``None``."""
        return next((s for s in self.sources if s.id == source_id), None)
# ---------------------------------------------------------------------------
# case.yaml loading
# ---------------------------------------------------------------------------


def _coerce_meta(value: object, where: str) -> dict:
    """Return a fresh dict for a ``meta`` entry, tolerating null/malformed values.

    A blank YAML value arrives as ``None`` and becomes ``{}``; anything that is
    not a mapping is dropped with a warning naming *where* it came from.
    """
    if value is None:
        return {}
    if isinstance(value, dict):
        return dict(value)
    logger.warning("Ignoring non-mapping meta for %s", where)
    return {}


def _build_source(raw: dict, base_dir: Path, index: int) -> EvidenceSource:
    """Validate and normalise one source entry from case.yaml.

    Missing ``id`` is derived from the label; missing ``access_mode`` defaults
    by type; relative paths are resolved against *base_dir* (the case file's
    directory).  Fields left blank in YAML (parsed as ``None``) are treated the
    same as absent fields rather than being stringified to ``"None"``.

    Args:
        raw: One mapping from the ``sources`` list.
        base_dir: Directory against which relative ``path`` values are joined.
        index: Position of the entry in the list, used for the fallback label.

    Returns:
        A populated :class:`EvidenceSource`; ``path`` is ``""`` when none was given.
    """
    label = str(raw.get("label") or raw.get("id") or f"source-{index}")

    src_type = str(raw.get("type") or "disk_image")
    if src_type not in SOURCE_TYPES:
        logger.warning("Unknown source type %r for %r — treating as disk_image", src_type, label)
        src_type = "disk_image"

    default_mode = _DEFAULT_ACCESS_MODE.get(src_type, "tree")
    access_mode = str(raw.get("access_mode") or default_mode)
    if access_mode not in ACCESS_MODES:
        logger.warning("Unknown access_mode %r for %r — defaulting", access_mode, label)
        access_mode = default_mode

    src_id = str(raw.get("id") or f"src-{slugify(label)}")
    if not src_id.startswith("src-"):
        src_id = f"src-{slugify(src_id)}"

    # ``or ""`` keeps a blank ``path:`` from turning into "<base_dir>/None".
    path = str(raw.get("path") or "").strip()
    if path:
        resolved = Path(path).expanduser()
        if not resolved.is_absolute():
            resolved = base_dir / resolved
        path = str(resolved)

    raw_offset = raw.get("partition_offset") or 0
    try:
        partition_offset = int(raw_offset)
    except (TypeError, ValueError):
        # Stay lenient like the other fields: warn and fall back, don't abort the load.
        logger.warning("Invalid partition_offset %r for %r — using 0", raw_offset, label)
        partition_offset = 0

    return EvidenceSource(
        id=src_id,
        label=label,
        type=src_type,
        path=path,
        access_mode=access_mode,
        owner=str(raw.get("owner") or ""),
        partition_offset=partition_offset,
        meta=_coerce_meta(raw.get("meta"), f"source {label!r}"),
    )


def build_case(data: dict, base_dir: Path | None = None) -> Case:
    """Build a validated :class:`Case` from a loosely-typed case.yaml dict.

    Non-mapping source entries are skipped with a warning.  Duplicate source
    ids are made unique by appending a numeric suffix.  Sources without a path
    are kept, with a warning that they cannot be analysed.

    Args:
        data: Parsed case.yaml content.
        base_dir: Base for relative source paths; defaults to the current
            working directory.
    """
    base_dir = base_dir or Path.cwd()

    entries = data.get("sources") or []
    if not isinstance(entries, (list, tuple)):
        logger.warning("'sources' is not a list — ignoring it")
        entries = []

    sources: list[EvidenceSource] = []
    seen_ids: set[str] = set()
    for i, raw in enumerate(entries):
        if not isinstance(raw, dict):
            logger.warning("Skipping malformed source entry #%d", i)
            continue
        src = _build_source(raw, base_dir, i)
        if src.id in seen_ids:
            # Start from the entry index (the historical suffix) and keep
            # bumping until the candidate does not clash with any earlier id.
            duplicate_id = src.id
            suffix = i
            candidate = f"{duplicate_id}-{suffix}"
            while candidate in seen_ids:
                suffix += 1
                candidate = f"{duplicate_id}-{suffix}"
            logger.warning(
                "Duplicate source id %r — entry #%d renamed to %r", duplicate_id, i, candidate
            )
            src.id = candidate
        seen_ids.add(src.id)
        if not src.path:
            logger.warning("Source %r has no path — keeping but it is not analysable", src.label)
        sources.append(src)

    return Case(
        case_id=str(data.get("case_id") or "case"),
        name=str(data.get("name") or "Untitled case"),
        sources=sources,
        meta=_coerce_meta(data.get("meta"), "case"),
    )


def load_case(path: str | Path = "case.yaml") -> Case | None:
    """Load a :class:`Case` from a case.yaml file.

    Returns:
        The loaded case, or ``None`` when the file is absent, cannot be read
        or parsed, or does not contain a YAML mapping at the top level.
    """
    case_path = Path(path)
    if not case_path.exists():
        return None

    # Imported lazily so PyYAML is only required when a case file is present.
    import yaml

    try:
        # Decode explicitly as UTF-8 instead of the platform default encoding.
        data = yaml.safe_load(case_path.read_text(encoding="utf-8")) or {}
    except Exception as e:  # deliberate boundary: any read/parse failure means "no case"
        logger.error("Failed to parse %s: %s", case_path, e)
        return None

    if not isinstance(data, dict):
        logger.error("%s is not a YAML mapping", case_path)
        return None

    case = build_case(data, base_dir=case_path.resolve().parent)
    logger.info("Loaded case %r with %d source(s) from %s", case.name, len(case.sources), case_path)
    return case


def single_source_case(
    image_path: str,
    partition_offset: int = 0,
    label: str | None = None,
) -> Case:
    """Wrap a single disk image as a one-source Case (interactive fallback).

    The source id is ``src-`` plus the slug of the image's file stem; the case
    name and source label are *label*, or the image's file name when *label*
    is not given.  The case id is always ``"adhoc"``.
    """
    image = Path(image_path)
    name = label or image.name
    src = EvidenceSource(
        id=f"src-{slugify(image.stem)}",
        label=name,
        type="disk_image",
        path=image_path,
        access_mode="image",
        partition_offset=partition_offset,
    )
    return Case(case_id="adhoc", name=name, sources=[src])