# MASForensic/agents/media.py

"""Media Agent — OCR-based analysis of screenshot/photo evidence.

DESIGN.md §4.7: the LLM backend has no vision capability, so JPEG/PNG
evidence must go through tesseract first. The agent runs OCR, then
records extracted strings — especially identifiers (wallet addresses,
phone numbers, usernames) — via the grounded observe_identity gateway so
they participate in cross-source coref the same way iOS keychain entries
or Windows account names do.

If the OCR runtime is missing on the host, ocr_image returns an explicit
install hint; the agent should record that as a negative finding ("no
text extracted — tesseract not installed") rather than guessing.
"""

from __future__ import annotations

import logging

from base_agent import BaseAgent
from evidence_graph import EvidenceGraph
from llm_client import LLMClient
from tool_registry import TOOL_CATALOG


class MediaAgent(BaseAgent):
    """Agent for OCR-based analysis of screenshot/photo evidence.

    Defines the agent's ``name`` and ``role`` text and, on construction,
    registers the file-browsing and OCR tools it needs from ``TOOL_CATALOG``.
    """

    # NOTE(review): ``name`` and ``role`` are presumably consumed by BaseAgent
    # (e.g. for prompting / routing) — confirm against base_agent.
    name = "media"
    role = (
        "Media / OCR forensic analyst. You analyse screenshots, photos, and "
        "scanned documents — any pixel-based evidence the LLM cannot read "
        "directly. Workflow: list_extracted_dir to enumerate images, "
        "ocr_image on each promising one, then add_phenomenon (with the "
        "OCR'd text as the verified_fact value) and observe_identity for "
        "any wallet addresses, phone numbers, email addresses, or "
        "usernames the text contains. If OCR fails because tesseract is "
        "missing, RECORD that as a negative finding instead of fabricating "
        "image content — the absence is a real fact about this run."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Initialise the base agent, then register the media tool set.

        Args:
            llm: LLM client forwarded to ``BaseAgent.__init__``.
            graph: Evidence graph forwarded to ``BaseAgent.__init__``.
        """
        super().__init__(llm, graph)
        self._register_tools()

    def _register_tools(self) -> None:
        """Register each media tool that is available in ``TOOL_CATALOG``.

        A name with no (truthy) catalog entry is skipped rather than raising,
        so the agent can still be constructed, but a warning is logged:
        otherwise the agent would silently lack a tool that its ``role``
        text instructs the LLM to call.
        """
        log = logging.getLogger(__name__)
        tool_names = (
            "ocr_image",
            "list_extracted_dir",
            "find_files",
            "read_binary_preview",
            "read_text_file",
            "search_text_file",
        )
        for tool_name in tool_names:
            td = TOOL_CATALOG.get(tool_name)
            if not td:
                # Best-effort: keep going, but make the gap visible.
                log.warning(
                    "MediaAgent: tool %r not found in TOOL_CATALOG; skipping",
                    tool_name,
                )
                continue
            self.register_tool(td.name, td.description, td.input_schema, td.executor)