"""Media Agent — OCR-based analysis of screenshot/photo evidence. DESIGN.md §4.7: the LLM backend has no vision capability, so JPEG/PNG evidence must go through tesseract first. The agent runs OCR, then records extracted strings — especially identifiers (wallet addresses, phone numbers, usernames) — via the grounded observe_identity gateway so they participate in cross-source coref the same way iOS keychain entries or Windows account names do. If the OCR runtime is missing on the host, ocr_image returns an explicit install hint; the agent should record that as a negative finding ("no text extracted — tesseract not installed") rather than guessing. """ from __future__ import annotations from base_agent import BaseAgent from evidence_graph import EvidenceGraph from llm_client import LLMClient from tool_registry import TOOL_CATALOG class MediaAgent(BaseAgent): name = "media" role = ( "Media / OCR forensic analyst. You analyse screenshots, photos, and " "scanned documents — any pixel-based evidence the LLM cannot read " "directly. Workflow: list_extracted_dir to enumerate images, " "ocr_image on each promising one, then add_phenomenon (with the " "OCR'd text as the verified_fact value) and observe_identity for " "any wallet addresses, phone numbers, email addresses, or " "usernames the text contains. If OCR fails because tesseract is " "missing, RECORD that as a negative finding instead of fabricating " "image content — the absence is a real fact about this run." ) def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None: super().__init__(llm, graph) self._register_tools() def _register_tools(self) -> None: tool_names = [ "ocr_image", "list_extracted_dir", "find_files", "read_binary_preview", "read_text_file", "search_text_file", ] for name in tool_names: td = TOOL_CATALOG.get(name) if td: self.register_tool(td.name, td.description, td.input_schema, td.executor)