Initial commit

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 17:36:26 +08:00
commit 097d2ce472
25 changed files with 5944 additions and 0 deletions
--- a/agents/__init__.py
+++ b/agents/__init__.py
--- a/agents/communication.py
+++ b/agents/communication.py
@@ -0,0 +1,33 @@
+"""Communication Agent — analyzes email, chat logs, and messaging artifacts."""
+
+from __future__ import annotations
+
+from base_agent import BaseAgent
+from evidence_graph import EvidenceGraph
+from llm_client import LLMClient
+from tool_registry import TOOL_CATALOG
+
+
+class CommunicationAgent(BaseAgent):
+    name = "communication"
+    role = (
+        "Communication forensic analyst. You analyze email files (.dbx, .pst), "
+        "IRC/mIRC chat logs, newsgroup data, and other messaging artifacts "
+        "to identify communication patterns, contacts, and content."
+    )
+
+    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
+        super().__init__(llm, graph)
+        self._register_tools()
+
+    def _register_tools(self) -> None:
+        tool_names = [
+            "list_directory", "extract_file",
+            "read_text_file", "read_binary_preview",
+            "list_extracted_dir", "search_strings",
+            "search_text_file", "read_text_file_section",
+        ]
+        for name in tool_names:
+            td = TOOL_CATALOG.get(name)
+            if td:
+                self.register_tool(td.name, td.description, td.input_schema, td.executor)
--- a/agents/filesystem.py
+++ b/agents/filesystem.py
@@ -0,0 +1,34 @@
+"""FileSystem Agent — analyzes disk structure, files, and deleted data."""
+
+from __future__ import annotations
+
+from base_agent import BaseAgent
+from evidence_graph import EvidenceGraph
+from llm_client import LLMClient
+from tool_registry import TOOL_CATALOG
+
+
+class FileSystemAgent(BaseAgent):
+    name = "filesystem"
+    role = (
+        "File system forensic analyst. You examine disk image partition layouts, "
+        "directory structures, file metadata, and recover deleted files. "
+        "You identify suspicious files, installed programs, and user data locations. "
+        "You also handle malware analysis, Recycle Bin forensics, and Prefetch execution evidence."
+    )
+
+    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
+        super().__init__(llm, graph)
+        self._register_tools()
+
+    def _register_tools(self) -> None:
+        tool_names = [
+            "partition_info", "filesystem_info", "list_directory",
+            "extract_file", "find_file", "search_strings",
+            "parse_prefetch", "count_deleted_files",
+            "read_text_file", "search_text_file", "read_binary_preview",
+        ]
+        for name in tool_names:
+            td = TOOL_CATALOG.get(name)
+            if td:
+                self.register_tool(td.name, td.description, td.input_schema, td.executor)
--- a/agents/hypothesis.py
+++ b/agents/hypothesis.py
@@ -0,0 +1,130 @@
+"""Hypothesis Agent — analyzes phenomena and generates investigative hypotheses."""
+
+from __future__ import annotations
+
+import json
+import logging
+
+from base_agent import BaseAgent
+from evidence_graph import EvidenceGraph, HYPOTHESIS_EDGE_WEIGHTS
+from llm_client import LLMClient
+
+logger = logging.getLogger(__name__)
+
+
+class HypothesisAgent(BaseAgent):
+    name = "hypothesis"
+    role = (
+        "Hypothesis analyst. You review all phenomena discovered so far "
+        "and formulate investigative hypotheses about what happened on this system. "
+        "Your ultimate goal: build the most complete picture of events that occurred. "
+        "For each hypothesis, identify which existing phenomena support or contradict it."
+    )
+
+    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
+        super().__init__(llm, graph)
+        self._register_hypothesis_tools()
+
+    def _register_hypothesis_tools(self) -> None:
+        """Register hypothesis-specific tools."""
+
+        valid_edge_types = list(HYPOTHESIS_EDGE_WEIGHTS.keys())
+
+        self.register_tool(
+            name="add_hypothesis",
+            description=(
+                "Create a new investigative hypothesis about what happened on the system. "
+                "Each hypothesis should be a specific, testable claim."
+            ),
+            input_schema={
+                "type": "object",
+                "properties": {
+                    "title": {
+                        "type": "string",
+                        "description": "Short title for the hypothesis.",
+                    },
+                    "description": {
+                        "type": "string",
+                        "description": "Detailed description of what this hypothesis claims.",
+                    },
+                },
+                "required": ["title", "description"],
+            },
+            executor=self._add_hypothesis,
+        )
+
+        self.register_tool(
+            name="link_phenomenon_to_hypothesis",
+            description=(
+                "Link an existing phenomenon to a hypothesis with a relationship type. "
+                f"Valid relationship types: {', '.join(valid_edge_types)}. "
+                "direct_evidence = the phenomenon IS the hypothesis. "
+                "supports = consistent with the hypothesis. "
+                "prerequisite_met = a necessary condition is satisfied. "
+                "consequence_observed = an expected result of the hypothesis is found. "
+                "contradicts = directly contradicts the hypothesis. "
+                "weakens = makes the hypothesis less likely."
+            ),
+            input_schema={
+                "type": "object",
+                "properties": {
+                    "phenomenon_id": {
+                        "type": "string",
+                        "description": "ID of the phenomenon (e.g. 'ph-a1b2c3d4').",
+                    },
+                    "hypothesis_id": {
+                        "type": "string",
+                        "description": "ID of the hypothesis (e.g. 'hyp-e5f6g7h8').",
+                    },
+                    "edge_type": {
+                        "type": "string",
+                        "enum": valid_edge_types,
+                        "description": "The edge_type of the relationship.",
+                    },
+                    "reason": {
+                        "type": "string",
+                        "description": "The reason this relationship holds (1-2 sentences).",
+                    },
+                },
+                "required": ["phenomenon_id", "hypothesis_id", "edge_type", "reason"],
+            },
+            executor=self._link_phenomenon_to_hypothesis,
+        )
+
+    async def _add_hypothesis(self, title: str, description: str) -> str:
+        hid = await self.graph.add_hypothesis(
+            title=title,
+            description=description,
+            created_by=self.name,
+        )
+        return f"Hypothesis created: {hid} — {title} (confidence: 0.50)"
+
+    async def _link_phenomenon_to_hypothesis(
+        self,
+        phenomenon_id: str,
+        hypothesis_id: str,
+        edge_type: str = "",
+        reason: str = "",
+        # Common LLM misnaming — accept as fallbacks
+        relationship: str = "",
+        note: str = "",
+    ) -> str:
+        edge_type = edge_type or relationship
+        reason = reason or note
+        if not edge_type:
+            return "Error: edge_type is required."
+        try:
+            new_conf = await self.graph.update_hypothesis_confidence(
+                hyp_id=hypothesis_id,
+                phenomenon_id=phenomenon_id,
+                edge_type=edge_type,
+                reason=reason,
+            )
+            weight = HYPOTHESIS_EDGE_WEIGHTS[edge_type]
+            direction = "+" if weight > 0 else ""
+            return (
+                f"Linked: {phenomenon_id} —[{edge_type}]→ {hypothesis_id} "
+                f"(weight: {direction}{weight}, new confidence: {new_conf:.3f})"
+            )
+        except ValueError as e:
+            return f"Error linking: {e}"
--- a/agents/network.py
+++ b/agents/network.py
@@ -0,0 +1,34 @@
+"""Network Agent — analyzes browser history, network tool artifacts, and wireless evidence."""
+
+from __future__ import annotations
+
+from base_agent import BaseAgent
+from evidence_graph import EvidenceGraph
+from llm_client import LLMClient
+from tool_registry import TOOL_CATALOG
+
+
+class NetworkAgent(BaseAgent):
+    name = "network"
+    role = (
+        "Network forensic analyst. You analyze browser history, cookies, "
+        "network captures (PCAP), wireless artifacts, and other network-related "
+        "evidence to reconstruct online activities and network attacks."
+    )
+
+    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
+        super().__init__(llm, graph)
+        self._register_tools()
+
+    def _register_tools(self) -> None:
+        tool_names = [
+            "list_directory", "extract_file",
+            "read_text_file", "read_binary_preview",
+            "list_extracted_dir", "search_strings",
+            "search_text_file", "read_text_file_section",
+            "parse_pcap_strings",
+        ]
+        for name in tool_names:
+            td = TOOL_CATALOG.get(name)
+            if td:
+                self.register_tool(td.name, td.description, td.input_schema, td.executor)
--- a/agents/registry.py
+++ b/agents/registry.py
@@ -0,0 +1,36 @@
+"""Registry Agent — analyzes Windows registry hives."""
+
+from __future__ import annotations
+
+from base_agent import BaseAgent
+from evidence_graph import EvidenceGraph
+from llm_client import LLMClient
+from tool_registry import TOOL_CATALOG
+
+
+class RegistryAgent(BaseAgent):
+    name = "registry"
+    role = (
+        "Windows registry forensic analyst. You parse registry hive files "
+        "(SYSTEM, SOFTWARE, SAM, NTUSER.DAT) to extract system configuration, "
+        "user accounts, installed software, network settings, email accounts, "
+        "and other Windows artifacts."
+    )
+
+    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
+        super().__init__(llm, graph)
+        self._register_tools()
+
+    def _register_tools(self) -> None:
+        tool_names = [
+            "extract_file", "list_directory",
+            "parse_registry_key", "list_installed_software",
+            "get_user_activity", "search_registry",
+            "get_system_info", "get_timezone_info", "get_computer_name",
+            "get_shutdown_time", "enumerate_users",
+            "get_network_interfaces", "get_email_config",
+        ]
+        for name in tool_names:
+            td = TOOL_CATALOG.get(name)
+            if td:
+                self.register_tool(td.name, td.description, td.input_schema, td.executor)
--- a/agents/report.py
+++ b/agents/report.py
@@ -0,0 +1,191 @@
+"""Report Agent — generates forensic analysis reports."""
+
+from __future__ import annotations
+
+import json
+import os
+
+from base_agent import BaseAgent
+from evidence_graph import EvidenceGraph
+from llm_client import LLMClient
+
+
+class ReportAgent(BaseAgent):
+    name = "report"
+    role = (
+        "Forensic report writer. You synthesize all findings from the investigation "
+        "into a structured, professional forensic analysis report organized by hypotheses.\n\n"
+        "IMPORTANT: Only include findings that have a source_tool attribution (marked VERIFIED). "
+        "If evidence lacks source attribution, mark it as UNVERIFIED. "
+        "Do NOT invent or fabricate any data, timestamps, or findings not present in the evidence.\n\n"
+        "CRITICAL: You MUST call save_report to write the final report."
+    )
+
+    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
+        super().__init__(llm, graph)
+        self._register_tools()
+
+    def _build_system_prompt(self, task: str) -> str:
+        """Report agent gets a clean prompt — no Phase A/B/C/D workflow."""
+        return (
+            f"You are a forensic report writer.\n"
+            f"Role: {self.role}\n\n"
+            f"Investigation state:\n{self.graph.stats_summary()}\n\n"
+            f"Your task: {task}\n\n"
+            f"WORKFLOW:\n"
+            f"1. Call get_hypotheses_with_evidence to get all hypotheses and their linked evidence\n"
+            f"2. Call get_all_phenomena to get detailed findings by category\n"
+            f"3. Call get_entities to get people, programs, and hosts\n"
+            f"4. Call get_case_info for case metadata\n"
+            f"5. Write the complete report directly in your <answer> block\n\n"
+            f"RULES:\n"
+            f"- Write the report DIRECTLY in <answer> — do NOT use save_report tool\n"
+            f"- Only include findings present in the evidence graph\n"
+            f"- Do NOT invent timestamps, file paths, or data not in the phenomena\n"
+            f"- The report must be complete — do not cut off mid-section\n"
+        )
+
+    def _register_tools(self) -> None:
+        self.register_tool(
+            name="get_all_phenomena",
+            description="Get all phenomena across all categories with full details.",
+            input_schema={"type": "object", "properties": {}},
+            executor=self._get_all_phenomena,
+        )
+
+        self.register_tool(
+            name="get_hypotheses_with_evidence",
+            description="Get all hypotheses with their linked phenomena (supporting and contradicting).",
+            input_schema={"type": "object", "properties": {}},
+            executor=self._get_hypotheses_with_evidence,
+        )
+
+        self.register_tool(
+            name="get_case_info",
+            description="Get case metadata (image info, drive details, etc.).",
+            input_schema={"type": "object", "properties": {}},
+            executor=self._get_case_info,
+        )
+
+        self.register_tool(
+            name="get_entities",
+            description="Get all entities (people, programs, hosts) and their connections.",
+            input_schema={"type": "object", "properties": {}},
+            executor=self._get_entities,
+        )
+
+        self.register_tool(
+            name="save_report",
+            description="Save the final report to a file.",
+            input_schema={
+                "type": "object",
+                "properties": {
+                    "content": {"type": "string", "description": "Report content in Markdown."},
+                    "output_path": {"type": "string", "description": "File path to save the report."},
+                },
+                "required": ["content", "output_path"],
+            },
+            executor=self._save_report,
+        )
+
+        self.register_tool(
+            name="verify_phenomena",
+            description="Check phenomena provenance — VERIFIED (has source_tool) vs UNVERIFIED.",
+            input_schema={"type": "object", "properties": {}},
+            executor=self._verify_phenomena,
+        )
+
+    async def _get_all_phenomena(self) -> str:
+        phenomena = self.graph.phenomena
+        if not phenomena:
+            return "No phenomena in the evidence graph."
+
+        categories = sorted(set(ph.category for ph in phenomena.values()))
+        lines = [f"=== All Phenomena ({len(phenomena)} entries) ==="]
+        for cat in categories:
+            items = [ph for ph in phenomena.values() if ph.category == cat]
+            lines.append(f"\n--- {cat.upper()} ({len(items)} entries) ---")
+            for ph in items:
+                verified = "VERIFIED" if ph.source_tool else "UNVERIFIED"
+                lines.append(f"\n[{verified}] {ph.title} ({ph.id})")
+                lines.append(f"  Source: {ph.source_agent} | Tool: {ph.source_tool or 'N/A'}")
+                if ph.timestamp:
+                    lines.append(f"  Timestamp: {ph.timestamp}")
+                lines.append(f"  {ph.description[:500]}")
+        return "\n".join(lines)
+
+    async def _get_hypotheses_with_evidence(self) -> str:
+        if not self.graph.hypotheses:
+            return "No hypotheses defined."
+
+        lines = [f"=== Hypotheses ({len(self.graph.hypotheses)}) ==="]
+        for hyp in self.graph.hypotheses.values():
+            lines.append(f"\n### {hyp.title}")
+            lines.append(f"Confidence: {hyp.confidence:.2f} | Status: {hyp.status}")
+            lines.append(f"Description: {hyp.description}")
+
+            related = self.graph.get_related(hyp.id, direction="in")
+            supporting = [r for r in related if r["edge_type"] in ("direct_evidence", "supports", "prerequisite_met", "consequence_observed")]
+            contradicting = [r for r in related if r["edge_type"] in ("contradicts", "weakens")]
+
+            if supporting:
+                lines.append(f"\nSupporting evidence ({len(supporting)}):")
+                for r in supporting:
+                    lines.append(f"  [{r['edge_type']}] {r['node']}")
+            if contradicting:
+                lines.append(f"\nContradicting evidence ({len(contradicting)}):")
+                for r in contradicting:
+                    lines.append(f"  [{r['edge_type']}] {r['node']}")
+            if not supporting and not contradicting:
+                lines.append("  (no evidence linked)")
+        return "\n".join(lines)
+
+    async def _get_case_info(self) -> str:
+        info = self.graph.case_info
+        lines = ["=== Case Information ==="]
+        for k, v in info.items():
+            lines.append(f"  {k}: {v}")
+        lines.append(f"  Image path: {self.graph.image_path}")
+        lines.append(f"  Partition offset: {self.graph.partition_offset}")
+        return "\n".join(lines)
+
+    async def _get_entities(self) -> str:
+        if not self.graph.entities:
+            return "No entities recorded."
+
+        lines = [f"=== Entities ({len(self.graph.entities)}) ==="]
+        for ent in self.graph.entities.values():
+            lines.append(f"\n{ent.name} ({ent.entity_type})")
+            if ent.description:
+                lines.append(f"  {ent.description}")
+            related = self.graph.get_related(ent.id, direction="in")
+            if related:
+                for r in related:
+                    lines.append(f"  ← [{r['edge_type']}] {r['node']}")
+        return "\n".join(lines)
+
+    async def _verify_phenomena(self) -> str:
+        verified = []
+        unverified = []
+        for ph in self.graph.phenomena.values():
+            entry = f"  [{ph.category}] {ph.title} (agent: {ph.source_agent}, tool: {ph.source_tool or 'N/A'})"
+            if ph.source_tool:
+                verified.append(entry)
+            else:
+                unverified.append(entry)
+
+        lines = ["=== Phenomena Verification Report ==="]
+        lines.append(f"\nVERIFIED ({len(verified)} — have source_tool):")
+        lines.extend(verified)
+        lines.append(f"\nUNVERIFIED ({len(unverified)} — no source_tool):")
+        lines.extend(unverified)
+        return "\n".join(lines)
+
+    async def _save_report(self, content: str, output_path: str) -> str:
+        try:
+            os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
+            with open(output_path, "w") as f:
+                f.write(content)
+            return f"Report saved to {output_path} ({len(content)} chars)"
+        except Exception as e:
+            return f"Error saving report: {e}"
--- a/agents/timeline.py
+++ b/agents/timeline.py
@@ -0,0 +1,88 @@
+"""Timeline Agent — correlates evidence across time."""
+
+from __future__ import annotations
+
+import json
+
+from base_agent import BaseAgent
+from evidence_graph import EvidenceGraph
+from llm_client import LLMClient
+from tool_registry import TOOL_CATALOG
+
+
+class TimelineAgent(BaseAgent):
+    name = "timeline"
+    role = (
+        "Timeline forensic analyst. You build chronological timelines from filesystem "
+        "MAC timestamps and correlate events across all phenomena categories in the "
+        "evidence graph to reconstruct the sequence of activities on the system."
+    )
+
+    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
+        super().__init__(llm, graph)
+        self._register_tools()
+
+    def _register_tools(self) -> None:
+        # Filesystem timeline tool from catalog
+        td = TOOL_CATALOG.get("build_filesystem_timeline")
+        if td:
+            self.register_tool(td.name, td.description, td.input_schema, td.executor)
+
+        # Custom tool to get all phenomena with timestamps for correlation
+        self.register_tool(
+            name="get_timestamped_phenomena",
+            description="Get all phenomena that have timestamps, sorted chronologically. Use for timeline correlation.",
+            input_schema={"type": "object", "properties": {}},
+            executor=self._get_timestamped_phenomena,
+        )
+
+        # Tool to add temporal edges between phenomena
+        self.register_tool(
+            name="add_temporal_edge",
+            description="Add a temporal relationship between two phenomena (before, after, or concurrent).",
+            input_schema={
+                "type": "object",
+                "properties": {
+                    "source_id": {"type": "string", "description": "ID of the earlier/source phenomenon."},
+                    "target_id": {"type": "string", "description": "ID of the later/target phenomenon."},
+                    "relation": {
+                        "type": "string",
+                        "enum": ["before", "after", "concurrent"],
+                        "description": "Temporal relationship.",
+                    },
+                },
+                "required": ["source_id", "target_id", "relation"],
+            },
+            executor=self._add_temporal_edge,
+        )
+
+    async def _get_timestamped_phenomena(self) -> str:
+        items = [
+            ph for ph in self.graph.phenomena.values()
+            if ph.timestamp
+        ]
+        items.sort(key=lambda ph: ph.timestamp or "")
+
+        if not items:
+            return "No phenomena with timestamps found."
+
+        lines = []
+        for ph in items:
+            lines.append(f"{ph.timestamp} | [{ph.category}] {ph.title} ({ph.id})")
+            lines.append(f"  {ph.description[:150]}")
+        return "\n".join(lines)
+
+    async def _add_temporal_edge(
+        self, source_id: str, target_id: str, relation: str,
+    ) -> str:
+        try:
+            await self.graph.add_edge(
+                source_id=source_id,
+                target_id=target_id,
+                edge_type="temporal",
+                metadata={"relation": relation},
+                created_by=self.name,
+            )
+            return f"Temporal edge added: {source_id} —[{relation}]→ {target_id}"
+        except ValueError as e:
+            return f"Error: {e}"