Initial commit

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
BattleTag
2026-05-09 17:36:26 +08:00
commit 097d2ce472
25 changed files with 5944 additions and 0 deletions

0
agents/__init__.py Normal file
View File

33
agents/communication.py Normal file
View File

@@ -0,0 +1,33 @@
"""Communication Agent — analyzes email, chat logs, and messaging artifacts."""
from __future__ import annotations
from base_agent import BaseAgent
from evidence_graph import EvidenceGraph
from llm_client import LLMClient
from tool_registry import TOOL_CATALOG
class CommunicationAgent(BaseAgent):
    """Agent whose role text targets e-mail, chat-log and messaging artifacts.

    On construction it registers a fixed list of tools, looked up by name in
    ``TOOL_CATALOG``, on itself.
    """

    name = "communication"
    role = (
        "Communication forensic analyst. You analyze email files (.dbx, .pst), "
        "IRC/mIRC chat logs, newsgroup data, and other messaging artifacts "
        "to identify communication patterns, contacts, and content."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Initialise the base agent, then attach this agent's tools."""
        super().__init__(llm, graph)
        self._register_tools()

    def _register_tools(self) -> None:
        """Register each wanted tool that ``TOOL_CATALOG`` can supply.

        Names whose catalog lookup yields a falsy value are skipped without
        any error or log message.
        """
        wanted = (
            "list_directory",
            "extract_file",
            "read_text_file",
            "read_binary_preview",
            "list_extracted_dir",
            "search_strings",
            "search_text_file",
            "read_text_file_section",
        )
        for tool_name in wanted:
            definition = TOOL_CATALOG.get(tool_name)
            if not definition:
                # Not in the catalog: skip silently.
                continue
            self.register_tool(
                definition.name,
                definition.description,
                definition.input_schema,
                definition.executor,
            )

34
agents/filesystem.py Normal file
View File

@@ -0,0 +1,34 @@
"""FileSystem Agent — analyzes disk structure, files, and deleted data."""
from __future__ import annotations
from base_agent import BaseAgent
from evidence_graph import EvidenceGraph
from llm_client import LLMClient
from tool_registry import TOOL_CATALOG
class FileSystemAgent(BaseAgent):
    """Agent whose role text targets disk layout, files and deleted data.

    On construction it registers a fixed list of tools, looked up by name in
    ``TOOL_CATALOG``, on itself.
    """

    name = "filesystem"
    role = (
        "File system forensic analyst. You examine disk image partition layouts, "
        "directory structures, file metadata, and recover deleted files. "
        "You identify suspicious files, installed programs, and user data locations. "
        "You also handle malware analysis, Recycle Bin forensics, and Prefetch execution evidence."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Initialise the base agent, then attach this agent's tools."""
        super().__init__(llm, graph)
        self._register_tools()

    def _register_tools(self) -> None:
        """Register each wanted tool that ``TOOL_CATALOG`` can supply.

        Catalog lookups that come back falsy are dropped silently.
        """
        wanted = (
            "partition_info",
            "filesystem_info",
            "list_directory",
            "extract_file",
            "find_file",
            "search_strings",
            "parse_prefetch",
            "count_deleted_files",
            "read_text_file",
            "search_text_file",
            "read_binary_preview",
        )
        # filter(None, ...) keeps only truthy lookups; map is lazy, so each
        # lookup still happens immediately before its registration.
        for definition in filter(None, map(TOOL_CATALOG.get, wanted)):
            self.register_tool(
                definition.name,
                definition.description,
                definition.input_schema,
                definition.executor,
            )

130
agents/hypothesis.py Normal file
View File

@@ -0,0 +1,130 @@
"""Hypothesis Agent — analyzes phenomena and generates investigative hypotheses."""
from __future__ import annotations
import json
import logging
from base_agent import BaseAgent
from evidence_graph import EvidenceGraph, HYPOTHESIS_EDGE_WEIGHTS
from llm_client import LLMClient
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class HypothesisAgent(BaseAgent):
    """Agent that creates hypotheses and links phenomena to them.

    It registers two tools on itself: ``add_hypothesis`` and
    ``link_phenomenon_to_hypothesis``. Both delegate to the evidence graph
    and return a human-readable status string.
    """

    name = "hypothesis"
    role = (
        "Hypothesis analyst. You review all phenomena discovered so far "
        "and formulate investigative hypotheses about what happened on this system. "
        "Your ultimate goal: build the most complete picture of events that occurred. "
        "For each hypothesis, identify which existing phenomena support or contradict it."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Initialise the base agent, then attach the hypothesis tools."""
        super().__init__(llm, graph)
        self._register_hypothesis_tools()

    def _register_hypothesis_tools(self) -> None:
        """Register hypothesis-specific tools."""
        # The keys of HYPOTHESIS_EDGE_WEIGHTS define the accepted edge types;
        # they feed both the tool description and the schema enum.
        valid_edge_types = list(HYPOTHESIS_EDGE_WEIGHTS.keys())

        self.register_tool(
            name="add_hypothesis",
            description=(
                "Create a new investigative hypothesis about what happened on the system. "
                "Each hypothesis should be a specific, testable claim."
            ),
            input_schema={
                "type": "object",
                "properties": {
                    "title": {
                        "type": "string",
                        "description": "Short title for the hypothesis.",
                    },
                    "description": {
                        "type": "string",
                        "description": "Detailed description of what this hypothesis claims.",
                    },
                },
                "required": ["title", "description"],
            },
            executor=self._add_hypothesis,
        )

        self.register_tool(
            name="link_phenomenon_to_hypothesis",
            description=(
                "Link an existing phenomenon to a hypothesis with a relationship type. "
                f"Valid relationship types: {', '.join(valid_edge_types)}. "
                "direct_evidence = the phenomenon IS the hypothesis. "
                "supports = consistent with the hypothesis. "
                "prerequisite_met = a necessary condition is satisfied. "
                "consequence_observed = an expected result of the hypothesis is found. "
                "contradicts = directly contradicts the hypothesis. "
                "weakens = makes the hypothesis less likely."
            ),
            input_schema={
                "type": "object",
                "properties": {
                    "phenomenon_id": {
                        "type": "string",
                        "description": "ID of the phenomenon (e.g. 'ph-a1b2c3d4').",
                    },
                    "hypothesis_id": {
                        "type": "string",
                        "description": "ID of the hypothesis (e.g. 'hyp-e5f6g7h8').",
                    },
                    "edge_type": {
                        "type": "string",
                        "enum": valid_edge_types,
                        "description": "The edge_type of the relationship.",
                    },
                    "reason": {
                        "type": "string",
                        "description": "The reason this relationship holds (1-2 sentences).",
                    },
                },
                "required": ["phenomenon_id", "hypothesis_id", "edge_type", "reason"],
            },
            executor=self._link_phenomenon_to_hypothesis,
        )

    async def _add_hypothesis(self, title: str, description: str) -> str:
        """Create a hypothesis in the graph and report its new ID.

        Args:
            title: Short title for the hypothesis.
            description: Detailed statement of what the hypothesis claims.

        Returns:
            A status line containing the ID returned by the graph and the title.
        """
        hid = await self.graph.add_hypothesis(
            title=title,
            description=description,
            created_by=self.name,
        )
        # The ID and title are separated so the ID can be copied verbatim
        # into later link calls.
        # NOTE(review): "0.50" is a fixed literal, not read back from the
        # graph — confirm it matches the graph's initial confidence.
        return f"Hypothesis created: {hid} — {title} (confidence: 0.50)"

    async def _link_phenomenon_to_hypothesis(
        self,
        phenomenon_id: str,
        hypothesis_id: str,
        edge_type: str = "",
        reason: str = "",
        # Common LLM misnaming — accept as fallbacks
        relationship: str = "",
        note: str = "",
    ) -> str:
        """Link a phenomenon to a hypothesis and report the new confidence.

        Args:
            phenomenon_id: ID of the phenomenon to link.
            hypothesis_id: ID of the hypothesis to link to.
            edge_type: Relationship type; must be a key of
                ``HYPOTHESIS_EDGE_WEIGHTS``.
            reason: Why the relationship holds.
            relationship: Fallback used when ``edge_type`` is empty.
            note: Fallback used when ``reason`` is empty.

        Returns:
            A status line describing the link, or an ``Error...`` string when
            the edge type is missing/unknown or the graph raises ``ValueError``.
        """
        edge_type = edge_type or relationship
        reason = reason or note
        if not edge_type:
            return "Error: edge_type is required."

        # Validate before touching the graph: the weight lookup below would
        # otherwise raise an uncaught KeyError for an unknown type, possibly
        # after the graph had already been updated.
        if edge_type not in HYPOTHESIS_EDGE_WEIGHTS:
            valid = ", ".join(HYPOTHESIS_EDGE_WEIGHTS)
            return f"Error: unknown edge_type '{edge_type}'. Valid types: {valid}."

        try:
            new_conf = await self.graph.update_hypothesis_confidence(
                hyp_id=hypothesis_id,
                phenomenon_id=phenomenon_id,
                edge_type=edge_type,
                reason=reason,
            )
        except ValueError as e:
            return f"Error linking: {e}"

        weight = HYPOTHESIS_EDGE_WEIGHTS[edge_type]
        # Show an explicit "+" for positive weights; negative weights carry
        # their own sign.
        direction = "+" if weight > 0 else ""
        return (
            f"Linked: {phenomenon_id} —[{edge_type}]→ {hypothesis_id} "
            f"(weight: {direction}{weight}, new confidence: {new_conf:.3f})"
        )

34
agents/network.py Normal file
View File

@@ -0,0 +1,34 @@
"""Network Agent — analyzes browser history, network tool artifacts, and wireless evidence."""
from __future__ import annotations
from base_agent import BaseAgent
from evidence_graph import EvidenceGraph
from llm_client import LLMClient
from tool_registry import TOOL_CATALOG
class NetworkAgent(BaseAgent):
    """Agent whose role text targets browser, network and wireless evidence.

    On construction it registers a fixed list of tools, looked up by name in
    ``TOOL_CATALOG``, on itself.
    """

    name = "network"
    role = (
        "Network forensic analyst. You analyze browser history, cookies, "
        "network captures (PCAP), wireless artifacts, and other network-related "
        "evidence to reconstruct online activities and network attacks."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Initialise the base agent, then attach this agent's tools."""
        super().__init__(llm, graph)
        self._register_tools()

    def _register_tools(self) -> None:
        """Register each wanted tool that ``TOOL_CATALOG`` can supply.

        A name whose catalog lookup is falsy is passed over silently.
        """
        wanted = (
            "list_directory",
            "extract_file",
            "read_text_file",
            "read_binary_preview",
            "list_extracted_dir",
            "search_strings",
            "search_text_file",
            "read_text_file_section",
            "parse_pcap_strings",
        )
        for tool_name in wanted:
            entry = TOOL_CATALOG.get(tool_name)
            if not entry:
                continue
            self.register_tool(
                entry.name,
                entry.description,
                entry.input_schema,
                entry.executor,
            )

36
agents/registry.py Normal file
View File

@@ -0,0 +1,36 @@
"""Registry Agent — analyzes Windows registry hives."""
from __future__ import annotations
from base_agent import BaseAgent
from evidence_graph import EvidenceGraph
from llm_client import LLMClient
from tool_registry import TOOL_CATALOG
class RegistryAgent(BaseAgent):
    """Agent whose role text targets Windows registry hive analysis.

    On construction it registers a fixed list of tools, looked up by name in
    ``TOOL_CATALOG``, on itself.
    """

    name = "registry"
    role = (
        "Windows registry forensic analyst. You parse registry hive files "
        "(SYSTEM, SOFTWARE, SAM, NTUSER.DAT) to extract system configuration, "
        "user accounts, installed software, network settings, email accounts, "
        "and other Windows artifacts."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Initialise the base agent, then attach this agent's tools."""
        super().__init__(llm, graph)
        self._register_tools()

    def _register_tools(self) -> None:
        """Register each wanted tool that ``TOOL_CATALOG`` can supply.

        Catalog lookups that come back falsy are dropped silently.
        """
        wanted = (
            "extract_file",
            "list_directory",
            "parse_registry_key",
            "list_installed_software",
            "get_user_activity",
            "search_registry",
            "get_system_info",
            "get_timezone_info",
            "get_computer_name",
            "get_shutdown_time",
            "enumerate_users",
            "get_network_interfaces",
            "get_email_config",
        )
        # Lazy map + filter(None, ...) keeps the lookup/register interleaving
        # and the truthiness test of a plain loop.
        for entry in filter(None, map(TOOL_CATALOG.get, wanted)):
            self.register_tool(
                entry.name,
                entry.description,
                entry.input_schema,
                entry.executor,
            )

191
agents/report.py Normal file
View File

@@ -0,0 +1,191 @@
"""Report Agent — generates forensic analysis reports."""
from __future__ import annotations
import json
import os
from base_agent import BaseAgent
from evidence_graph import EvidenceGraph
from llm_client import LLMClient
class ReportAgent(BaseAgent):
    """Agent that reads the evidence graph and produces the final report.

    It registers read-only query tools over the graph (phenomena, hypotheses,
    case info, entities, provenance check) plus a ``save_report`` tool that
    writes Markdown to disk, and overrides the system prompt.
    """

    # Edge types treated as supporting vs. contradicting a hypothesis when
    # rendering linked evidence.
    _SUPPORTING_EDGE_TYPES = (
        "direct_evidence",
        "supports",
        "prerequisite_met",
        "consequence_observed",
    )
    _CONTRADICTING_EDGE_TYPES = ("contradicts", "weakens")

    name = "report"
    # NOTE(review): this role text says the model MUST call save_report, while
    # _build_system_prompt (which embeds this role) tells it NOT to use the
    # save_report tool. The two instructions conflict; decide which one is
    # intended and remove the other.
    role = (
        "Forensic report writer. You synthesize all findings from the investigation "
        "into a structured, professional forensic analysis report organized by hypotheses.\n\n"
        "IMPORTANT: Only include findings that have a source_tool attribution (marked VERIFIED). "
        "If evidence lacks source attribution, mark it as UNVERIFIED. "
        "Do NOT invent or fabricate any data, timestamps, or findings not present in the evidence.\n\n"
        "CRITICAL: You MUST call save_report to write the final report."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Initialise the base agent, then attach the reporting tools."""
        super().__init__(llm, graph)
        self._register_tools()

    def _build_system_prompt(self, task: str) -> str:
        """Report agent gets a clean prompt — no Phase A/B/C/D workflow.

        Args:
            task: Task description inserted into the prompt.

        Returns:
            The full system prompt, including the role text and the graph's
            ``stats_summary()`` output.
        """
        return (
            "You are a forensic report writer.\n"
            f"Role: {self.role}\n\n"
            f"Investigation state:\n{self.graph.stats_summary()}\n\n"
            f"Your task: {task}\n\n"
            "WORKFLOW:\n"
            "1. Call get_hypotheses_with_evidence to get all hypotheses and their linked evidence\n"
            "2. Call get_all_phenomena to get detailed findings by category\n"
            "3. Call get_entities to get people, programs, and hosts\n"
            "4. Call get_case_info for case metadata\n"
            "5. Write the complete report directly in your <answer> block\n\n"
            "RULES:\n"
            "- Write the report DIRECTLY in <answer> — do NOT use save_report tool\n"
            "- Only include findings present in the evidence graph\n"
            "- Do NOT invent timestamps, file paths, or data not in the phenomena\n"
            "- The report must be complete — do not cut off mid-section\n"
        )

    def _register_tools(self) -> None:
        """Register the graph query tools and the report-saving tool."""
        self.register_tool(
            name="get_all_phenomena",
            description="Get all phenomena across all categories with full details.",
            input_schema={"type": "object", "properties": {}},
            executor=self._get_all_phenomena,
        )
        self.register_tool(
            name="get_hypotheses_with_evidence",
            description="Get all hypotheses with their linked phenomena (supporting and contradicting).",
            input_schema={"type": "object", "properties": {}},
            executor=self._get_hypotheses_with_evidence,
        )
        self.register_tool(
            name="get_case_info",
            description="Get case metadata (image info, drive details, etc.).",
            input_schema={"type": "object", "properties": {}},
            executor=self._get_case_info,
        )
        self.register_tool(
            name="get_entities",
            description="Get all entities (people, programs, hosts) and their connections.",
            input_schema={"type": "object", "properties": {}},
            executor=self._get_entities,
        )
        self.register_tool(
            name="save_report",
            description="Save the final report to a file.",
            input_schema={
                "type": "object",
                "properties": {
                    "content": {"type": "string", "description": "Report content in Markdown."},
                    "output_path": {"type": "string", "description": "File path to save the report."},
                },
                "required": ["content", "output_path"],
            },
            executor=self._save_report,
        )
        self.register_tool(
            name="verify_phenomena",
            description="Check phenomena provenance — VERIFIED (has source_tool) vs UNVERIFIED.",
            input_schema={"type": "object", "properties": {}},
            executor=self._verify_phenomena,
        )

    async def _get_all_phenomena(self) -> str:
        """Render every phenomenon, grouped by category in sorted order.

        Returns:
            A multi-line listing, or a fixed message when the graph holds no
            phenomena. Each description is cut to its first 500 characters.
        """
        phenomena = self.graph.phenomena
        if not phenomena:
            return "No phenomena in the evidence graph."

        # Group in a single pass instead of rescanning all phenomena once per
        # category; order within a category follows the graph's iteration
        # order.
        by_category = {}
        for ph in phenomena.values():
            by_category.setdefault(ph.category, []).append(ph)

        lines = [f"=== All Phenomena ({len(phenomena)} entries) ==="]
        for cat in sorted(by_category):
            items = by_category[cat]
            lines.append(f"\n--- {cat.upper()} ({len(items)} entries) ---")
            for ph in items:
                # A phenomenon counts as verified when source_tool is truthy.
                verified = "VERIFIED" if ph.source_tool else "UNVERIFIED"
                lines.append(f"\n[{verified}] {ph.title} ({ph.id})")
                lines.append(f"  Source: {ph.source_agent} | Tool: {ph.source_tool or 'N/A'}")
                if ph.timestamp:
                    lines.append(f"  Timestamp: {ph.timestamp}")
                lines.append(f"  {ph.description[:500]}")
        return "\n".join(lines)

    async def _get_hypotheses_with_evidence(self) -> str:
        """Render every hypothesis with its incoming evidence edges.

        Returns:
            A multi-line listing with supporting and contradicting evidence
            per hypothesis, or a fixed message when none are defined.
        """
        if not self.graph.hypotheses:
            return "No hypotheses defined."

        lines = [f"=== Hypotheses ({len(self.graph.hypotheses)}) ==="]
        for hyp in self.graph.hypotheses.values():
            lines.append(f"\n### {hyp.title}")
            lines.append(f"Confidence: {hyp.confidence:.2f} | Status: {hyp.status}")
            lines.append(f"Description: {hyp.description}")

            related = self.graph.get_related(hyp.id, direction="in")
            supporting = [
                r for r in related if r["edge_type"] in self._SUPPORTING_EDGE_TYPES
            ]
            contradicting = [
                r for r in related if r["edge_type"] in self._CONTRADICTING_EDGE_TYPES
            ]

            if supporting:
                lines.append(f"\nSupporting evidence ({len(supporting)}):")
                for r in supporting:
                    lines.append(f"  [{r['edge_type']}] {r['node']}")
            if contradicting:
                lines.append(f"\nContradicting evidence ({len(contradicting)}):")
                for r in contradicting:
                    lines.append(f"  [{r['edge_type']}] {r['node']}")
            if not supporting and not contradicting:
                lines.append("  (no evidence linked)")
        return "\n".join(lines)

    async def _get_case_info(self) -> str:
        """Render the graph's case_info entries plus image path and offset."""
        lines = ["=== Case Information ==="]
        for k, v in self.graph.case_info.items():
            lines.append(f"  {k}: {v}")
        lines.append(f"  Image path: {self.graph.image_path}")
        lines.append(f"  Partition offset: {self.graph.partition_offset}")
        return "\n".join(lines)

    async def _get_entities(self) -> str:
        """Render every entity with its incoming edges.

        Returns:
            A multi-line listing, or a fixed message when no entities exist.
        """
        if not self.graph.entities:
            return "No entities recorded."

        lines = [f"=== Entities ({len(self.graph.entities)}) ==="]
        for ent in self.graph.entities.values():
            lines.append(f"\n{ent.name} ({ent.entity_type})")
            if ent.description:
                lines.append(f"  {ent.description}")
            for r in self.graph.get_related(ent.id, direction="in"):
                lines.append(f"  ← [{r['edge_type']}] {r['node']}")
        return "\n".join(lines)

    async def _verify_phenomena(self) -> str:
        """Split phenomena into VERIFIED / UNVERIFIED by ``source_tool``.

        Returns:
            A two-section listing with a count per section.
        """
        verified = []
        unverified = []
        for ph in self.graph.phenomena.values():
            entry = f"  [{ph.category}] {ph.title} (agent: {ph.source_agent}, tool: {ph.source_tool or 'N/A'})"
            (verified if ph.source_tool else unverified).append(entry)

        lines = ["=== Phenomena Verification Report ==="]
        lines.append(f"\nVERIFIED ({len(verified)} — have source_tool):")
        lines.extend(verified)
        lines.append(f"\nUNVERIFIED ({len(unverified)} — no source_tool):")
        lines.extend(unverified)
        return "\n".join(lines)

    async def _save_report(self, content: str, output_path: str) -> str:
        """Write the report to ``output_path``, creating parent directories.

        Args:
            content: Report text to write.
            output_path: Destination file path.

        Returns:
            A confirmation line with the character count, or an
            ``Error saving report: ...`` string when writing fails.
        """
        try:
            os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
            # Explicit UTF-8: without it the locale's default encoding is
            # used, which can fail or mis-encode non-ASCII report text.
            with open(output_path, "w", encoding="utf-8") as f:
                f.write(content)
        except Exception as e:
            # Deliberately broad: the failure is returned as a string result
            # rather than raised.
            return f"Error saving report: {e}"
        return f"Report saved to {output_path} ({len(content)} chars)"

88
agents/timeline.py Normal file
View File

@@ -0,0 +1,88 @@
"""Timeline Agent — correlates evidence across time."""
from __future__ import annotations
import json
from base_agent import BaseAgent
from evidence_graph import EvidenceGraph
from llm_client import LLMClient
from tool_registry import TOOL_CATALOG
class TimelineAgent(BaseAgent):
    """Agent whose role text targets chronological correlation of evidence.

    It registers one optional catalog tool plus two tools of its own: one
    that lists timestamped phenomena and one that adds temporal edges.
    """

    name = "timeline"
    role = (
        "Timeline forensic analyst. You build chronological timelines from filesystem "
        "MAC timestamps and correlate events across all phenomena categories in the "
        "evidence graph to reconstruct the sequence of activities on the system."
    )

    def __init__(self, llm: LLMClient, graph: EvidenceGraph) -> None:
        """Initialise the base agent, then attach the timeline tools."""
        super().__init__(llm, graph)
        self._register_tools()

    def _register_tools(self) -> None:
        """Register the catalog timeline tool (if present) and custom tools."""
        # Catalog-provided filesystem timeline tool; skipped when the lookup
        # is falsy.
        catalog_entry = TOOL_CATALOG.get("build_filesystem_timeline")
        if catalog_entry:
            self.register_tool(
                catalog_entry.name,
                catalog_entry.description,
                catalog_entry.input_schema,
                catalog_entry.executor,
            )

        # Lists phenomena that carry a timestamp, for correlation.
        self.register_tool(
            name="get_timestamped_phenomena",
            description="Get all phenomena that have timestamps, sorted chronologically. Use for timeline correlation.",
            input_schema={"type": "object", "properties": {}},
            executor=self._get_timestamped_phenomena,
        )

        # Adds a temporal edge between two phenomena.
        temporal_edge_schema = {
            "type": "object",
            "properties": {
                "source_id": {"type": "string", "description": "ID of the earlier/source phenomenon."},
                "target_id": {"type": "string", "description": "ID of the later/target phenomenon."},
                "relation": {
                    "type": "string",
                    "enum": ["before", "after", "concurrent"],
                    "description": "Temporal relationship.",
                },
            },
            "required": ["source_id", "target_id", "relation"],
        }
        self.register_tool(
            name="add_temporal_edge",
            description="Add a temporal relationship between two phenomena (before, after, or concurrent).",
            input_schema=temporal_edge_schema,
            executor=self._add_temporal_edge,
        )

    async def _get_timestamped_phenomena(self) -> str:
        """List phenomena with a truthy timestamp, ordered by that timestamp.

        Returns:
            Two lines per phenomenon (header and the first 150 characters of
            the description), or a fixed message when none have a timestamp.
        """
        # Only truthy timestamps survive the filter, so the sort key needs no
        # fallback value.
        ordered = sorted(
            (ph for ph in self.graph.phenomena.values() if ph.timestamp),
            key=lambda ph: ph.timestamp,
        )
        if not ordered:
            return "No phenomena with timestamps found."

        lines = []
        for ph in ordered:
            lines.extend(
                (
                    f"{ph.timestamp} | [{ph.category}] {ph.title} ({ph.id})",
                    f"  {ph.description[:150]}",
                )
            )
        return "\n".join(lines)

    async def _add_temporal_edge(
        self, source_id: str, target_id: str, relation: str,
    ) -> str:
        """Add a ``temporal`` edge between two phenomena.

        Args:
            source_id: ID of the source phenomenon.
            target_id: ID of the target phenomenon.
            relation: Relation label stored in the edge metadata.

        Returns:
            A confirmation line, or ``Error: ...`` when the graph raises
            ``ValueError``.
        """
        try:
            await self.graph.add_edge(
                source_id=source_id,
                target_id=target_id,
                edge_type="temporal",
                metadata={"relation": relation},
                created_by=self.name,
            )
        except ValueError as e:
            return f"Error: {e}"
        else:
            return f"Temporal edge added: {source_id} —[{relation}]→ {target_id}"