Initial commit
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
615
tool_registry.py
Normal file
615
tool_registry.py
Normal file
@@ -0,0 +1,615 @@
|
||||
"""Central tool registry — catalogs all available forensic tools.
|
||||
|
||||
Tools are registered once at startup with bound image_path and offset.
|
||||
The AgentFactory uses this catalog to compose agents dynamically.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from tools import parsers
|
||||
from tools import registry as reg
|
||||
from tools import sleuthkit as tsk
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
# Tool result cache — keyed by "<tool_name>:<args_hash>" (see _cache_key).
# The disk image is read-only, so tools that only read from it return the
# same output for identical arguments and can be memoised in memory.
# ---------------------------------------------------------------------------

_tool_result_cache: dict[str, str] = {}

# Tools safe to cache: deterministic reads with no side effects.
#
# "list_extracted_dir" is intentionally NOT listed: the extraction directory
# gains a new file every time extract_file succeeds, so a cached listing
# would go stale and hide freshly extracted files from the agents.
CACHEABLE_TOOLS: set[str] = {
    # Sleuth Kit reads against the (read-only) disk image
    "partition_info",
    "filesystem_info",
    "list_directory",
    "find_file",
    "search_strings",
    "count_deleted_files",
    "build_filesystem_timeline",
    # Registry hive reads
    "parse_registry_key",
    "search_registry",
    "get_user_activity",
    # Reads of individual extracted files
    "read_text_file",
    "read_binary_preview",
    "search_text_file",
    "read_text_file_section",
    "parse_pcap_strings",
}
|
||||
|
||||
|
||||
def _cache_key(tool_name: str, kwargs: dict) -> str:
|
||||
"""Build a deterministic cache key from tool name + arguments."""
|
||||
args_str = json.dumps(kwargs, sort_keys=True, ensure_ascii=False)
|
||||
args_hash = hashlib.md5(args_str.encode()).hexdigest()
|
||||
return f"{tool_name}:{args_hash}"
|
||||
|
||||
|
||||
def _make_cached(tool_name: str, executor: Any) -> Any:
    """Wrap an executor with an in-memory result cache.

    Args:
        tool_name: Name used as the cache-key prefix and in debug logs.
        executor: Callable invoked with keyword arguments. It may return the
            result directly or return an awaitable; ToolDefinition documents
            that some parser executors can be synchronous, so both forms are
            accepted here instead of awaiting unconditionally.

    Returns:
        An async callable taking the same keyword arguments. Results are
        stored in ``_tool_result_cache`` under ``_cache_key(tool_name, kwargs)``
        unless they look like a failure.
    """
    # Function-scope import: only this wrapper needs it.
    import inspect

    # Results starting with one of these prefixes are treated as failures
    # and are never cached, so a later retry can still succeed.
    failure_prefixes = ("Error", "[Command failed")

    async def wrapper(**kwargs) -> str:
        key = _cache_key(tool_name, kwargs)
        cached = _tool_result_cache.get(key)
        if cached is not None:
            logger.debug("Cache hit: %s(%s)", tool_name, kwargs)
            return cached

        result = executor(**kwargs)
        if inspect.isawaitable(result):
            result = await result

        # Only cache successful textual results (not errors, not None).
        if isinstance(result, str) and not result.startswith(failure_prefixes):
            _tool_result_cache[key] = result
        return result

    return wrapper
|
||||
|
||||
|
||||
def get_cache_stats() -> dict[str, int]:
    """Report diagnostics for the tool result cache.

    Returns:
        A dict with a single key, ``"entries"``, holding the number of
        results currently stored in the cache.
    """
    entry_count = len(_tool_result_cache)
    return {"entries": entry_count}
|
||||
|
||||
# Category auto-detection patterns (filename → category)
|
||||
_REGISTRY_HIVE_NAMES = {"system", "software", "sam", "ntuser.dat", "security", "default"}
|
||||
|
||||
ASSET_CATEGORIES = [
|
||||
"registry_hive", "chat_log", "prefetch", "network_capture",
|
||||
"config_file", "address_book", "recycle_bin", "executable",
|
||||
"text_log", "other",
|
||||
]
|
||||
|
||||
|
||||
def _auto_categorize(filename: str) -> str:
    """Infer an asset category from a bare file name.

    Matching is case-insensitive and the rules are tried in order; the first
    rule that applies decides the category.

    Args:
        filename: File name without directory components.

    Returns:
        One of the labels used in ``ASSET_CATEGORIES``; ``"other"`` when no
        rule applies.
    """
    chat_keywords = ("irc", "mirc", "channel", "chat")

    name_lower = filename.lower()
    ext = os.path.splitext(name_lower)[1]
    looks_like_chat = any(kw in name_lower for kw in chat_keywords)

    # Hive files are recognised by their complete (lower-cased) file name.
    if name_lower in _REGISTRY_HIVE_NAMES:
        return "registry_hive"
    if ext == ".pf":
        return "prefetch"
    if ext in (".pcap", ".cap") or name_lower == "interception":
        return "network_capture"
    if ext == ".wab":
        return "address_book"
    # fullmatch, not match: the whole name must be "dc<N>.exe", so names that
    # merely start that way (e.g. "dc1.exe.log") fall through to later rules.
    if name_lower == "info2" or re.fullmatch(r"dc\d+\.exe", name_lower):
        return "recycle_bin"
    # Extension-based checks before keyword-based (e.g. mirc.ini → config, not chat)
    if ext in (".ini", ".csv", ".dat", ".cfg"):
        return "config_file"
    if ext in (".log", ".lst"):
        return "chat_log" if looks_like_chat else "text_log"
    if looks_like_chat:
        return "chat_log"
    if ext in (".exe", ".dll", ".com"):
        return "executable"
    return "other"
|
||||
|
||||
|
||||
@dataclass
class ToolDefinition:
    """Catalog entry describing one tool that agents can be composed from.

    Attributes:
        name: Tool name; also the key under which it is stored in the catalog.
        description: Human-readable explanation of what the tool does.
        input_schema: JSON-schema-style dict describing the tool's arguments.
        executor: Callable that runs the tool.
        module: Origin of the tool: "sleuthkit", "registry" or "parsers".
        tags: Free-form labels; defaults to a fresh empty list per instance.
    """

    name: str
    description: str
    input_schema: dict
    # async callable (or sync for some parsers)
    executor: Any
    # "sleuthkit", "registry", "parsers"
    module: str
    tags: list[str] = field(default_factory=list)


# Process-wide tool catalog keyed by tool name; filled by register_all_tools().
TOOL_CATALOG: dict[str, ToolDefinition] = {}
|
||||
|
||||
|
||||
def _make_auto_record(tool_name: str, category: str, executor: Any, graph: Any) -> Any:
|
||||
"""Wrap a forensic tool to auto-record its result as a phenomenon."""
|
||||
|
||||
async def wrapper(**kwargs) -> str:
|
||||
result = await executor(**kwargs)
|
||||
if graph is None or not result or result.startswith("Error") or result.startswith("["):
|
||||
return result
|
||||
# Auto-record: the tool produced a forensic fact
|
||||
agent = getattr(graph, "_current_agent", "") or "unknown"
|
||||
title = f"{tool_name}: {result.split(chr(10))[0][:80]}"
|
||||
await graph.add_phenomenon(
|
||||
source_agent=agent,
|
||||
category=category,
|
||||
title=title,
|
||||
description=result[:2000],
|
||||
source_tool=tool_name,
|
||||
)
|
||||
return result
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
def register_all_tools(
    image_path: str,
    partition_offset: int,
    graph: Any = None,
    extracted_dir: str = "extracted",
) -> None:
    """Rebuild TOOL_CATALOG with every tool pre-bound to one image and offset.

    The catalog is emptied first, then Sleuth Kit, registry and parser tools
    are registered in a fixed order. Finally the result cache is emptied and
    every registered tool named in CACHEABLE_TOOLS gets its executor wrapped
    by ``_make_cached``.

    Args:
        image_path: Disk image path bound into the Sleuth Kit executors.
        partition_offset: Partition offset bound into the Sleuth Kit executors.
        graph: Optional evidence graph. When given, extractions are
            de-duplicated and registered through it, and auto-record tools
            add their results to it.
        extracted_dir: Directory that extracted files are written into.
    """
    TOOL_CATALOG.clear()

    # ---- Local helpers that remove the per-tool boilerplate ----

    def _schema(properties: Any = None, required: Any = None) -> dict:
        """Build an object schema; "required" is omitted when not given."""
        schema: dict = {
            "type": "object",
            "properties": {} if properties is None else properties,
        }
        if required is not None:
            schema["required"] = required
        return schema

    def _text(description: str) -> dict:
        """Describe a string-typed argument."""
        return {"type": "string", "description": description}

    def _add(name: str, description: str, input_schema: dict, executor: Any,
             module: str, tags: list) -> None:
        """Store one ToolDefinition in the catalog under its own name."""
        TOOL_CATALOG[name] = ToolDefinition(
            name=name,
            description=description,
            input_schema=input_schema,
            executor=executor,
            module=module,
            tags=tags,
        )

    def _recorded(name: str, category: str, executor: Any) -> Any:
        """Wrap an executor so its results are auto-recorded in the graph."""
        return _make_auto_record(name, category, executor, graph)

    # ---- Sleuth Kit tools ----

    _add(
        "partition_info",
        "Get the partition table layout of the disk image. Run this first to understand disk structure.",
        _schema(),
        lambda: tsk.partition_info(image_path),
        "sleuthkit",
        ["filesystem", "disk", "partition"],
    )

    _add(
        "filesystem_info",
        "Get detailed filesystem information (type, block size, volume name, etc.) for the selected partition.",
        _schema(),
        lambda: tsk.filesystem_info(image_path, partition_offset),
        "sleuthkit",
        ["filesystem", "disk"],
    )

    _add(
        "list_directory",
        "List files and directories. Without inode, lists root. Use recursive=true for all files.",
        _schema({
            "inode": _text("Inode of directory. Omit for root."),
            "recursive": {"type": "boolean", "description": "List all files recursively."},
        }),
        lambda inode=None, recursive=False: tsk.list_directory(
            image_path, partition_offset, inode, recursive
        ),
        "sleuthkit",
        ["filesystem", "directory", "listing"],
    )

    async def _extract_with_tracking(inode: str) -> str:
        """Extract one file by inode, naming and categorising it from its disk path.

        Returns a human-readable status string: an "Already extracted" notice,
        an "Error: ..." message, the raw icat failure text, or the success
        summary with the local path and the original disk path.
        """
        # De-duplicate: reuse a previous extraction of the same inode.
        if graph is not None:
            known = graph.lookup_asset_by_inode(inode)
            if known is not None:
                return (
                    f"Already extracted: {known.local_path} "
                    f"({known.size_bytes} bytes, {known.category}). "
                    f"Disk path: {known.original_path}"
                )

        # The real on-disk path drives both the local name and the category.
        orig_path = (await tsk.find_file(image_path, inode, partition_offset)).strip()
        if not orig_path or "not found" in orig_path.lower():
            return f"Error: inode {inode} not found on the disk image."

        disk_name = os.path.basename(orig_path)
        filename = disk_name
        local_path = os.path.join(extracted_dir, filename)

        # A file with that name already exists locally: disambiguate with the inode.
        if os.path.exists(local_path):
            stem, suffix = os.path.splitext(filename)
            inode_tag = inode.replace("-", "_")
            local_path = os.path.join(extracted_dir, f"{stem}_{inode_tag}{suffix}")
            filename = os.path.basename(local_path)

        outcome = await tsk.extract_file(image_path, inode, local_path, partition_offset)
        if outcome.startswith("[icat failed"):
            return outcome

        size = os.path.getsize(local_path) if os.path.exists(local_path) else 0
        category = _auto_categorize(disk_name)

        # Record the new asset in the evidence graph when one is attached.
        if graph is not None:
            agent_name = getattr(graph, "_current_agent", "") or "unknown"
            await graph.register_asset(
                inode=inode,
                original_path=orig_path,
                local_path=local_path,
                category=category,
                filename=filename,
                size_bytes=size,
                extracted_by=agent_name,
            )
            logger.info("Asset registered: %s (%s, %d bytes)", local_path, category, size)

        return (
            f"Extracted to {local_path} ({size} bytes, {category})\n"
            f"Disk path: {orig_path}"
        )

    _add(
        "extract_file",
        (
            "Extract a file from the disk image by inode number. "
            "The filename is automatically determined from the disk path. "
            "Checks if already extracted (returns existing path if so). "
            "Returns the local path and the original disk path."
        ),
        _schema(
            {"inode": _text("Inode number of the file (e.g. '334-128-4' or '334').")},
            ["inode"],
        ),
        _extract_with_tracking,
        "sleuthkit",
        ["filesystem", "extraction"],
    )

    _add(
        "find_file",
        "Find the file path for a given inode number.",
        _schema({"inode": _text("Inode number to look up.")}, ["inode"]),
        lambda inode: tsk.find_file(image_path, inode, partition_offset),
        "sleuthkit",
        ["filesystem"],
    )

    _add(
        "search_strings",
        "Search for a string pattern across the entire disk image (slow on first call, fast after). Prefer search_text_file on already-extracted files when possible.",
        _schema({"pattern": _text("String pattern (case-insensitive grep).")}, ["pattern"]),
        lambda pattern: tsk.search_strings(image_path, pattern),
        "sleuthkit",
        ["filesystem", "search", "strings"],
    )

    _add(
        "count_deleted_files",
        "List and count all deleted files. Shows total count, executables, and extension breakdown.",
        _schema(),
        lambda: tsk.count_deleted_files(image_path, partition_offset),
        "sleuthkit",
        ["filesystem", "deleted", "recovery"],
    )

    _add(
        "build_filesystem_timeline",
        "Build a MAC timeline from the filesystem (Modified/Accessed/Changed times for all files).",
        _schema(),
        lambda: tsk.build_timeline(image_path, partition_offset),
        "sleuthkit",
        ["filesystem", "timeline"],
    )

    # ---- Registry tools ----

    _add(
        "parse_registry_key",
        "Parse a registry hive file and list subkeys/values at a given path.",
        _schema(
            {
                "hive_path": _text("Path to extracted hive file."),
                "key_path": _text("Registry key path to inspect."),
            },
            ["hive_path", "key_path"],
        ),
        lambda hive_path, key_path: reg.parse_registry_key(hive_path, key_path),
        "registry",
        ["registry", "hive"],
    )

    _add(
        "list_installed_software",
        "List installed software from a SOFTWARE registry hive.",
        _schema({"hive_path": _text("Path to SOFTWARE hive.")}, ["hive_path"]),
        _recorded(
            "list_installed_software", "registry",
            lambda hive_path: reg.list_installed_software(hive_path),
        ),
        "registry",
        ["registry", "software", "installed"],
    )

    _add(
        "get_user_activity",
        "Extract user activity from NTUSER.DAT (recent docs, typed URLs, run dialog history).",
        _schema({"hive_path": _text("Path to NTUSER.DAT.")}, ["hive_path"]),
        lambda hive_path: reg.get_user_activity(hive_path),
        "registry",
        ["registry", "user", "activity"],
    )

    _add(
        "search_registry",
        "Search for a pattern in registry key names and values.",
        _schema(
            {
                "hive_path": _text("Path to hive file."),
                "pattern": _text("Search pattern."),
            },
            ["hive_path", "pattern"],
        ),
        lambda hive_path, pattern: reg.search_registry(hive_path, pattern),
        "registry",
        ["registry", "search"],
    )

    # ---- Registry tools (auto-record: results are forensic facts) ----

    _add(
        "get_system_info",
        "Extract OS version, install date, and registered owner from a SOFTWARE hive.",
        _schema({"hive_path": _text("Path to SOFTWARE hive.")}, ["hive_path"]),
        _recorded(
            "get_system_info", "registry",
            lambda hive_path: reg.get_system_info(hive_path),
        ),
        "registry",
        ["registry", "system"],
    )

    _add(
        "get_timezone_info",
        "Extract timezone settings from a SYSTEM hive.",
        _schema({"hive_path": _text("Path to SYSTEM hive.")}, ["hive_path"]),
        _recorded(
            "get_timezone_info", "registry",
            lambda hive_path: reg.get_timezone_info(hive_path),
        ),
        "registry",
        ["registry", "timezone", "system"],
    )

    _add(
        "get_computer_name",
        "Extract computer/host name from a SYSTEM hive.",
        _schema({"hive_path": _text("Path to SYSTEM hive.")}, ["hive_path"]),
        _recorded(
            "get_computer_name", "registry",
            lambda hive_path: reg.get_computer_name(hive_path),
        ),
        "registry",
        ["registry", "system", "hostname"],
    )

    _add(
        "get_shutdown_time",
        "Extract last shutdown time from a SYSTEM hive.",
        _schema({"hive_path": _text("Path to SYSTEM hive.")}, ["hive_path"]),
        _recorded(
            "get_shutdown_time", "registry",
            lambda hive_path: reg.get_shutdown_time(hive_path),
        ),
        "registry",
        ["registry", "system", "shutdown"],
    )

    _add(
        "enumerate_users",
        "List all user accounts and RIDs from a SAM hive.",
        _schema({"hive_path": _text("Path to SAM hive.")}, ["hive_path"]),
        _recorded(
            "enumerate_users", "registry",
            lambda hive_path: reg.enumerate_users(hive_path),
        ),
        "registry",
        ["registry", "user", "accounts", "sam"],
    )

    _add(
        "get_network_interfaces",
        "Extract network adapter and TCP/IP config from a SYSTEM hive.",
        _schema({"hive_path": _text("Path to SYSTEM hive.")}, ["hive_path"]),
        _recorded(
            "get_network_interfaces", "registry",
            lambda hive_path: reg.get_network_interfaces(hive_path),
        ),
        "registry",
        ["registry", "network", "adapter", "ip"],
    )

    _add(
        "get_email_config",
        "Extract email account configuration (SMTP, POP3, NNTP) from NTUSER.DAT.",
        _schema({"hive_path": _text("Path to NTUSER.DAT.")}, ["hive_path"]),
        _recorded(
            "get_email_config", "registry",
            lambda hive_path: reg.get_email_config(hive_path),
        ),
        "registry",
        ["registry", "email", "account"],
    )

    # ---- Parser tools ----

    _add(
        "parse_prefetch",
        "Parse a Windows Prefetch (.pf) file to extract executable name, last execution time, and run count.",
        _schema({"file_path": _text("Path to extracted .pf file.")}, ["file_path"]),
        _recorded(
            "parse_prefetch", "filesystem",
            lambda file_path: parsers.parse_prefetch(file_path),
        ),
        "parsers",
        ["filesystem", "prefetch", "execution"],
    )

    _add(
        "read_text_file",
        "Read an extracted text file (configs, logs, chat logs, etc.).",
        _schema({"file_path": _text("Local path to the file.")}, ["file_path"]),
        lambda file_path: parsers.read_text_file(file_path),
        "parsers",
        ["text", "read"],
    )

    _add(
        "read_binary_preview",
        "Preview a binary file in hex+ASCII format.",
        _schema({"file_path": _text("Local path to the file.")}, ["file_path"]),
        lambda file_path: parsers.read_binary_preview(file_path),
        "parsers",
        ["binary", "hex", "preview"],
    )

    _add(
        "search_text_file",
        "Search for a regex pattern in an extracted text file. Returns matching lines with line numbers.",
        _schema(
            {
                "file_path": _text("Path to extracted file."),
                "pattern": _text("Regex pattern."),
            },
            ["file_path", "pattern"],
        ),
        lambda file_path, pattern: parsers.search_text_file(file_path, pattern),
        "parsers",
        ["text", "search", "regex"],
    )

    _add(
        "read_text_file_section",
        "Read a section of a large text file starting at a byte offset.",
        _schema(
            {
                "file_path": _text("Path to file."),
                "start": {"type": "integer", "description": "Byte offset to start reading."},
                "max_bytes": {"type": "integer", "description": "Maximum bytes to read."},
            },
            ["file_path"],
        ),
        lambda file_path, start=0, max_bytes=8000: parsers.read_text_file_section(
            file_path, start, max_bytes
        ),
        "parsers",
        ["text", "read", "section"],
    )

    _add(
        "list_extracted_dir",
        "List files in an extracted directory with sizes.",
        _schema({"dir_path": _text("Directory path.")}, ["dir_path"]),
        lambda dir_path: parsers.list_extracted_dir(dir_path),
        "parsers",
        ["filesystem", "listing", "extracted"],
    )

    _add(
        "parse_pcap_strings",
        "Extract HTTP headers, hosts, User-Agent, cookies, and URLs from a PCAP/capture file.",
        _schema({"file_path": _text("Path to PCAP file.")}, ["file_path"]),
        lambda file_path: parsers.parse_pcap_strings(file_path),
        "parsers",
        ["network", "pcap", "http", "capture"],
    )

    # ---- Apply result caching to deterministic read-only tools ----
    # This has to run after every tool is registered. Auto-record tools
    # (e.g. get_system_info) are not in CACHEABLE_TOOLS because they write to
    # the evidence graph as a side effect.
    _tool_result_cache.clear()
    for cacheable_name in TOOL_CATALOG.keys() & CACHEABLE_TOOLS:
        definition = TOOL_CATALOG[cacheable_name]
        definition.executor = _make_cached(cacheable_name, definition.executor)
|
||||
Reference in New Issue
Block a user