From f04ccd4bc785826512e25ae08e254fd094aedd43 Mon Sep 17 00:00:00 2001
From: BattleTag <hychen3637.com>
Date: Thu, 21 May 2026 02:15:08 -1000
Subject: [PATCH] =?UTF-8?q?fix(base=5Fagent):=20forced-retry=20iter=20cap?=
 =?UTF-8?q?=2010=E2=86=9230=20+=20narrow=20tools=20to=20record+read?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Timeline agent produced 0 phenomena on the 2026-05-20 full run: the
initial round hit the max_iterations=60 cap before recording anything,
and the forced retry then hit its max_iterations=10 cap because every
grounding-rejected call burns one iteration in the new gateway. Two
changes restore depth without re-introducing the original "agent
wanders off and never records" failure:

  1. Raise retry cap 10 → 30. With grounding auto-rescue (prev commit)
     most rejections heal on the first retry, but some still need 2-3
     turns; 10 is empirically too tight, 30 leaves headroom.

  2. Narrow the retry tool surface to RECORD + graph-write +
     read-only-graph-query tools. Investigation tools (list_directory,
     sqlite_query, parse_registry_key) are dropped on retry so the agent
     can't restart its search loop — the retry is explicitly "record
     what you already found, then stop".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 base_agent.py               | 19 +++++++++--
 tests/test_optimizations.py | 63 +++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/base_agent.py b/base_agent.py
index 307887d..82c4a54 100644
--- a/base_agent.py
+++ b/base_agent.py
@@ -228,12 +228,27 @@ class BaseAgent:
                         f"what you already found. Then end."
                     ),
                 })
+                # Narrow the retry tool surface so the agent can't wander off
+                # to investigate again — only mandatory RECORD tools plus the
+                # allow-listed graph-write and read-only query tools survive.
+                # Each grounding-rejected call burns one iteration, so the cap
+                # is 30 (was 10): a Timeline agent writing ~10 temporal edges
+                # with one rejection apiece needs ~20 turns in the new gateway.
+                retry_tool_names = set(registered_mandatory) | {
+                    "list_phenomena", "list_assets", "search_graph",
+                    "add_temporal_edge", "link_to_entity", "add_lead",
+                    "add_hypothesis", "save_report",
+                }
+                retry_tools = [
+                    td for td in self.get_tool_definitions()
+                    if td["name"] in retry_tool_names
+                ]
                 final_text, _ = await self.llm.tool_call_loop(
                     messages=conversation,
-                    tools=self.get_tool_definitions(),
+                    tools=retry_tools,
                     tool_executor=self._executors,
                     system=system,
-                    max_iterations=10,
+                    max_iterations=30,
                     terminal_tools=self.terminal_tools,
                 )
 
diff --git a/tests/test_optimizations.py b/tests/test_optimizations.py
index 2504285..33b6d2b 100644
--- a/tests/test_optimizations.py
+++ b/tests/test_optimizations.py
@@ -1332,6 +1332,69 @@ class TestInvestigationAreaDerivation:
         assert edge_added["n"] == 1
         assert agent._record_call_counts["add_temporal_edge"] == 1
 
+    @pytest.mark.asyncio
+    async def test_forced_retry_uses_higher_cap_and_narrowed_tools(self):
+        """The forced RECORD retry must (a) get a generous iter cap so that
+        grounding-rejected retries don't blow the budget, and (b) hand the
+        LLM a tool surface restricted to mandatory RECORD tools plus the
+        allow-listed graph tools, so it can't wander back into investigation.
+        """
+        from unittest.mock import AsyncMock
+        from base_agent import BaseAgent
+
+        graph = EvidenceGraph()
+        llm = AsyncMock()
+
+        # Capture the tool names and iter cap passed to each tool_call_loop
+        # call so we can assert what the retry round received.
+        call_kwargs: list[dict] = []
+
+        async def real_add_edge(**kw):
+            return None
+
+        class TimelineLike(BaseAgent):
+            mandatory_record_tools = ("add_temporal_edge",)
+
+        agent = TimelineLike(llm, graph)
+        agent.name = "timeline_like"
+        agent._register_graph_tools = lambda: None
+        agent.register_tool("add_temporal_edge", "", {}, real_add_edge)
+        # An investigation-style tool the retry must NOT expose.
+        async def real_inv(**kw): return ""
+        agent.register_tool("list_directory", "", {}, real_inv)
+        # A read-only graph query — should remain available in retry.
+        async def real_ro(**kw): return ""
+        agent.register_tool("list_phenomena", "", {}, real_ro)
+
+        async def fake_tool_call_loop(messages, tools, tool_executor, system, max_iterations=40, terminal_tools=()):
+            call_kwargs.append({
+                "tools": [t["name"] for t in tools],
+                "max_iterations": max_iterations,
+            })
+            already_retrying = any(  # retry prompt presumably has "STOP."
+                "STOP." in (m.get("content", "") if isinstance(m, dict) else "")
+                for m in messages
+            )
+            if not already_retrying:  # first round: return without recording
+                return "no record", list(messages)
+            await tool_executor["add_temporal_edge"]()  # retry: record once
+            return "recorded.", []
+
+        llm.tool_call_loop = fake_tool_call_loop
+        await agent.run("build timeline")
+
+        assert len(call_kwargs) == 2
+        first_call, retry_call = call_kwargs
+        # First call: full tool surface (its iter cap is not asserted).
+        assert "list_directory" in first_call["tools"]
+        # Retry call: investigation tool dropped, mandatory + read-only kept.
+        assert "list_directory" not in retry_call["tools"]
+        assert "add_temporal_edge" in retry_call["tools"]
+        assert "list_phenomena" in retry_call["tools"]
+        # Iter cap on the retry is now generous — 10 was empirically too tight
+        # because grounding-rejected calls burn iterations.
+        assert retry_call["max_iterations"] >= 30
+
     # ---- terminal_tools: real LLMClient.tool_call_loop short-circuit -----
 
     @pytest.mark.asyncio