Files
llmiotsafe/debug_model.py
2026-05-12 17:01:39 +08:00

84 lines
2.8 KiB
Python

#!/usr/bin/env python3
"""debug_model.py — 单条 episode 调试,打印完整的 raw response"""
import json, sys, requests
from pathlib import Path
PROJECT_ROOT = Path(__file__).resolve().parent
sys.path.insert(0, str(PROJECT_ROOT))
from src.evaluation.prompt_builder import build_prompt
# Defaults for the three optional positional CLI arguments.
DEFAULT_MODE = "baseline"
DEFAULT_MODEL = "Qwen/Qwen3.5-9B"
DEFAULT_API_BASE = "http://localhost:8000/v1"

# How many characters of each field are echoed to the terminal.
REASONING_PREVIEW_CHARS = 500
RAW_PREVIEW_CHARS = 2000
JSON_PREVIEW_CHARS = 1000


def _load_first_tp_episode():
    """Return the parsed JSON of the first ``SQ1_TP_*.json`` episode file.

    Candidates are sorted by name so the same episode is picked on every run
    (``glob`` yields entries in arbitrary order). Exits with a readable
    message when no episode file exists instead of raising ``IndexError``.
    """
    bench_dir = PROJECT_ROOT / "data/benchmark/sq1"
    candidates = sorted(bench_dir.glob("SQ1_TP_*.json"))
    if not candidates:
        sys.exit(f"ERROR: no SQ1_TP_*.json episode found in {bench_dir}")
    with open(candidates[0], "r", encoding="utf-8") as f:
        return json.load(f)


def _build_payload(model, prompt):
    """Build the ``/chat/completions`` request body for one prompt.

    ``prompt`` must contain a ``"user"`` entry; ``"system"`` is optional and
    defaults to an empty string.
    """
    return {
        "model": model,
        "messages": [
            {"role": "system", "content": prompt.get("system", "")},
            {"role": "user", "content": prompt["user"]},
        ],
        "temperature": 0.7,
        "top_p": 0.95,
        # NOTE(review): top_k, min_p, repetition_penalty and
        # chat_template_kwargs are not part of the core OpenAI schema; the
        # target server is assumed to accept them - confirm for new backends.
        "top_k": 20,
        "min_p": 0.0,
        "presence_penalty": 1.5,
        "repetition_penalty": 1.0,
        "max_tokens": 4096,
        "chat_template_kwargs": {"enable_thinking": False},
    }


def _report_choice(choice):
    """Print finish reason, any reasoning text, and the raw content of a choice."""
    print(f"Finish reason: {choice.get('finish_reason', '?')}")
    # A null "message" is treated like a missing one so the lookups below
    # still work and the "no content" branch reports it.
    msg = choice.get("message") or {}
    raw = msg.get("content")
    # Servers expose reasoning text under different keys; take the first set.
    reasoning = msg.get("reasoning_content") or msg.get("thinking") or msg.get("reasoning")
    if reasoning:
        reasoning_text = str(reasoning)
        print(f"\nTHINKING/REASONING ({len(reasoning_text)} chars):")
        print(reasoning_text[:REASONING_PREVIEW_CHARS])
        # Only signal truncation when something was actually cut off.
        if len(reasoning_text) > REASONING_PREVIEW_CHARS:
            print("...")
    if raw is None:
        print("\nCONTENT IS NONE — model returned no content")
        print(f"Full message object: {json.dumps(msg, ensure_ascii=False)[:JSON_PREVIEW_CHARS]}")
    else:
        print(f"\nResponse length: {len(raw)} chars")
        print("=" * 60)
        print("RAW RESPONSE (first 2000 chars):")
        print("=" * 60)
        print(raw[:RAW_PREVIEW_CHARS])


def _report_response(resp):
    """Print the HTTP status and a summary of the decoded response body."""
    print(f"HTTP Status: {resp.status_code}")
    try:
        data = resp.json()
    except ValueError:
        # Gateways and crashed servers often answer with HTML or an empty
        # body; show it rather than only a JSON decode traceback.
        print(f"Non-JSON response body: {resp.text[:JSON_PREVIEW_CHARS]}")
        return
    if not isinstance(data, dict):
        print(f"Unexpected response: {json.dumps(data, ensure_ascii=False)[:JSON_PREVIEW_CHARS]}")
        return
    print(f"Response keys: {list(data.keys())}")
    if "error" in data:
        print(f"API ERROR: {data['error']}")
    elif data.get("choices"):
        _report_choice(data["choices"][0])
    else:
        print(f"Unexpected response: {json.dumps(data, ensure_ascii=False)[:JSON_PREVIEW_CHARS]}")


def main(argv=None):
    """Send one benchmark episode to a chat-completions API and print the reply.

    Usage: ``debug_model.py [mode] [model] [api_base]``

    Args:
        argv: Positional arguments (without the program name). Defaults to
            ``sys.argv[1:]``. Missing entries fall back to ``DEFAULT_MODE``,
            ``DEFAULT_MODEL`` and ``DEFAULT_API_BASE`` respectively.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    mode = args[0] if len(args) > 0 else DEFAULT_MODE
    model = args[1] if len(args) > 1 else DEFAULT_MODEL
    api_base = args[2] if len(args) > 2 else DEFAULT_API_BASE

    # Take the first TP episode.
    episode = _load_first_tp_episode()

    print(f"Episode: {episode['episode_id']}")
    print(f"Mode: {mode}")
    print(f"Model: {model}")
    print(f"API: {api_base}")
    print()

    prompt = build_prompt(episode, mode=mode)
    system_len = len(prompt.get("system", ""))
    user_len = len(prompt["user"])
    print(f"System prompt length: {system_len} chars")
    print(f"User prompt length: {user_len} chars")
    print()

    # Call the API.
    url = api_base.rstrip("/") + "/chat/completions"
    payload = _build_payload(model, prompt)
    print("Calling API...")
    try:
        resp = requests.post(
            url,
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=300,
        )
    except requests.RequestException as e:
        # Connection refused, DNS failure, timeout, etc.: report and stop.
        import traceback
        print(f"ERROR: {e}")
        traceback.print_exc()
        return
    _report_response(resp)


if __name__ == "__main__":
    main()