84 lines
2.8 KiB
Python
84 lines
2.8 KiB
Python
#!/usr/bin/env python3
"""debug_model.py — debug a single episode and print the raw model response.

Usage:
    python debug_model.py [mode] [model] [api_base]

All three positional arguments are optional:
    mode      prompt mode forwarded to ``build_prompt`` (default "baseline")
    model     model name sent in the request payload (default "Qwen/Qwen3.5-9B")
    api_base  base URL of the chat-completions API
              (default "http://localhost:8000/v1")

The script loads the first ``SQ1_TP_*.json`` episode (in sorted filename
order) from ``data/benchmark/sq1``, builds the prompt, POSTs it to
``<api_base>/chat/completions`` and prints what came back.
"""

import json
import sys
import traceback
from pathlib import Path

import requests

PROJECT_ROOT = Path(__file__).resolve().parent
# Make the project's ``src`` package importable when run as a plain script.
sys.path.insert(0, str(PROJECT_ROOT))

from src.evaluation.prompt_builder import build_prompt  # noqa: E402

EPISODE_DIR = PROJECT_ROOT / "data/benchmark/sq1"
EPISODE_PATTERN = "SQ1_TP_*.json"

DEFAULT_MODE = "baseline"
DEFAULT_MODEL = "Qwen/Qwen3.5-9B"
DEFAULT_API_BASE = "http://localhost:8000/v1"

REQUEST_TIMEOUT_S = 300
REASONING_PREVIEW_CHARS = 500
RAW_PREVIEW_CHARS = 2000
DUMP_PREVIEW_CHARS = 1000


def _load_first_episode():
    """Return the parsed JSON of the first TP episode, in sorted filename order.

    Exits with a readable message (non-zero status) when no episode file
    matches, instead of failing with a bare IndexError.
    """
    # glob() makes no ordering promise; sort so "first" is reproducible.
    candidates = sorted(EPISODE_DIR.glob(EPISODE_PATTERN))
    if not candidates:
        sys.exit(f"No episode files matching {EPISODE_PATTERN} in {EPISODE_DIR}")
    with open(candidates[0], "r", encoding="utf-8") as f:
        return json.load(f)


def _build_payload(model, prompt):
    """Build the chat-completions request body for ``prompt``.

    ``prompt`` must provide a "user" entry; "system" is optional and defaults
    to an empty string.
    """
    return {
        "model": model,
        "messages": [
            {"role": "system", "content": prompt.get("system", "")},
            {"role": "user", "content": prompt["user"]},
        ],
        "temperature": 0.7,
        "top_p": 0.95,
        # NOTE(review): top_k / min_p / repetition_penalty /
        # chat_template_kwargs are not core OpenAI fields; presumably the
        # server is a vLLM-style endpoint that accepts them — confirm.
        "top_k": 20,
        "min_p": 0.0,
        "presence_penalty": 1.5,
        "repetition_penalty": 1.0,
        "max_tokens": 4096,
        "chat_template_kwargs": {"enable_thinking": False},
    }


def _report_choice(choice):
    """Print finish reason, any reasoning text, and the content of one choice."""
    print(f"Finish reason: {choice.get('finish_reason', '?')}")
    # Tolerate both a missing "message" key and an explicit null.
    msg = choice.get("message") or {}
    raw = msg.get("content")
    # Servers name the reasoning field differently; take the first non-empty.
    reasoning = msg.get("reasoning_content") or msg.get("thinking") or msg.get("reasoning")

    if reasoning:
        reasoning_text = str(reasoning)
        print(f"\nTHINKING/REASONING ({len(reasoning_text)} chars):")
        print(reasoning_text[:REASONING_PREVIEW_CHARS])
        # Only show the ellipsis when the preview actually cut something off.
        if len(reasoning_text) > REASONING_PREVIEW_CHARS:
            print("...")

    if raw is None:
        print("\nCONTENT IS NONE — model returned no content")
        print(f"Full message object: {json.dumps(msg, ensure_ascii=False)[:DUMP_PREVIEW_CHARS]}")
        return

    print(f"\nResponse length: {len(raw)} chars")
    print("=" * 60)
    print("RAW RESPONSE (first 2000 chars):")
    print("=" * 60)
    print(raw[:RAW_PREVIEW_CHARS])


def _call_and_report(url, payload):
    """POST ``payload`` to ``url`` and print a summary of the response."""
    resp = requests.post(
        url,
        json=payload,
        headers={"Content-Type": "application/json"},
        timeout=REQUEST_TIMEOUT_S,
    )
    print(f"HTTP Status: {resp.status_code}")

    try:
        data = resp.json()
    except ValueError:
        # Non-JSON bodies (e.g. an HTML error page from a proxy) are shown
        # verbatim: the body is more useful here than a decode traceback.
        print(f"Response is not valid JSON: {resp.text[:DUMP_PREVIEW_CHARS]}")
        return

    if not isinstance(data, dict):
        print(f"Unexpected response: {json.dumps(data, ensure_ascii=False)[:DUMP_PREVIEW_CHARS]}")
        return

    print(f"Response keys: {list(data.keys())}")

    if "error" in data:
        print(f"API ERROR: {data['error']}")
    elif data.get("choices"):
        _report_choice(data["choices"][0])
    else:
        print(f"Unexpected response: {json.dumps(data, ensure_ascii=False)[:DUMP_PREVIEW_CHARS]}")


def main(argv=None):
    """Run the single-episode debug call.

    Args:
        argv: positional arguments ``[mode, model, api_base]``; defaults to
            ``sys.argv[1:]`` when omitted.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    mode = args[0] if len(args) > 0 else DEFAULT_MODE
    model = args[1] if len(args) > 1 else DEFAULT_MODEL
    api_base = args[2] if len(args) > 2 else DEFAULT_API_BASE

    # Take the first TP episode.
    episode = _load_first_episode()

    print(f"Episode: {episode['episode_id']}")
    print(f"Mode: {mode}")
    print(f"Model: {model}")
    print(f"API: {api_base}")
    print()

    prompt = build_prompt(episode, mode=mode)
    print(f"System prompt length: {len(prompt.get('system',''))} chars")
    print(f"User prompt length: {len(prompt['user'])} chars")
    print()

    # Call the API.
    url = api_base.rstrip("/") + "/chat/completions"
    payload = _build_payload(model, prompt)

    print("Calling API...")
    try:
        _call_and_report(url, payload)
    except Exception as e:  # top-level boundary of a debug tool: report, don't hide
        print(f"ERROR: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    main()