{ "model": "Qwen/Qwen3.5-9B", "api_base": "http://localhost:8000/v1", "timestamp": "2026-05-03T06:31:18.653202", "total_evaluated": 1200, "avg_latency_seconds": 74.59, "api_errors": 1, "summary": { "total_episodes": 1200, "detection_accuracy": 0.5433333333333333, "miss_rate": 0.3109090909090909, "false_alarm_rate": 0.58, "precision": 0.5013227513227513, "recall": 0.6890909090909091, "f1_security": 0.5803981623277181, "threat_type_accuracy": 0.46965699208443273, "parse_failure_rate": 0.025, "breakdown": { "TP": { "total": 550, "correct": 379, "accuracy": 0.6890909090909091 }, "FP": { "total": 400, "correct": 157, "accuracy": 0.3925 }, "TN": { "total": 250, "correct": 116, "accuracy": 0.464 } }, "per_sq": { "SQ1": { "total": 190, "accuracy": 0.531578947368421, "miss_count": 27, "false_alarm_count": 62 }, "SQ2": { "total": 240, "accuracy": 0.5708333333333333, "miss_count": 29, "false_alarm_count": 74 }, "SQ3": { "total": 290, "accuracy": 0.5103448275862069, "miss_count": 81, "false_alarm_count": 61 }, "SQ4": { "total": 290, "accuracy": 0.45517241379310347, "miss_count": 17, "false_alarm_count": 141 }, "SQ5": { "total": 190, "accuracy": 0.7052631578947368, "miss_count": 17, "false_alarm_count": 39 } } }, "errors": { "total_errors": 749, "error_distribution": { "FALSE_ALARM": 374, "MISS": 155, "PARSE_FAIL": 19, "WRONG_TYPE": 201 }, "error_by_sq": { "SQ1": { "FALSE_ALARM": 62, "MISS": 25, "PARSE_FAIL": 2, "WRONG_TYPE": 1 }, "SQ2": { "FALSE_ALARM": 73, "PARSE_FAIL": 4, "WRONG_TYPE": 37, "MISS": 26 }, "SQ3": { "FALSE_ALARM": 61, "MISS": 77, "WRONG_TYPE": 47, "PARSE_FAIL": 4 }, "SQ4": { "FALSE_ALARM": 139, "PARSE_FAIL": 7, "WRONG_TYPE": 68, "MISS": 12 }, "SQ5": { "FALSE_ALARM": 39, "WRONG_TYPE": 48, "MISS": 15, "PARSE_FAIL": 2 } }, "error_by_category": { "device_fault": 26, "water_damage": 14, "fire_gas": 20, "intrusion": 145, "behavioral_anomaly": 85, "child_specific": 28, "elderly_specific": 38 } } }