{ "model": "Qwen/Qwen3.5-9B", "api_base": "http://localhost:8000/v1", "timestamp": "2026-05-04T15:54:43.664262", "total_evaluated": 60, "summary": { "total_episodes": 60, "detection_accuracy": 0.55, "miss_rate": 0.0, "false_alarm_rate": 0.8125, "precision": 0.5185185185185185, "recall": 1.0, "f1_security": 0.6829268292682926, "threat_type_accuracy": 0.2857142857142857, "parse_failure_rate": 0.016666666666666666, "breakdown": { "TP": { "total": 28, "correct": 28, "accuracy": 1.0 }, "FP": { "total": 28, "correct": 3, "accuracy": 0.10714285714285714 }, "TN": { "total": 4, "correct": 2, "accuracy": 0.5 } }, "per_sq": { "SQ1": { "total": 13, "accuracy": 0.6923076923076923, "miss_count": 0, "false_alarm_count": 4 }, "SQ2": { "total": 6, "accuracy": 0.5, "miss_count": 0, "false_alarm_count": 3 }, "SQ3": { "total": 17, "accuracy": 0.5882352941176471, "miss_count": 0, "false_alarm_count": 7 }, "SQ4": { "total": 17, "accuracy": 0.47058823529411764, "miss_count": 0, "false_alarm_count": 9 }, "SQ5": { "total": 7, "accuracy": 0.42857142857142855, "miss_count": 0, "false_alarm_count": 3 } } }, "errors": { "total_errors": 47, "error_distribution": { "FALSE_ALARM": 26, "WRONG_TYPE": 20, "PARSE_FAIL": 1 }, "error_by_sq": { "SQ1": { "FALSE_ALARM": 4 }, "SQ2": { "FALSE_ALARM": 3, "WRONG_TYPE": 2 }, "SQ3": { "FALSE_ALARM": 7, "WRONG_TYPE": 7 }, "SQ4": { "FALSE_ALARM": 9, "WRONG_TYPE": 8 }, "SQ5": { "FALSE_ALARM": 3, "PARSE_FAIL": 1, "WRONG_TYPE": 3 } }, "error_by_category": { "water_damage": 2, "intrusion": 4, "behavioral_anomaly": 4, "child_specific": 4, "elderly_specific": 3, "fire_gas": 3 } }, "pipeline": "EGPv2" }