{ "model": "Qwen/Qwen3.6-35B-A3B-FP8", "api_base": "http://localhost:8000/v1", "timestamp": "2026-05-05T12:54:26.166412", "total_evaluated": 1200, "summary": { "total_episodes": 1200, "detection_accuracy": 0.6066666666666667, "miss_rate": 0.5490909090909091, "false_alarm_rate": 0.26153846153846155, "precision": 0.5933014354066986, "recall": 0.4509090909090909, "f1_security": 0.5123966942148761, "threat_type_accuracy": 0.4637096774193548, "parse_failure_rate": 0.0, "breakdown": { "TP": { "total": 550, "correct": 248, "accuracy": 0.4509090909090909 }, "FP": { "total": 400, "correct": 282, "accuracy": 0.705 }, "TN": { "total": 250, "correct": 198, "accuracy": 0.792 } }, "per_sq": { "SQ1": { "total": 190, "accuracy": 0.6789473684210526, "miss_count": 53, "false_alarm_count": 8 }, "SQ2": { "total": 240, "accuracy": 0.5958333333333333, "miss_count": 70, "false_alarm_count": 27 }, "SQ3": { "total": 290, "accuracy": 0.503448275862069, "miss_count": 67, "false_alarm_count": 77 }, "SQ4": { "total": 290, "accuracy": 0.596551724137931, "miss_count": 69, "false_alarm_count": 48 }, "SQ5": { "total": 190, "accuracy": 0.7210526315789474, "miss_count": 43, "false_alarm_count": 10 } } }, "errors": { "total_errors": 605, "error_distribution": { "FALSE_ALARM": 170, "MISS": 302, "WRONG_TYPE": 133 }, "error_by_sq": { "SQ1": { "FALSE_ALARM": 8, "MISS": 53 }, "SQ2": { "FALSE_ALARM": 27, "MISS": 70, "WRONG_TYPE": 23 }, "SQ3": { "FALSE_ALARM": 77, "MISS": 67, "WRONG_TYPE": 50 }, "SQ4": { "FALSE_ALARM": 48, "MISS": 69, "WRONG_TYPE": 31 }, "SQ5": { "FALSE_ALARM": 10, "MISS": 43, "WRONG_TYPE": 29 } }, "error_by_category": { "device_fault": 53, "fire_gas": 63, "water_damage": 33, "intrusion": 137, "behavioral_anomaly": 85, "child_specific": 29, "elderly_specific": 35 } }, "pipeline": "EGPv2.1" }