{ "model": "deepseek-v4-flash", "api_base": "https://uni-api.cstcloud.cn/v1", "timestamp": "2026-05-04T02:17:43.163035", "total_evaluated": 1200, "avg_latency_seconds": 62.46, "api_errors": 0, "summary": { "total_episodes": 1200, "detection_accuracy": 0.49833333333333335, "miss_rate": 0.09454545454545454, "false_alarm_rate": 0.8446153846153847, "precision": 0.47564469914040114, "recall": 0.9054545454545454, "f1_security": 0.6236693800876644, "threat_type_accuracy": 0.3755020080321285, "parse_failure_rate": 0.004166666666666667, "breakdown": { "TP": { "total": 550, "correct": 498, "accuracy": 0.9054545454545454 }, "FP": { "total": 400, "correct": 59, "accuracy": 0.1475 }, "TN": { "total": 250, "correct": 41, "accuracy": 0.164 } }, "per_sq": { "SQ1": { "total": 190, "accuracy": 0.47368421052631576, "miss_count": 8, "false_alarm_count": 92 }, "SQ2": { "total": 240, "accuracy": 0.5, "miss_count": 7, "false_alarm_count": 113 }, "SQ3": { "total": 290, "accuracy": 0.4517241379310345, "miss_count": 20, "false_alarm_count": 138 }, "SQ4": { "total": 290, "accuracy": 0.4827586206896552, "miss_count": 10, "false_alarm_count": 140 }, "SQ5": { "total": 190, "accuracy": 0.6157894736842106, "miss_count": 7, "false_alarm_count": 66 } } }, "errors": { "total_errors": 913, "error_distribution": { "FALSE_ALARM": 549, "WRONG_TYPE": 311, "MISS": 50, "PARSE_FAIL": 3 }, "error_by_sq": { "SQ1": { "FALSE_ALARM": 92, "WRONG_TYPE": 10, "MISS": 7, "PARSE_FAIL": 1 }, "SQ2": { "FALSE_ALARM": 113, "WRONG_TYPE": 59, "MISS": 7 }, "SQ3": { "FALSE_ALARM": 138, "PARSE_FAIL": 2, "WRONG_TYPE": 96, "MISS": 19 }, "SQ4": { "FALSE_ALARM": 140, "WRONG_TYPE": 79, "MISS": 10 }, "SQ5": { "FALSE_ALARM": 66, "WRONG_TYPE": 67, "MISS": 7 } }, "error_by_category": { "device_fault": 17, "fire_gas": 36, "water_damage": 25, "intrusion": 140, "behavioral_anomaly": 87, "child_specific": 29, "elderly_specific": 27 } } }