{ "model": "deepseek-v4-flash", "api_base": "https://uni-api.cstcloud.cn/v1", "timestamp": "2026-05-02T01:24:44.383239", "total_evaluated": 1200, "avg_latency_seconds": 77.93, "api_errors": 0, "summary": { "total_episodes": 1200, "detection_accuracy": 0.5766666666666667, "miss_rate": 0.5636363636363636, "false_alarm_rate": 0.2969230769230769, "precision": 0.5542725173210161, "recall": 0.43636363636363634, "f1_security": 0.48830111902339773, "threat_type_accuracy": 0.425, "parse_failure_rate": 0.06833333333333333, "breakdown": { "TP": { "total": 550, "correct": 240, "accuracy": 0.43636363636363634 }, "FP": { "total": 400, "correct": 269, "accuracy": 0.6725 }, "TN": { "total": 250, "correct": 183, "accuracy": 0.732 } }, "per_sq": { "SQ1": { "total": 190, "accuracy": 0.5789473684210527, "miss_count": 43, "false_alarm_count": 37 }, "SQ2": { "total": 240, "accuracy": 0.6291666666666667, "miss_count": 52, "false_alarm_count": 35 }, "SQ3": { "total": 290, "accuracy": 0.593103448275862, "miss_count": 73, "false_alarm_count": 44 }, "SQ4": { "total": 290, "accuracy": 0.5310344827586206, "miss_count": 81, "false_alarm_count": 54 }, "SQ5": { "total": 190, "accuracy": 0.5526315789473685, "miss_count": 61, "false_alarm_count": 23 } } }, "errors": { "total_errors": 646, "error_distribution": { "FALSE_ALARM": 181, "MISS": 284, "WRONG_TYPE": 138, "PARSE_FAIL": 42, "OTHER": 1 }, "error_by_sq": { "SQ1": { "FALSE_ALARM": 37, "MISS": 42, "WRONG_TYPE": 10, "PARSE_FAIL": 1 }, "SQ2": { "FALSE_ALARM": 29, "PARSE_FAIL": 13, "MISS": 47, "WRONG_TYPE": 33 }, "SQ3": { "FALSE_ALARM": 41, "OTHER": 1, "PARSE_FAIL": 5, "MISS": 71, "WRONG_TYPE": 43 }, "SQ4": { "FALSE_ALARM": 52, "PARSE_FAIL": 15, "WRONG_TYPE": 31, "MISS": 69 }, "SQ5": { "FALSE_ALARM": 22, "PARSE_FAIL": 8, "MISS": 55, "WRONG_TYPE": 21 } }, "error_by_category": { "device_fault": 93, "fire_gas": 96, "water_damage": 44, "intrusion": 114, "behavioral_anomaly": 36, "child_specific": 14, "elderly_specific": 25 } } }