{ "model": "deepseek-v4-flash", "api_base": "https://uni-api.cstcloud.cn/v1", "timestamp": "2026-05-04T17:03:19.718018", "total_evaluated": 1200, "avg_latency_seconds": 54.19, "api_errors": 0, "summary": { "total_episodes": 1200, "detection_accuracy": 0.5858333333333333, "miss_rate": 0.3836363636363636, "false_alarm_rate": 0.4323076923076923, "precision": 0.5467741935483871, "recall": 0.6163636363636363, "f1_security": 0.5794871794871795, "threat_type_accuracy": 0.5811209439528023, "parse_failure_rate": 0.0525, "breakdown": { "TP": { "total": 550, "correct": 339, "accuracy": 0.6163636363636363 }, "FP": { "total": 400, "correct": 216, "accuracy": 0.54 }, "TN": { "total": 250, "correct": 148, "accuracy": 0.592 } }, "per_sq": { "SQ1": { "total": 190, "accuracy": 0.6052631578947368, "miss_count": 13, "false_alarm_count": 60 }, "SQ2": { "total": 240, "accuracy": 0.6375, "miss_count": 36, "false_alarm_count": 50 }, "SQ3": { "total": 290, "accuracy": 0.5172413793103449, "miss_count": 77, "false_alarm_count": 62 }, "SQ4": { "total": 290, "accuracy": 0.5655172413793104, "miss_count": 49, "false_alarm_count": 76 }, "SQ5": { "total": 190, "accuracy": 0.6368421052631579, "miss_count": 36, "false_alarm_count": 33 } } }, "errors": { "total_errors": 639, "error_distribution": { "FALSE_ALARM": 276, "PARSE_FAIL": 35, "MISS": 186, "WRONG_TYPE": 142 }, "error_by_sq": { "SQ1": { "FALSE_ALARM": 57, "PARSE_FAIL": 7, "MISS": 11, "WRONG_TYPE": 3 }, "SQ2": { "FALSE_ALARM": 50, "PARSE_FAIL": 5, "MISS": 32, "WRONG_TYPE": 20 }, "SQ3": { "FALSE_ALARM": 61, "PARSE_FAIL": 8, "MISS": 71, "WRONG_TYPE": 45 }, "SQ4": { "FALSE_ALARM": 75, "PARSE_FAIL": 8, "WRONG_TYPE": 49, "MISS": 43 }, "SQ5": { "FALSE_ALARM": 33, "MISS": 29, "WRONG_TYPE": 25, "PARSE_FAIL": 7 } }, "error_by_category": { "device_fault": 14, "fire_gas": 40, "water_damage": 14, "intrusion": 111, "behavioral_anomaly": 84, "child_specific": 26, "elderly_specific": 39 } } }