{ "model": "claude-opus-4-6", "api_base": "https://hche3637.com/v1", "timestamp": "2026-04-30T08:38:33.601696", "total_evaluated": 1200, "avg_latency_seconds": 25.45, "api_errors": 15, "summary": { "total_episodes": 1200, "detection_accuracy": 0.48583333333333334, "miss_rate": 0.06727272727272728, "false_alarm_rate": 0.8753846153846154, "precision": 0.47412199630314233, "recall": 0.9327272727272727, "f1_security": 0.6286764705882353, "threat_type_accuracy": 0.25146198830409355, "parse_failure_rate": 0.029166666666666667, "breakdown": { "TP": { "total": 550, "correct": 513, "accuracy": 0.9327272727272727 }, "FP": { "total": 400, "correct": 37, "accuracy": 0.0925 }, "TN": { "total": 250, "correct": 33, "accuracy": 0.132 } }, "per_sq": { "SQ1": { "total": 190, "accuracy": 0.45789473684210524, "miss_count": 17, "false_alarm_count": 81 }, "SQ2": { "total": 240, "accuracy": 0.4625, "miss_count": 5, "false_alarm_count": 124 }, "SQ3": { "total": 290, "accuracy": 0.46206896551724136, "miss_count": 7, "false_alarm_count": 148 }, "SQ4": { "total": 290, "accuracy": 0.4827586206896552, "miss_count": 3, "false_alarm_count": 145 }, "SQ5": { "total": 190, "accuracy": 0.5842105263157895, "miss_count": 5, "false_alarm_count": 71 } } }, "errors": { "total_errors": 1001, "error_distribution": { "FALSE_ALARM": 567, "PARSE_FAIL": 21, "WRONG_TYPE": 384, "MISS": 29 }, "error_by_sq": { "SQ1": { "FALSE_ALARM": 81, "PARSE_FAIL": 5, "WRONG_TYPE": 63, "MISS": 17 }, "SQ2": { "FALSE_ALARM": 124, "WRONG_TYPE": 57, "PARSE_FAIL": 3, "MISS": 2 }, "SQ3": { "FALSE_ALARM": 148, "PARSE_FAIL": 5, "WRONG_TYPE": 107, "MISS": 3 }, "SQ4": { "FALSE_ALARM": 143, "PARSE_FAIL": 4, "WRONG_TYPE": 112, "MISS": 3 }, "SQ5": { "FALSE_ALARM": 71, "PARSE_FAIL": 4, "WRONG_TYPE": 45, "MISS": 4 } }, "error_by_category": { "device_fault": 139, "fire_gas": 106, "intrusion": 86, "behavioral_anomaly": 38, "water_damage": 2, "child_specific": 15, "elderly_specific": 27 } } }