{ "model": "qwen35_2b_sft", "api_base": "http://localhost:8001/v1", "timestamp": "2026-05-10T19:56:10.028905", "total_evaluated": 1200, "avg_latency_seconds": 10.48, "api_errors": 0, "summary": { "total_episodes": 1200, "detection_accuracy": 0.6308333333333334, "miss_rate": 0.7090909090909091, "false_alarm_rate": 0.06307692307692307, "precision": 0.7960199004975125, "recall": 0.2909090909090909, "f1_security": 0.42609853528628494, "threat_type_accuracy": 0.525, "parse_failure_rate": 0.08583333333333333, "breakdown": { "TP": { "total": 550, "correct": 160, "accuracy": 0.2909090909090909 }, "FP": { "total": 400, "correct": 359, "accuracy": 0.8975 }, "TN": { "total": 250, "correct": 238, "accuracy": 0.952 } }, "per_sq": { "SQ1": { "total": 190, "accuracy": 0.5473684210526316, "miss_count": 58, "false_alarm_count": 19 }, "SQ2": { "total": 240, "accuracy": 0.625, "miss_count": 78, "false_alarm_count": 12 }, "SQ3": { "total": 290, "accuracy": 0.5551724137931034, "miss_count": 129, "false_alarm_count": 0 }, "SQ4": { "total": 290, "accuracy": 0.5689655172413793, "miss_count": 121, "false_alarm_count": 2 }, "SQ5": { "total": 190, "accuracy": 0.9315789473684211, "miss_count": 4, "false_alarm_count": 8 } } }, "errors": { "total_errors": 519, "error_distribution": { "PARSE_FAIL": 52, "FALSE_ALARM": 34, "MISS": 357, "WRONG_TYPE": 76 }, "error_by_sq": { "SQ1": { "PARSE_FAIL": 23, "FALSE_ALARM": 17, "MISS": 46, "WRONG_TYPE": 5 }, "SQ2": { "FALSE_ALARM": 12, "MISS": 70, "WRONG_TYPE": 5, "PARSE_FAIL": 8 }, "SQ3": { "MISS": 122, "PARSE_FAIL": 7, "WRONG_TYPE": 1 }, "SQ4": { "PARSE_FAIL": 8, "MISS": 117, "WRONG_TYPE": 2 }, "SQ5": { "FALSE_ALARM": 5, "PARSE_FAIL": 6, "WRONG_TYPE": 63, "MISS": 2 } }, "error_by_category": { "device_fault": 51, "fire_gas": 78, "intrusion": 136, "water_damage": 15, "behavioral_anomaly": 81, "child_specific": 27, "elderly_specific": 45 } } }