116 lines
2.5 KiB
JSON
116 lines
2.5 KiB
JSON
{
|
|
"model": "Qwen/Qwen3.5-2B",
|
|
"api_base": "http://localhost:8000/v1",
|
|
"timestamp": "2026-05-10T13:55:09.665723",
|
|
"total_evaluated": 1200,
|
|
"avg_latency_seconds": 8.5,
|
|
"api_errors": 0,
|
|
"summary": {
|
|
"total_episodes": 1200,
|
|
"detection_accuracy": 0.6291666666666667,
|
|
"miss_rate": 0.72,
|
|
"false_alarm_rate": 0.05846153846153846,
|
|
"precision": 0.8020833333333334,
|
|
"recall": 0.28,
|
|
"f1_security": 0.4150943396226416,
|
|
"threat_type_accuracy": 0.551948051948052,
|
|
"parse_failure_rate": 0.08833333333333333,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 550,
|
|
"correct": 154,
|
|
"accuracy": 0.28
|
|
},
|
|
"FP": {
|
|
"total": 400,
|
|
"correct": 365,
|
|
"accuracy": 0.9125
|
|
},
|
|
"TN": {
|
|
"total": 250,
|
|
"correct": 236,
|
|
"accuracy": 0.944
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 190,
|
|
"accuracy": 0.5578947368421052,
|
|
"miss_count": 58,
|
|
"false_alarm_count": 18
|
|
},
|
|
"SQ2": {
|
|
"total": 240,
|
|
"accuracy": 0.6208333333333333,
|
|
"miss_count": 78,
|
|
"false_alarm_count": 12
|
|
},
|
|
"SQ3": {
|
|
"total": 290,
|
|
"accuracy": 0.5551724137931034,
|
|
"miss_count": 129,
|
|
"false_alarm_count": 0
|
|
},
|
|
"SQ4": {
|
|
"total": 290,
|
|
"accuracy": 0.5689655172413793,
|
|
"miss_count": 122,
|
|
"false_alarm_count": 2
|
|
},
|
|
"SQ5": {
|
|
"total": 190,
|
|
"accuracy": 0.9157894736842105,
|
|
"miss_count": 9,
|
|
"false_alarm_count": 6
|
|
}
|
|
}
|
|
},
|
|
"errors": {
|
|
"total_errors": 514,
|
|
"error_distribution": {
|
|
"PARSE_FAIL": 49,
|
|
"FALSE_ALARM": 36,
|
|
"MISS": 360,
|
|
"WRONG_TYPE": 69
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"PARSE_FAIL": 20,
|
|
"FALSE_ALARM": 18,
|
|
"MISS": 46,
|
|
"WRONG_TYPE": 4
|
|
},
|
|
"SQ2": {
|
|
"FALSE_ALARM": 12,
|
|
"PARSE_FAIL": 6,
|
|
"MISS": 73,
|
|
"WRONG_TYPE": 4
|
|
},
|
|
"SQ3": {
|
|
"MISS": 119,
|
|
"PARSE_FAIL": 10,
|
|
"WRONG_TYPE": 1
|
|
},
|
|
"SQ4": {
|
|
"PARSE_FAIL": 7,
|
|
"MISS": 118,
|
|
"WRONG_TYPE": 2
|
|
},
|
|
"SQ5": {
|
|
"FALSE_ALARM": 6,
|
|
"PARSE_FAIL": 6,
|
|
"WRONG_TYPE": 58,
|
|
"MISS": 4
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"device_fault": 50,
|
|
"fire_gas": 77,
|
|
"intrusion": 135,
|
|
"water_damage": 15,
|
|
"behavioral_anomaly": 81,
|
|
"child_specific": 28,
|
|
"elderly_specific": 43
|
|
}
|
|
}
|
|
} |