116 lines
2.5 KiB
JSON
116 lines
2.5 KiB
JSON
{
|
|
"model": "Qwen/Qwen3.5-9B",
|
|
"api_base": "http://localhost:8000/v1",
|
|
"timestamp": "2026-05-04T14:48:06.999403",
|
|
"total_evaluated": 1200,
|
|
"summary": {
|
|
"total_episodes": 1200,
|
|
"detection_accuracy": 0.5875,
|
|
"miss_rate": 0.38727272727272727,
|
|
"false_alarm_rate": 0.42,
|
|
"precision": 0.5524590163934426,
|
|
"recall": 0.6127272727272727,
|
|
"f1_security": 0.5810344827586207,
|
|
"threat_type_accuracy": 0.5459940652818991,
|
|
"parse_failure_rate": 0.013333333333333334,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 550,
|
|
"correct": 337,
|
|
"accuracy": 0.6127272727272727
|
|
},
|
|
"FP": {
|
|
"total": 400,
|
|
"correct": 226,
|
|
"accuracy": 0.565
|
|
},
|
|
"TN": {
|
|
"total": 250,
|
|
"correct": 142,
|
|
"accuracy": 0.568
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 190,
|
|
"accuracy": 0.5736842105263158,
|
|
"miss_count": 41,
|
|
"false_alarm_count": 39
|
|
},
|
|
"SQ2": {
|
|
"total": 240,
|
|
"accuracy": 0.6125,
|
|
"miss_count": 43,
|
|
"false_alarm_count": 49
|
|
},
|
|
"SQ3": {
|
|
"total": 290,
|
|
"accuracy": 0.4896551724137931,
|
|
"miss_count": 80,
|
|
"false_alarm_count": 67
|
|
},
|
|
"SQ4": {
|
|
"total": 290,
|
|
"accuracy": 0.5448275862068965,
|
|
"miss_count": 39,
|
|
"false_alarm_count": 89
|
|
},
|
|
"SQ5": {
|
|
"total": 190,
|
|
"accuracy": 0.7842105263157895,
|
|
"miss_count": 10,
|
|
"false_alarm_count": 29
|
|
}
|
|
}
|
|
},
|
|
"errors": {
|
|
"total_errors": 648,
|
|
"error_distribution": {
|
|
"FALSE_ALARM": 273,
|
|
"PARSE_FAIL": 14,
|
|
"MISS": 208,
|
|
"WRONG_TYPE": 153
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"FALSE_ALARM": 39,
|
|
"PARSE_FAIL": 2,
|
|
"MISS": 40
|
|
},
|
|
"SQ2": {
|
|
"FALSE_ALARM": 49,
|
|
"PARSE_FAIL": 2,
|
|
"MISS": 42,
|
|
"WRONG_TYPE": 28
|
|
},
|
|
"SQ3": {
|
|
"FALSE_ALARM": 67,
|
|
"PARSE_FAIL": 3,
|
|
"MISS": 78,
|
|
"WRONG_TYPE": 39
|
|
},
|
|
"SQ4": {
|
|
"FALSE_ALARM": 89,
|
|
"PARSE_FAIL": 5,
|
|
"MISS": 38,
|
|
"WRONG_TYPE": 50
|
|
},
|
|
"SQ5": {
|
|
"FALSE_ALARM": 29,
|
|
"PARSE_FAIL": 2,
|
|
"WRONG_TYPE": 36,
|
|
"MISS": 10
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"device_fault": 40,
|
|
"fire_gas": 53,
|
|
"water_damage": 37,
|
|
"intrusion": 81,
|
|
"behavioral_anomaly": 79,
|
|
"child_specific": 28,
|
|
"elderly_specific": 43
|
|
}
|
|
},
|
|
"pipeline": "EGP"
|
|
} |