111 lines
2.4 KiB
JSON
111 lines
2.4 KiB
JSON
{
|
|
"model": "Qwen/Qwen2.5-7B-Instruct",
|
|
"api_base": "http://localhost:8000/v1",
|
|
"timestamp": "2026-04-30T22:18:00.620006",
|
|
"total_evaluated": 1200,
|
|
"avg_latency_seconds": 7.61,
|
|
"api_errors": 0,
|
|
"summary": {
|
|
"total_episodes": 1200,
|
|
"detection_accuracy": 0.5975,
|
|
"miss_rate": 0.8781818181818182,
|
|
"false_alarm_rate": 0.0,
|
|
"precision": 1.0,
|
|
"recall": 0.12181818181818181,
|
|
"f1_security": 0.2171799027552674,
|
|
"threat_type_accuracy": 0.44776119402985076,
|
|
"parse_failure_rate": 0.04,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 550,
|
|
"correct": 67,
|
|
"accuracy": 0.12181818181818181
|
|
},
|
|
"FP": {
|
|
"total": 400,
|
|
"correct": 400,
|
|
"accuracy": 1.0
|
|
},
|
|
"TN": {
|
|
"total": 250,
|
|
"correct": 250,
|
|
"accuracy": 1.0
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 190,
|
|
"accuracy": 0.5894736842105263,
|
|
"miss_count": 78,
|
|
"false_alarm_count": 0
|
|
},
|
|
"SQ2": {
|
|
"total": 240,
|
|
"accuracy": 0.5791666666666667,
|
|
"miss_count": 101,
|
|
"false_alarm_count": 0
|
|
},
|
|
"SQ3": {
|
|
"total": 290,
|
|
"accuracy": 0.5551724137931034,
|
|
"miss_count": 129,
|
|
"false_alarm_count": 0
|
|
},
|
|
"SQ4": {
|
|
"total": 290,
|
|
"accuracy": 0.5517241379310345,
|
|
"miss_count": 130,
|
|
"false_alarm_count": 0
|
|
},
|
|
"SQ5": {
|
|
"total": 190,
|
|
"accuracy": 0.7631578947368421,
|
|
"miss_count": 45,
|
|
"false_alarm_count": 0
|
|
}
|
|
}
|
|
},
|
|
"errors": {
|
|
"total_errors": 520,
|
|
"error_distribution": {
|
|
"MISS": 460,
|
|
"WRONG_TYPE": 37,
|
|
"PARSE_FAIL": 23
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"MISS": 76,
|
|
"WRONG_TYPE": 2,
|
|
"PARSE_FAIL": 2
|
|
},
|
|
"SQ2": {
|
|
"MISS": 98,
|
|
"PARSE_FAIL": 3,
|
|
"WRONG_TYPE": 1
|
|
},
|
|
"SQ3": {
|
|
"MISS": 126,
|
|
"PARSE_FAIL": 3,
|
|
"WRONG_TYPE": 1
|
|
},
|
|
"SQ4": {
|
|
"MISS": 125,
|
|
"PARSE_FAIL": 5
|
|
},
|
|
"SQ5": {
|
|
"PARSE_FAIL": 10,
|
|
"MISS": 35,
|
|
"WRONG_TYPE": 33
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"device_fault": 138,
|
|
"fire_gas": 116,
|
|
"water_damage": 52,
|
|
"intrusion": 115,
|
|
"behavioral_anomaly": 35,
|
|
"child_specific": 14,
|
|
"elderly_specific": 27
|
|
}
|
|
}
|
|
} |