118 lines
2.6 KiB
JSON
118 lines
2.6 KiB
JSON
{
|
|
"model": "Qwen/Qwen3.5-9B",
|
|
"api_base": "http://localhost:8000/v1",
|
|
"timestamp": "2026-05-03T09:31:20.047748",
|
|
"total_evaluated": 1200,
|
|
"avg_latency_seconds": 71.08,
|
|
"api_errors": 1,
|
|
"summary": {
|
|
"total_episodes": 1200,
|
|
"detection_accuracy": 0.5566666666666666,
|
|
"miss_rate": 0.19454545454545455,
|
|
"false_alarm_rate": 0.6430769230769231,
|
|
"precision": 0.5145180023228804,
|
|
"recall": 0.8054545454545454,
|
|
"f1_security": 0.6279234585400425,
|
|
"threat_type_accuracy": 0.40632054176072235,
|
|
"parse_failure_rate": 0.1825,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 550,
|
|
"correct": 443,
|
|
"accuracy": 0.8054545454545454
|
|
},
|
|
"FP": {
|
|
"total": 400,
|
|
"correct": 143,
|
|
"accuracy": 0.3575
|
|
},
|
|
"TN": {
|
|
"total": 250,
|
|
"correct": 82,
|
|
"accuracy": 0.328
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 190,
|
|
"accuracy": 0.4842105263157895,
|
|
"miss_count": 18,
|
|
"false_alarm_count": 75
|
|
},
|
|
"SQ2": {
|
|
"total": 240,
|
|
"accuracy": 0.5708333333333333,
|
|
"miss_count": 12,
|
|
"false_alarm_count": 91
|
|
},
|
|
"SQ3": {
|
|
"total": 290,
|
|
"accuracy": 0.5344827586206896,
|
|
"miss_count": 42,
|
|
"false_alarm_count": 93
|
|
},
|
|
"SQ4": {
|
|
"total": 290,
|
|
"accuracy": 0.5586206896551724,
|
|
"miss_count": 16,
|
|
"false_alarm_count": 110
|
|
},
|
|
"SQ5": {
|
|
"total": 190,
|
|
"accuracy": 0.6421052631578947,
|
|
"miss_count": 19,
|
|
"false_alarm_count": 49
|
|
}
|
|
}
|
|
},
|
|
"errors": {
|
|
"total_errors": 795,
|
|
"error_distribution": {
|
|
"FALSE_ALARM": 386,
|
|
"PARSE_FAIL": 91,
|
|
"WRONG_TYPE": 263,
|
|
"MISS": 55
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"FALSE_ALARM": 68,
|
|
"PARSE_FAIL": 19,
|
|
"WRONG_TYPE": 3,
|
|
"MISS": 11
|
|
},
|
|
"SQ2": {
|
|
"PARSE_FAIL": 18,
|
|
"FALSE_ALARM": 84,
|
|
"WRONG_TYPE": 56,
|
|
"MISS": 1
|
|
},
|
|
"SQ3": {
|
|
"FALSE_ALARM": 86,
|
|
"PARSE_FAIL": 21,
|
|
"MISS": 28,
|
|
"WRONG_TYPE": 83
|
|
},
|
|
"SQ4": {
|
|
"FALSE_ALARM": 104,
|
|
"PARSE_FAIL": 17,
|
|
"WRONG_TYPE": 69,
|
|
"MISS": 7
|
|
},
|
|
"SQ5": {
|
|
"FALSE_ALARM": 44,
|
|
"PARSE_FAIL": 16,
|
|
"WRONG_TYPE": 52,
|
|
"MISS": 8
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"device_fault": 14,
|
|
"water_damage": 26,
|
|
"fire_gas": 21,
|
|
"intrusion": 116,
|
|
"behavioral_anomaly": 77,
|
|
"child_specific": 28,
|
|
"elderly_specific": 36
|
|
}
|
|
}
|
|
} |