118 lines
2.6 KiB
JSON
118 lines
2.6 KiB
JSON
{
|
|
"model": "Qwen/Qwen3.5-9B",
|
|
"api_base": "http://localhost:8000/v1",
|
|
"timestamp": "2026-05-03T06:31:18.653202",
|
|
"total_evaluated": 1200,
|
|
"avg_latency_seconds": 74.59,
|
|
"api_errors": 1,
|
|
"summary": {
|
|
"total_episodes": 1200,
|
|
"detection_accuracy": 0.5433333333333333,
|
|
"miss_rate": 0.3109090909090909,
|
|
"false_alarm_rate": 0.58,
|
|
"precision": 0.5013227513227513,
|
|
"recall": 0.6890909090909091,
|
|
"f1_security": 0.5803981623277181,
|
|
"threat_type_accuracy": 0.46965699208443273,
|
|
"parse_failure_rate": 0.025,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 550,
|
|
"correct": 379,
|
|
"accuracy": 0.6890909090909091
|
|
},
|
|
"FP": {
|
|
"total": 400,
|
|
"correct": 157,
|
|
"accuracy": 0.3925
|
|
},
|
|
"TN": {
|
|
"total": 250,
|
|
"correct": 116,
|
|
"accuracy": 0.464
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 190,
|
|
"accuracy": 0.531578947368421,
|
|
"miss_count": 27,
|
|
"false_alarm_count": 62
|
|
},
|
|
"SQ2": {
|
|
"total": 240,
|
|
"accuracy": 0.5708333333333333,
|
|
"miss_count": 29,
|
|
"false_alarm_count": 74
|
|
},
|
|
"SQ3": {
|
|
"total": 290,
|
|
"accuracy": 0.5103448275862069,
|
|
"miss_count": 81,
|
|
"false_alarm_count": 61
|
|
},
|
|
"SQ4": {
|
|
"total": 290,
|
|
"accuracy": 0.45517241379310347,
|
|
"miss_count": 17,
|
|
"false_alarm_count": 141
|
|
},
|
|
"SQ5": {
|
|
"total": 190,
|
|
"accuracy": 0.7052631578947368,
|
|
"miss_count": 17,
|
|
"false_alarm_count": 39
|
|
}
|
|
}
|
|
},
|
|
"errors": {
|
|
"total_errors": 749,
|
|
"error_distribution": {
|
|
"FALSE_ALARM": 374,
|
|
"MISS": 155,
|
|
"PARSE_FAIL": 19,
|
|
"WRONG_TYPE": 201
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"FALSE_ALARM": 62,
|
|
"MISS": 25,
|
|
"PARSE_FAIL": 2,
|
|
"WRONG_TYPE": 1
|
|
},
|
|
"SQ2": {
|
|
"FALSE_ALARM": 73,
|
|
"PARSE_FAIL": 4,
|
|
"WRONG_TYPE": 37,
|
|
"MISS": 26
|
|
},
|
|
"SQ3": {
|
|
"FALSE_ALARM": 61,
|
|
"MISS": 77,
|
|
"WRONG_TYPE": 47,
|
|
"PARSE_FAIL": 4
|
|
},
|
|
"SQ4": {
|
|
"FALSE_ALARM": 139,
|
|
"PARSE_FAIL": 7,
|
|
"WRONG_TYPE": 68,
|
|
"MISS": 12
|
|
},
|
|
"SQ5": {
|
|
"FALSE_ALARM": 39,
|
|
"WRONG_TYPE": 48,
|
|
"MISS": 15,
|
|
"PARSE_FAIL": 2
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"device_fault": 26,
|
|
"water_damage": 14,
|
|
"fire_gas": 20,
|
|
"intrusion": 145,
|
|
"behavioral_anomaly": 85,
|
|
"child_specific": 28,
|
|
"elderly_specific": 38
|
|
}
|
|
}
|
|
} |