118 lines
2.6 KiB
JSON
118 lines
2.6 KiB
JSON
{
|
|
"model": "claude-opus-4-6",
|
|
"api_base": "https://hche3637.com/v1",
|
|
"timestamp": "2026-04-30T08:38:33.601696",
|
|
"total_evaluated": 1200,
|
|
"avg_latency_seconds": 25.45,
|
|
"api_errors": 15,
|
|
"summary": {
|
|
"total_episodes": 1200,
|
|
"detection_accuracy": 0.48583333333333334,
|
|
"miss_rate": 0.06727272727272728,
|
|
"false_alarm_rate": 0.8753846153846154,
|
|
"precision": 0.47412199630314233,
|
|
"recall": 0.9327272727272727,
|
|
"f1_security": 0.6286764705882353,
|
|
"threat_type_accuracy": 0.25146198830409355,
|
|
"parse_failure_rate": 0.029166666666666667,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 550,
|
|
"correct": 513,
|
|
"accuracy": 0.9327272727272727
|
|
},
|
|
"FP": {
|
|
"total": 400,
|
|
"correct": 37,
|
|
"accuracy": 0.0925
|
|
},
|
|
"TN": {
|
|
"total": 250,
|
|
"correct": 33,
|
|
"accuracy": 0.132
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 190,
|
|
"accuracy": 0.45789473684210524,
|
|
"miss_count": 17,
|
|
"false_alarm_count": 81
|
|
},
|
|
"SQ2": {
|
|
"total": 240,
|
|
"accuracy": 0.4625,
|
|
"miss_count": 5,
|
|
"false_alarm_count": 124
|
|
},
|
|
"SQ3": {
|
|
"total": 290,
|
|
"accuracy": 0.46206896551724136,
|
|
"miss_count": 7,
|
|
"false_alarm_count": 148
|
|
},
|
|
"SQ4": {
|
|
"total": 290,
|
|
"accuracy": 0.4827586206896552,
|
|
"miss_count": 3,
|
|
"false_alarm_count": 145
|
|
},
|
|
"SQ5": {
|
|
"total": 190,
|
|
"accuracy": 0.5842105263157895,
|
|
"miss_count": 5,
|
|
"false_alarm_count": 71
|
|
}
|
|
}
|
|
},
|
|
"errors": {
|
|
"total_errors": 1001,
|
|
"error_distribution": {
|
|
"FALSE_ALARM": 567,
|
|
"PARSE_FAIL": 21,
|
|
"WRONG_TYPE": 384,
|
|
"MISS": 29
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"FALSE_ALARM": 81,
|
|
"PARSE_FAIL": 5,
|
|
"WRONG_TYPE": 63,
|
|
"MISS": 17
|
|
},
|
|
"SQ2": {
|
|
"FALSE_ALARM": 124,
|
|
"WRONG_TYPE": 57,
|
|
"PARSE_FAIL": 3,
|
|
"MISS": 2
|
|
},
|
|
"SQ3": {
|
|
"FALSE_ALARM": 148,
|
|
"PARSE_FAIL": 5,
|
|
"WRONG_TYPE": 107,
|
|
"MISS": 3
|
|
},
|
|
"SQ4": {
|
|
"FALSE_ALARM": 143,
|
|
"PARSE_FAIL": 4,
|
|
"WRONG_TYPE": 112,
|
|
"MISS": 3
|
|
},
|
|
"SQ5": {
|
|
"FALSE_ALARM": 71,
|
|
"PARSE_FAIL": 4,
|
|
"WRONG_TYPE": 45,
|
|
"MISS": 4
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"device_fault": 139,
|
|
"fire_gas": 106,
|
|
"intrusion": 86,
|
|
"behavioral_anomaly": 38,
|
|
"water_damage": 2,
|
|
"child_specific": 15,
|
|
"elderly_specific": 27
|
|
}
|
|
}
|
|
} |