118 lines
2.6 KiB
JSON
118 lines
2.6 KiB
JSON
{
|
|
"model": "deepseek-v4-flash",
|
|
"api_base": "https://uni-api.cstcloud.cn/v1",
|
|
"timestamp": "2026-05-04T17:03:19.718018",
|
|
"total_evaluated": 1200,
|
|
"avg_latency_seconds": 54.19,
|
|
"api_errors": 0,
|
|
"summary": {
|
|
"total_episodes": 1200,
|
|
"detection_accuracy": 0.5858333333333333,
|
|
"miss_rate": 0.3836363636363636,
|
|
"false_alarm_rate": 0.4323076923076923,
|
|
"precision": 0.5467741935483871,
|
|
"recall": 0.6163636363636363,
|
|
"f1_security": 0.5794871794871795,
|
|
"threat_type_accuracy": 0.5811209439528023,
|
|
"parse_failure_rate": 0.0525,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 550,
|
|
"correct": 339,
|
|
"accuracy": 0.6163636363636363
|
|
},
|
|
"FP": {
|
|
"total": 400,
|
|
"correct": 216,
|
|
"accuracy": 0.54
|
|
},
|
|
"TN": {
|
|
"total": 250,
|
|
"correct": 148,
|
|
"accuracy": 0.592
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 190,
|
|
"accuracy": 0.6052631578947368,
|
|
"miss_count": 13,
|
|
"false_alarm_count": 60
|
|
},
|
|
"SQ2": {
|
|
"total": 240,
|
|
"accuracy": 0.6375,
|
|
"miss_count": 36,
|
|
"false_alarm_count": 50
|
|
},
|
|
"SQ3": {
|
|
"total": 290,
|
|
"accuracy": 0.5172413793103449,
|
|
"miss_count": 77,
|
|
"false_alarm_count": 62
|
|
},
|
|
"SQ4": {
|
|
"total": 290,
|
|
"accuracy": 0.5655172413793104,
|
|
"miss_count": 49,
|
|
"false_alarm_count": 76
|
|
},
|
|
"SQ5": {
|
|
"total": 190,
|
|
"accuracy": 0.6368421052631579,
|
|
"miss_count": 36,
|
|
"false_alarm_count": 33
|
|
}
|
|
}
|
|
},
|
|
"errors": {
|
|
"total_errors": 639,
|
|
"error_distribution": {
|
|
"FALSE_ALARM": 276,
|
|
"PARSE_FAIL": 35,
|
|
"MISS": 186,
|
|
"WRONG_TYPE": 142
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"FALSE_ALARM": 57,
|
|
"PARSE_FAIL": 7,
|
|
"MISS": 11,
|
|
"WRONG_TYPE": 3
|
|
},
|
|
"SQ2": {
|
|
"FALSE_ALARM": 50,
|
|
"PARSE_FAIL": 5,
|
|
"MISS": 32,
|
|
"WRONG_TYPE": 20
|
|
},
|
|
"SQ3": {
|
|
"FALSE_ALARM": 61,
|
|
"PARSE_FAIL": 8,
|
|
"MISS": 71,
|
|
"WRONG_TYPE": 45
|
|
},
|
|
"SQ4": {
|
|
"FALSE_ALARM": 75,
|
|
"PARSE_FAIL": 8,
|
|
"WRONG_TYPE": 49,
|
|
"MISS": 43
|
|
},
|
|
"SQ5": {
|
|
"FALSE_ALARM": 33,
|
|
"MISS": 29,
|
|
"WRONG_TYPE": 25,
|
|
"PARSE_FAIL": 7
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"device_fault": 14,
|
|
"fire_gas": 40,
|
|
"water_damage": 14,
|
|
"intrusion": 111,
|
|
"behavioral_anomaly": 84,
|
|
"child_specific": 26,
|
|
"elderly_specific": 39
|
|
}
|
|
}
|
|
} |