Files
llmiotsafe/results/qwen35_2b_after_sft/summary.json
2026-05-12 17:01:39 +08:00

116 lines
2.5 KiB
JSON

{
"model": "qwen35_2b_sft",
"api_base": "http://localhost:8001/v1",
"timestamp": "2026-05-10T19:56:10.028905",
"total_evaluated": 1200,
"avg_latency_seconds": 10.48,
"api_errors": 0,
"summary": {
"total_episodes": 1200,
"detection_accuracy": 0.6308333333333334,
"miss_rate": 0.7090909090909091,
"false_alarm_rate": 0.06307692307692307,
"precision": 0.7960199004975125,
"recall": 0.2909090909090909,
"f1_security": 0.42609853528628494,
"threat_type_accuracy": 0.525,
"parse_failure_rate": 0.08583333333333333,
"breakdown": {
"TP": {
"total": 550,
"correct": 160,
"accuracy": 0.2909090909090909
},
"FP": {
"total": 400,
"correct": 359,
"accuracy": 0.8975
},
"TN": {
"total": 250,
"correct": 238,
"accuracy": 0.952
}
},
"per_sq": {
"SQ1": {
"total": 190,
"accuracy": 0.5473684210526316,
"miss_count": 58,
"false_alarm_count": 19
},
"SQ2": {
"total": 240,
"accuracy": 0.625,
"miss_count": 78,
"false_alarm_count": 12
},
"SQ3": {
"total": 290,
"accuracy": 0.5551724137931034,
"miss_count": 129,
"false_alarm_count": 0
},
"SQ4": {
"total": 290,
"accuracy": 0.5689655172413793,
"miss_count": 121,
"false_alarm_count": 2
},
"SQ5": {
"total": 190,
"accuracy": 0.9315789473684211,
"miss_count": 4,
"false_alarm_count": 8
}
}
},
"errors": {
"total_errors": 519,
"error_distribution": {
"PARSE_FAIL": 52,
"FALSE_ALARM": 34,
"MISS": 357,
"WRONG_TYPE": 76
},
"error_by_sq": {
"SQ1": {
"PARSE_FAIL": 23,
"FALSE_ALARM": 17,
"MISS": 46,
"WRONG_TYPE": 5
},
"SQ2": {
"FALSE_ALARM": 12,
"MISS": 70,
"WRONG_TYPE": 5,
"PARSE_FAIL": 8
},
"SQ3": {
"MISS": 122,
"PARSE_FAIL": 7,
"WRONG_TYPE": 1
},
"SQ4": {
"PARSE_FAIL": 8,
"MISS": 117,
"WRONG_TYPE": 2
},
"SQ5": {
"FALSE_ALARM": 5,
"PARSE_FAIL": 6,
"WRONG_TYPE": 63,
"MISS": 2
}
},
"error_by_category": {
"device_fault": 51,
"fire_gas": 78,
"intrusion": 136,
"water_damage": 15,
"behavioral_anomaly": 81,
"child_specific": 27,
"elderly_specific": 45
}
}
}