Files
llmiotsafe/results/qwen35_2b_before_sft/summary.json
2026-05-12 17:01:39 +08:00

116 lines
2.5 KiB
JSON

{
"model": "Qwen/Qwen3.5-2B",
"api_base": "http://localhost:8000/v1",
"timestamp": "2026-05-10T13:55:09.665723",
"total_evaluated": 1200,
"avg_latency_seconds": 8.5,
"api_errors": 0,
"summary": {
"total_episodes": 1200,
"detection_accuracy": 0.6291666666666667,
"miss_rate": 0.72,
"false_alarm_rate": 0.05846153846153846,
"precision": 0.8020833333333334,
"recall": 0.28,
"f1_security": 0.4150943396226416,
"threat_type_accuracy": 0.551948051948052,
"parse_failure_rate": 0.08833333333333333,
"breakdown": {
"TP": {
"total": 550,
"correct": 154,
"accuracy": 0.28
},
"FP": {
"total": 400,
"correct": 365,
"accuracy": 0.9125
},
"TN": {
"total": 250,
"correct": 236,
"accuracy": 0.944
}
},
"per_sq": {
"SQ1": {
"total": 190,
"accuracy": 0.5578947368421052,
"miss_count": 58,
"false_alarm_count": 18
},
"SQ2": {
"total": 240,
"accuracy": 0.6208333333333333,
"miss_count": 78,
"false_alarm_count": 12
},
"SQ3": {
"total": 290,
"accuracy": 0.5551724137931034,
"miss_count": 129,
"false_alarm_count": 0
},
"SQ4": {
"total": 290,
"accuracy": 0.5689655172413793,
"miss_count": 122,
"false_alarm_count": 2
},
"SQ5": {
"total": 190,
"accuracy": 0.9157894736842105,
"miss_count": 9,
"false_alarm_count": 6
}
}
},
"errors": {
"total_errors": 514,
"error_distribution": {
"PARSE_FAIL": 49,
"FALSE_ALARM": 36,
"MISS": 360,
"WRONG_TYPE": 69
},
"error_by_sq": {
"SQ1": {
"PARSE_FAIL": 20,
"FALSE_ALARM": 18,
"MISS": 46,
"WRONG_TYPE": 4
},
"SQ2": {
"FALSE_ALARM": 12,
"PARSE_FAIL": 6,
"MISS": 73,
"WRONG_TYPE": 4
},
"SQ3": {
"MISS": 119,
"PARSE_FAIL": 10,
"WRONG_TYPE": 1
},
"SQ4": {
"PARSE_FAIL": 7,
"MISS": 118,
"WRONG_TYPE": 2
},
"SQ5": {
"FALSE_ALARM": 6,
"PARSE_FAIL": 6,
"WRONG_TYPE": 58,
"MISS": 4
}
},
"error_by_category": {
"device_fault": 50,
"fire_gas": 77,
"intrusion": 135,
"water_damage": 15,
"behavioral_anomaly": 81,
"child_specific": 28,
"elderly_specific": 43
}
}
}