235 lines
5.4 KiB
JSON
235 lines
5.4 KiB
JSON
{
|
|
"split": "train",
|
|
"input_root": "data_dpo_full_log_v1",
|
|
"total_pairs": 2500,
|
|
"counters": {
|
|
"chosen_rule_fallback": 1621,
|
|
"rejected_weak_model": 1679,
|
|
"chosen_strong_model": 879,
|
|
"rejected_constructed_fallback": 821
|
|
},
|
|
"strong_answer_summary": {
|
|
"total_episodes": 2500,
|
|
"detection_accuracy": 0.5428,
|
|
"miss_rate": 0.3529850746268657,
|
|
"false_alarm_rate": 0.5577586206896552,
|
|
"precision": 0.5726552179656539,
|
|
"recall": 0.6470149253731343,
|
|
"f1_security": 0.6075683251576735,
|
|
"threat_type_accuracy": 0.5201845444059977,
|
|
"parse_failure_rate": 0.0672,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 1340,
|
|
"correct": 867,
|
|
"accuracy": 0.6470149253731343
|
|
},
|
|
"FP": {
|
|
"total": 980,
|
|
"correct": 431,
|
|
"accuracy": 0.43979591836734694
|
|
},
|
|
"TN": {
|
|
"total": 180,
|
|
"correct": 59,
|
|
"accuracy": 0.3277777777777778
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 575,
|
|
"accuracy": 0.5565217391304348,
|
|
"miss_count": 26,
|
|
"false_alarm_count": 226
|
|
},
|
|
"SQ2": {
|
|
"total": 633,
|
|
"accuracy": 0.6145339652448657,
|
|
"miss_count": 196,
|
|
"false_alarm_count": 45
|
|
},
|
|
"SQ3": {
|
|
"total": 568,
|
|
"accuracy": 0.47183098591549294,
|
|
"miss_count": 166,
|
|
"false_alarm_count": 123
|
|
},
|
|
"SQ4": {
|
|
"total": 399,
|
|
"accuracy": 0.5012531328320802,
|
|
"miss_count": 60,
|
|
"false_alarm_count": 135
|
|
},
|
|
"SQ5": {
|
|
"total": 325,
|
|
"accuracy": 0.5538461538461539,
|
|
"miss_count": 25,
|
|
"false_alarm_count": 118
|
|
}
|
|
},
|
|
"label": "strong_answers",
|
|
"count": 2500,
|
|
"error_taxonomy": {
|
|
"total_errors": 1559,
|
|
"error_distribution": {
|
|
"MISS": 402,
|
|
"WRONG_TYPE": 416,
|
|
"PARSE_FAIL": 100,
|
|
"FALSE_ALARM": 641
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"WRONG_TYPE": 65,
|
|
"PARSE_FAIL": 29,
|
|
"MISS": 3,
|
|
"FALSE_ALARM": 223
|
|
},
|
|
"SQ2": {
|
|
"MISS": 186,
|
|
"WRONG_TYPE": 52,
|
|
"PARSE_FAIL": 13,
|
|
"FALSE_ALARM": 45
|
|
},
|
|
"SQ3": {
|
|
"WRONG_TYPE": 136,
|
|
"MISS": 145,
|
|
"PARSE_FAIL": 33,
|
|
"FALSE_ALARM": 122
|
|
},
|
|
"SQ4": {
|
|
"WRONG_TYPE": 110,
|
|
"MISS": 48,
|
|
"PARSE_FAIL": 17,
|
|
"FALSE_ALARM": 134
|
|
},
|
|
"SQ5": {
|
|
"MISS": 20,
|
|
"WRONG_TYPE": 53,
|
|
"PARSE_FAIL": 8,
|
|
"FALSE_ALARM": 117
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"intrusion": 331,
|
|
"fire_gas": 107,
|
|
"water_damage": 14,
|
|
"device_fault": 68,
|
|
"elderly_specific": 131,
|
|
"child_specific": 84,
|
|
"behavioral_anomaly": 83
|
|
}
|
|
}
|
|
},
|
|
"weak_answer_summary": {
|
|
"total_episodes": 2500,
|
|
"detection_accuracy": 0.528,
|
|
"miss_rate": 0.2947761194029851,
|
|
"false_alarm_rate": 0.6603448275862069,
|
|
"precision": 0.5523085914669784,
|
|
"recall": 0.7052238805970149,
|
|
"f1_security": 0.6194690265486725,
|
|
"threat_type_accuracy": 0.4804232804232804,
|
|
"parse_failure_rate": 0.0508,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 1340,
|
|
"correct": 945,
|
|
"accuracy": 0.7052238805970149
|
|
},
|
|
"FP": {
|
|
"total": 980,
|
|
"correct": 320,
|
|
"accuracy": 0.32653061224489793
|
|
},
|
|
"TN": {
|
|
"total": 180,
|
|
"correct": 55,
|
|
"accuracy": 0.3055555555555556
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 575,
|
|
"accuracy": 0.551304347826087,
|
|
"miss_count": 7,
|
|
"false_alarm_count": 250
|
|
},
|
|
"SQ2": {
|
|
"total": 633,
|
|
"accuracy": 0.5576619273301737,
|
|
"miss_count": 148,
|
|
"false_alarm_count": 127
|
|
},
|
|
"SQ3": {
|
|
"total": 568,
|
|
"accuracy": 0.46830985915492956,
|
|
"miss_count": 139,
|
|
"false_alarm_count": 154
|
|
},
|
|
"SQ4": {
|
|
"total": 399,
|
|
"accuracy": 0.47117794486215536,
|
|
"miss_count": 68,
|
|
"false_alarm_count": 140
|
|
},
|
|
"SQ5": {
|
|
"total": 325,
|
|
"accuracy": 0.6030769230769231,
|
|
"miss_count": 33,
|
|
"false_alarm_count": 95
|
|
}
|
|
},
|
|
"label": "weak_answers",
|
|
"count": 2500,
|
|
"error_taxonomy": {
|
|
"total_errors": 1671,
|
|
"error_distribution": {
|
|
"MISS": 334,
|
|
"WRONG_TYPE": 491,
|
|
"PARSE_FAIL": 89,
|
|
"FALSE_ALARM": 757
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"WRONG_TYPE": 45,
|
|
"PARSE_FAIL": 9,
|
|
"MISS": 1,
|
|
"FALSE_ALARM": 248
|
|
},
|
|
"SQ2": {
|
|
"MISS": 134,
|
|
"WRONG_TYPE": 114,
|
|
"PARSE_FAIL": 21,
|
|
"FALSE_ALARM": 125
|
|
},
|
|
"SQ3": {
|
|
"WRONG_TYPE": 163,
|
|
"PARSE_FAIL": 31,
|
|
"MISS": 120,
|
|
"FALSE_ALARM": 151
|
|
},
|
|
"SQ4": {
|
|
"WRONG_TYPE": 114,
|
|
"PARSE_FAIL": 18,
|
|
"MISS": 55,
|
|
"FALSE_ALARM": 138
|
|
},
|
|
"SQ5": {
|
|
"MISS": 24,
|
|
"WRONG_TYPE": 55,
|
|
"PARSE_FAIL": 10,
|
|
"FALSE_ALARM": 95
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"intrusion": 336,
|
|
"fire_gas": 126,
|
|
"water_damage": 21,
|
|
"device_fault": 46,
|
|
"elderly_specific": 132,
|
|
"child_specific": 86,
|
|
"behavioral_anomaly": 78
|
|
}
|
|
}
|
|
}
|
|
} |