232 lines
5.3 KiB
JSON
232 lines
5.3 KiB
JSON
{
|
|
"split": "train",
|
|
"input_root": "data_dpo_v2\\train_pref_v1",
|
|
"total_pairs": 2500,
|
|
"assembly_counts": {
|
|
"chosen_rule_fallback": 1240,
|
|
"rejected_weak_model": 1319,
|
|
"rejected_constructed_fallback": 1181,
|
|
"chosen_strong_model": 1260
|
|
},
|
|
"strong_answer_summary": {
|
|
"total_episodes": 2500,
|
|
"detection_accuracy": 0.61,
|
|
"miss_rate": 0.4298507462686567,
|
|
"false_alarm_rate": 0.3439655172413793,
|
|
"precision": 0.6569217540842648,
|
|
"recall": 0.5701492537313433,
|
|
"f1_security": 0.6104674390731122,
|
|
"threat_type_accuracy": 0.6583769633507853,
|
|
"parse_failure_rate": 0.0052,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 1340,
|
|
"correct": 764,
|
|
"accuracy": 0.5701492537313433
|
|
},
|
|
"FP": {
|
|
"total": 980,
|
|
"correct": 615,
|
|
"accuracy": 0.6275510204081632
|
|
},
|
|
"TN": {
|
|
"total": 180,
|
|
"correct": 146,
|
|
"accuracy": 0.8111111111111111
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 575,
|
|
"accuracy": 0.5634782608695652,
|
|
"miss_count": 77,
|
|
"false_alarm_count": 174
|
|
},
|
|
"SQ2": {
|
|
"total": 633,
|
|
"accuracy": 0.6382306477093207,
|
|
"miss_count": 178,
|
|
"false_alarm_count": 51
|
|
},
|
|
"SQ3": {
|
|
"total": 568,
|
|
"accuracy": 0.602112676056338,
|
|
"miss_count": 172,
|
|
"false_alarm_count": 54
|
|
},
|
|
"SQ4": {
|
|
"total": 399,
|
|
"accuracy": 0.6090225563909775,
|
|
"miss_count": 89,
|
|
"false_alarm_count": 67
|
|
},
|
|
"SQ5": {
|
|
"total": 325,
|
|
"accuracy": 0.6523076923076923,
|
|
"miss_count": 60,
|
|
"false_alarm_count": 53
|
|
}
|
|
},
|
|
"label": "strong_answers",
|
|
"count": 2500,
|
|
"error_taxonomy": {
|
|
"total_errors": 1236,
|
|
"error_distribution": {
|
|
"MISS": 568,
|
|
"WRONG_TYPE": 261,
|
|
"PARSE_FAIL": 9,
|
|
"FALSE_ALARM": 398
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"MISS": 72,
|
|
"WRONG_TYPE": 9,
|
|
"PARSE_FAIL": 5,
|
|
"FALSE_ALARM": 174
|
|
},
|
|
"SQ2": {
|
|
"MISS": 178,
|
|
"WRONG_TYPE": 26,
|
|
"FALSE_ALARM": 51
|
|
},
|
|
"SQ3": {
|
|
"MISS": 169,
|
|
"WRONG_TYPE": 122,
|
|
"PARSE_FAIL": 3,
|
|
"FALSE_ALARM": 54
|
|
},
|
|
"SQ4": {
|
|
"MISS": 89,
|
|
"WRONG_TYPE": 83,
|
|
"FALSE_ALARM": 67
|
|
},
|
|
"SQ5": {
|
|
"MISS": 60,
|
|
"WRONG_TYPE": 21,
|
|
"FALSE_ALARM": 52,
|
|
"PARSE_FAIL": 1
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"intrusion": 329,
|
|
"fire_gas": 107,
|
|
"device_fault": 81,
|
|
"elderly_specific": 133,
|
|
"child_specific": 89,
|
|
"behavioral_anomaly": 90
|
|
}
|
|
}
|
|
},
|
|
"weak_answer_summary": {
|
|
"total_episodes": 2500,
|
|
"detection_accuracy": 0.6124,
|
|
"miss_rate": 0.3597014925373134,
|
|
"false_alarm_rate": 0.41206896551724137,
|
|
"precision": 0.6422155688622755,
|
|
"recall": 0.6402985074626866,
|
|
"f1_security": 0.641255605381166,
|
|
"threat_type_accuracy": 0.6118881118881119,
|
|
"parse_failure_rate": 0.026,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 1340,
|
|
"correct": 858,
|
|
"accuracy": 0.6402985074626866
|
|
},
|
|
"FP": {
|
|
"total": 980,
|
|
"correct": 510,
|
|
"accuracy": 0.5204081632653061
|
|
},
|
|
"TN": {
|
|
"total": 180,
|
|
"correct": 163,
|
|
"accuracy": 0.9055555555555556
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 575,
|
|
"accuracy": 0.5704347826086956,
|
|
"miss_count": 72,
|
|
"false_alarm_count": 170
|
|
},
|
|
"SQ2": {
|
|
"total": 633,
|
|
"accuracy": 0.6477093206951027,
|
|
"miss_count": 142,
|
|
"false_alarm_count": 79
|
|
},
|
|
"SQ3": {
|
|
"total": 568,
|
|
"accuracy": 0.647887323943662,
|
|
"miss_count": 121,
|
|
"false_alarm_count": 77
|
|
},
|
|
"SQ4": {
|
|
"total": 399,
|
|
"accuracy": 0.5588972431077694,
|
|
"miss_count": 85,
|
|
"false_alarm_count": 91
|
|
},
|
|
"SQ5": {
|
|
"total": 325,
|
|
"accuracy": 0.6215384615384615,
|
|
"miss_count": 62,
|
|
"false_alarm_count": 61
|
|
}
|
|
},
|
|
"label": "weak_answers",
|
|
"count": 2500,
|
|
"error_taxonomy": {
|
|
"total_errors": 1302,
|
|
"error_distribution": {
|
|
"MISS": 460,
|
|
"WRONG_TYPE": 333,
|
|
"PARSE_FAIL": 40,
|
|
"FALSE_ALARM": 469
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"MISS": 67,
|
|
"WRONG_TYPE": 32,
|
|
"PARSE_FAIL": 14,
|
|
"FALSE_ALARM": 166
|
|
},
|
|
"SQ2": {
|
|
"MISS": 140,
|
|
"PARSE_FAIL": 6,
|
|
"FALSE_ALARM": 77,
|
|
"WRONG_TYPE": 47
|
|
},
|
|
"SQ3": {
|
|
"MISS": 111,
|
|
"WRONG_TYPE": 142,
|
|
"PARSE_FAIL": 15,
|
|
"FALSE_ALARM": 74
|
|
},
|
|
"SQ4": {
|
|
"MISS": 81,
|
|
"WRONG_TYPE": 93,
|
|
"FALSE_ALARM": 91,
|
|
"PARSE_FAIL": 4
|
|
},
|
|
"SQ5": {
|
|
"MISS": 61,
|
|
"WRONG_TYPE": 19,
|
|
"PARSE_FAIL": 1,
|
|
"FALSE_ALARM": 61
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"intrusion": 258,
|
|
"fire_gas": 146,
|
|
"water_damage": 6,
|
|
"device_fault": 99,
|
|
"elderly_specific": 134,
|
|
"child_specific": 63,
|
|
"behavioral_anomaly": 87
|
|
}
|
|
}
|
|
}
|
|
} |