228 lines
5.1 KiB
JSON
228 lines
5.1 KiB
JSON
{
|
|
"split": "dev",
|
|
"input_root": "data_dpo_full_log_v1",
|
|
"total_pairs": 300,
|
|
"counters": {
|
|
"chosen_rule_fallback": 201,
|
|
"rejected_weak_model": 190,
|
|
"rejected_constructed_fallback": 110,
|
|
"chosen_strong_model": 99
|
|
},
|
|
"strong_answer_summary": {
|
|
"total_episodes": 300,
|
|
"detection_accuracy": 0.5433333333333333,
|
|
"miss_rate": 0.25625,
|
|
"false_alarm_rate": 0.6571428571428571,
|
|
"precision": 0.5639810426540285,
|
|
"recall": 0.74375,
|
|
"f1_security": 0.6415094339622642,
|
|
"threat_type_accuracy": 0.5714285714285714,
|
|
"parse_failure_rate": 0.11333333333333333,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 160,
|
|
"correct": 119,
|
|
"accuracy": 0.74375
|
|
},
|
|
"FP": {
|
|
"total": 120,
|
|
"correct": 35,
|
|
"accuracy": 0.2916666666666667
|
|
},
|
|
"TN": {
|
|
"total": 20,
|
|
"correct": 9,
|
|
"accuracy": 0.45
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 70,
|
|
"accuracy": 0.5428571428571428,
|
|
"miss_count": 5,
|
|
"false_alarm_count": 27
|
|
},
|
|
"SQ2": {
|
|
"total": 38,
|
|
"accuracy": 0.631578947368421,
|
|
"miss_count": 10,
|
|
"false_alarm_count": 4
|
|
},
|
|
"SQ3": {
|
|
"total": 71,
|
|
"accuracy": 0.4507042253521127,
|
|
"miss_count": 14,
|
|
"false_alarm_count": 25
|
|
},
|
|
"SQ4": {
|
|
"total": 59,
|
|
"accuracy": 0.6610169491525424,
|
|
"miss_count": 5,
|
|
"false_alarm_count": 14
|
|
},
|
|
"SQ5": {
|
|
"total": 62,
|
|
"accuracy": 0.4838709677419355,
|
|
"miss_count": 7,
|
|
"false_alarm_count": 22
|
|
}
|
|
},
|
|
"label": "strong_answers",
|
|
"count": 300,
|
|
"error_taxonomy": {
|
|
"total_errors": 188,
|
|
"error_distribution": {
|
|
"MISS": 25,
|
|
"PARSE_FAIL": 21,
|
|
"WRONG_TYPE": 51,
|
|
"FALSE_ALARM": 91
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"WRONG_TYPE": 6,
|
|
"PARSE_FAIL": 6,
|
|
"FALSE_ALARM": 26
|
|
},
|
|
"SQ2": {
|
|
"MISS": 10,
|
|
"WRONG_TYPE": 3,
|
|
"FALSE_ALARM": 4
|
|
},
|
|
"SQ3": {
|
|
"WRONG_TYPE": 19,
|
|
"MISS": 7,
|
|
"PARSE_FAIL": 7,
|
|
"FALSE_ALARM": 25
|
|
},
|
|
"SQ4": {
|
|
"FALSE_ALARM": 14,
|
|
"PARSE_FAIL": 3,
|
|
"WRONG_TYPE": 17,
|
|
"MISS": 3
|
|
},
|
|
"SQ5": {
|
|
"MISS": 5,
|
|
"PARSE_FAIL": 5,
|
|
"WRONG_TYPE": 6,
|
|
"FALSE_ALARM": 22
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"intrusion": 38,
|
|
"device_fault": 6,
|
|
"elderly_specific": 12,
|
|
"child_specific": 11,
|
|
"behavioral_anomaly": 9
|
|
}
|
|
}
|
|
},
|
|
"weak_answer_summary": {
|
|
"total_episodes": 300,
|
|
"detection_accuracy": 0.5566666666666666,
|
|
"miss_rate": 0.2,
|
|
"false_alarm_rate": 0.6928571428571428,
|
|
"precision": 0.5688888888888889,
|
|
"recall": 0.8,
|
|
"f1_security": 0.664935064935065,
|
|
"threat_type_accuracy": 0.5546875,
|
|
"parse_failure_rate": 0.07666666666666666,
|
|
"breakdown": {
|
|
"TP": {
|
|
"total": 160,
|
|
"correct": 128,
|
|
"accuracy": 0.8
|
|
},
|
|
"FP": {
|
|
"total": 120,
|
|
"correct": 33,
|
|
"accuracy": 0.275
|
|
},
|
|
"TN": {
|
|
"total": 20,
|
|
"correct": 6,
|
|
"accuracy": 0.3
|
|
}
|
|
},
|
|
"per_sq": {
|
|
"SQ1": {
|
|
"total": 70,
|
|
"accuracy": 0.5714285714285714,
|
|
"miss_count": 0,
|
|
"false_alarm_count": 30
|
|
},
|
|
"SQ2": {
|
|
"total": 38,
|
|
"accuracy": 0.631578947368421,
|
|
"miss_count": 8,
|
|
"false_alarm_count": 6
|
|
},
|
|
"SQ3": {
|
|
"total": 71,
|
|
"accuracy": 0.4647887323943662,
|
|
"miss_count": 11,
|
|
"false_alarm_count": 27
|
|
},
|
|
"SQ4": {
|
|
"total": 59,
|
|
"accuracy": 0.5423728813559322,
|
|
"miss_count": 7,
|
|
"false_alarm_count": 20
|
|
},
|
|
"SQ5": {
|
|
"total": 62,
|
|
"accuracy": 0.6129032258064516,
|
|
"miss_count": 6,
|
|
"false_alarm_count": 14
|
|
}
|
|
},
|
|
"label": "weak_answers",
|
|
"count": 300,
|
|
"error_taxonomy": {
|
|
"total_errors": 190,
|
|
"error_distribution": {
|
|
"MISS": 21,
|
|
"WRONG_TYPE": 57,
|
|
"PARSE_FAIL": 15,
|
|
"FALSE_ALARM": 97
|
|
},
|
|
"error_by_sq": {
|
|
"SQ1": {
|
|
"WRONG_TYPE": 5,
|
|
"FALSE_ALARM": 30
|
|
},
|
|
"SQ2": {
|
|
"WRONG_TYPE": 8,
|
|
"MISS": 7,
|
|
"PARSE_FAIL": 1,
|
|
"FALSE_ALARM": 6
|
|
},
|
|
"SQ3": {
|
|
"MISS": 6,
|
|
"WRONG_TYPE": 22,
|
|
"PARSE_FAIL": 5,
|
|
"FALSE_ALARM": 27
|
|
},
|
|
"SQ4": {
|
|
"FALSE_ALARM": 20,
|
|
"WRONG_TYPE": 15,
|
|
"MISS": 5,
|
|
"PARSE_FAIL": 2
|
|
},
|
|
"SQ5": {
|
|
"MISS": 3,
|
|
"PARSE_FAIL": 7,
|
|
"WRONG_TYPE": 7,
|
|
"FALSE_ALARM": 14
|
|
}
|
|
},
|
|
"error_by_category": {
|
|
"intrusion": 38,
|
|
"water_damage": 3,
|
|
"device_fault": 5,
|
|
"elderly_specific": 15,
|
|
"child_specific": 9,
|
|
"behavioral_anomaly": 8
|
|
}
|
|
}
|
|
}
|
|
} |