{ "splits": { "train": { "full_total": 2500, "chosen_rule": 1240, "rejected_weak_model_actual_error": 1319, "err_miss": 628, "clean_total": 2435, "rejected_constructed": 1181, "err_fabricated_evidence": 176, "err_wrong_type": 325, "chosen_strong_model": 1260, "err_type_confusion": 181, "err_parse_fail": 65, "dropped_weak_parse_fail": 65, "err_false_alarm": 810, "err_false_alarm_hard": 315 }, "dev": { "full_total": 300, "chosen_rule": 154, "rejected_weak_model_actual_error": 155, "err_parse_fail": 8, "dropped_weak_parse_fail": 8, "err_miss": 74, "clean_total": 292, "err_wrong_type": 31, "rejected_constructed": 145, "err_fabricated_evidence": 25, "err_type_confusion": 26, "chosen_strong_model": 146, "err_false_alarm": 108, "err_false_alarm_hard": 28 } }, "notes": { "clean_rule": "drop weak_model_actual_error pairs whose rejected_error_type == parse_fail", "format": "TRL conversational preference format: prompt/chosen/rejected are message lists" } }