{ "splits": { "train": { "full_total": 2500, "chosen_rule": 1621, "rejected_weak_model_actual_error": 1679, "err_wrong_type": 487, "err_miss": 485, "err_parse_fail": 101, "dropped_weak_parse_fail": 101, "chosen_strong_model": 879, "rejected_constructed": 821, "err_type_confusion": 168, "err_fabricated_evidence": 149, "err_false_alarm": 937, "err_false_alarm_hard": 173, "clean_total": 2399 }, "dev": { "full_total": 300, "chosen_rule": 201, "rejected_weak_model_actual_error": 190, "err_wrong_type": 56, "err_miss": 50, "rejected_constructed": 110, "err_parse_fail": 16, "dropped_weak_parse_fail": 16, "err_fabricated_evidence": 23, "err_false_alarm": 119, "chosen_strong_model": 99, "err_false_alarm_hard": 13, "err_type_confusion": 23, "clean_total": 284 } }, "notes": { "clean_rule": "drop weak_model_actual_error pairs whose rejected_error_type == parse_fail", "format": "TRL conversational preference format: prompt/chosen/rejected are message lists" } }