40 lines
1.1 KiB
JSON
40 lines
1.1 KiB
JSON
{
|
|
"splits": {
|
|
"train": {
|
|
"full_total": 2500,
|
|
"chosen_rule": 1621,
|
|
"rejected_weak_model_actual_error": 1679,
|
|
"err_wrong_type": 487,
|
|
"err_miss": 485,
|
|
"err_parse_fail": 101,
|
|
"dropped_weak_parse_fail": 101,
|
|
"chosen_strong_model": 879,
|
|
"rejected_constructed": 821,
|
|
"err_type_confusion": 168,
|
|
"err_fabricated_evidence": 149,
|
|
"err_false_alarm": 937,
|
|
"err_false_alarm_hard": 173,
|
|
"clean_total": 2399
|
|
},
|
|
"dev": {
|
|
"full_total": 300,
|
|
"chosen_rule": 201,
|
|
"rejected_weak_model_actual_error": 190,
|
|
"err_wrong_type": 56,
|
|
"err_miss": 50,
|
|
"rejected_constructed": 110,
|
|
"err_parse_fail": 16,
|
|
"dropped_weak_parse_fail": 16,
|
|
"err_fabricated_evidence": 23,
|
|
"err_false_alarm": 119,
|
|
"chosen_strong_model": 99,
|
|
"err_false_alarm_hard": 13,
|
|
"err_type_confusion": 23,
|
|
"clean_total": 284
|
|
}
|
|
},
|
|
"notes": {
|
|
"clean_rule": "drop weak_model_actual_error pairs whose rejected_error_type == parse_fail",
|
|
"format": "TRL conversational preference format: prompt/chosen/rejected are message lists"
|
|
}
|
|
} |