40 lines
1.1 KiB
JSON
40 lines
1.1 KiB
JSON
{
|
|
"splits": {
|
|
"train": {
|
|
"full_total": 2500,
|
|
"chosen_rule": 1240,
|
|
"rejected_weak_model_actual_error": 1319,
|
|
"err_miss": 628,
|
|
"clean_total": 2435,
|
|
"rejected_constructed": 1181,
|
|
"err_fabricated_evidence": 176,
|
|
"err_wrong_type": 325,
|
|
"chosen_strong_model": 1260,
|
|
"err_type_confusion": 181,
|
|
"err_parse_fail": 65,
|
|
"dropped_weak_parse_fail": 65,
|
|
"err_false_alarm": 810,
|
|
"err_false_alarm_hard": 315
|
|
},
|
|
"dev": {
|
|
"full_total": 300,
|
|
"chosen_rule": 154,
|
|
"rejected_weak_model_actual_error": 155,
|
|
"err_parse_fail": 8,
|
|
"dropped_weak_parse_fail": 8,
|
|
"err_miss": 74,
|
|
"clean_total": 292,
|
|
"err_wrong_type": 31,
|
|
"rejected_constructed": 145,
|
|
"err_fabricated_evidence": 25,
|
|
"err_type_confusion": 26,
|
|
"chosen_strong_model": 146,
|
|
"err_false_alarm": 108,
|
|
"err_false_alarm_hard": 28
|
|
}
|
|
},
|
|
"notes": {
|
|
"clean_rule": "drop weak_model_actual_error pairs whose rejected_error_type == parse_fail",
|
|
"format": "TRL conversational preference format: prompt/chosen/rejected are message lists"
|
|
}
|
|
} |