Files
2026-05-12 17:01:39 +08:00

45 lines
1.2 KiB
JSON

{
"splits": {
"train": {
"all_total": 2500,
"all_variant_TP": 1340,
"all_sq_SQ2": 633,
"all_source_rule": 1621,
"all_sq_SQ3": 568,
"all_sq_SQ5": 325,
"all_source_strong_model": 879,
"all_variant_FP": 980,
"all_sq_SQ4": 399,
"all_sq_SQ1": 575,
"all_variant_TN": 180,
"focus_total": 5010,
"focus_variant_TP": 3160,
"focus_sq_SQ2": 1074,
"focus_duplicates": 2510,
"focus_sq_SQ3": 1685,
"focus_sq_SQ5": 491,
"focus_variant_FP": 1670,
"focus_sq_SQ4": 1017,
"focus_sq_SQ1": 743,
"focus_variant_TN": 180
},
"dev": {
"all_total": 300,
"all_variant_TP": 160,
"all_sq_SQ2": 38,
"all_source_rule": 201,
"all_sq_SQ5": 62,
"all_sq_SQ3": 71,
"all_variant_FP": 120,
"all_source_strong_model": 99,
"all_sq_SQ4": 59,
"all_sq_SQ1": 70,
"all_variant_TN": 20
}
},
"notes": {
"train_sft_all": "one normalized chosen-only example per pair",
"train_sft_focus": "reweighted train split with extra copies for SQ3/SQ4 and hard TP/FP cases",
"dev_sft": "normalized dev split without reweighting"
}
}