Files
llmiotsafe/qwen35_dpo_ultralowmem_ref_free/run_summary.json
2026-05-12 17:01:39 +08:00

21 lines
598 B
JSON

{
"model_name": "Qwen/Qwen3.5-9B",
"train_file": "data/train_dpo_clean.jsonl",
"dev_file": "data/dev_dpo_clean.jsonl",
"output_dir": "outputs/qwen35_dpo_ultralowmem_ref_free",
"ref_logps_cache_dir": null,
"used_cached_ref_logps": false,
"reference_free": true,
"num_train_examples": 2435,
"num_dev_examples": 292,
"max_length": 2048,
"max_prompt_length": 1536,
"max_completion_length": 256,
"truncation_mode": "keep_end",
"padding_free": false,
"use_logits_to_keep": true,
"precompute_ref_log_probs": false,
"beta": 0.1,
"learning_rate": 5e-07,
"epochs": 1.0
}