{ "model_name": "Qwen/Qwen3.5-9B", "train_file": "data/train_dpo_clean.jsonl", "dev_file": "data/dev_dpo_clean.jsonl", "output_dir": "outputs/qwen35_dpo_ultralowmem_ref_free", "ref_logps_cache_dir": null, "used_cached_ref_logps": false, "reference_free": true, "num_train_examples": 2435, "num_dev_examples": 292, "max_length": 2048, "max_prompt_length": 1536, "max_completion_length": 256, "truncation_mode": "keep_end", "padding_free": false, "use_logits_to_keep": true, "precompute_ref_log_probs": false, "beta": 0.1, "learning_rate": 5e-07, "epochs": 1.0 }