39 lines
1.1 KiB
Bash
39 lines
1.1 KiB
Bash
#!/usr/bin/env bash
#
# Launch scripts/train_dpo.py under torchrun with 2 processes per node.
#
# Required environment:
#   CONDA_PREFIX     Root of the active conda environment; used to locate the
#                    shared-library directories added to LD_LIBRARY_PATH.
# Optional environment:
#   LD_LIBRARY_PATH  Existing search path; preserved after the conda entries.
#
# All script/data/output paths below are relative, so the working directory
# at launch time determines where they resolve.

set -euo pipefail

# PyTorch CUDA caching-allocator setting.
# NOTE(review): presumably chosen to limit fragmentation on a tight memory
# budget (see the "ultralowmem" output dir name) - confirm.
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Fail early with an actionable message instead of a bare "unbound variable".
: "${CONDA_PREFIX:?CONDA_PREFIX is not set; activate the conda environment first}"

# Prepend the conda library directories to the loader search path.
# ${LD_LIBRARY_PATH:+...} appends the previous value only when it is set and
# non-empty: this avoids aborting under `set -u` when the variable is unset,
# and avoids a trailing ':' (an empty entry makes the loader search the
# current working directory).
# NOTE(review): the python3.12 and cu13 path components are hard-coded and
# must match the environment actually in use.
LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${CONDA_PREFIX}/targets/x86_64-linux/lib:${CONDA_PREFIX}/lib/python3.12/site-packages/nvidia/cu13/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export LD_LIBRARY_PATH

# Run configuration.
readonly MODEL_NAME="Qwen/Qwen3.5-9B"
readonly TRAIN_FILE="data/train_dpo_clean.jsonl"
readonly DEV_FILE="data/dev_dpo_clean.jsonl"
readonly OUTPUT_DIR="outputs/qwen35_dpo_ultralowmem_ref_free"

# Arguments forwarded verbatim to scripts/train_dpo.py. Kept in an array so
# each group can be commented and no backslash continuation can silently
# break the command line.
train_args=(
  # Model and data locations.
  --model-name "${MODEL_NAME}"
  --train-file "${TRAIN_FILE}"
  --dev-file "${DEV_FILE}"
  --output-dir "${OUTPUT_DIR}"

  # Schedule and batch sizing.
  # NOTE(review): with 2 processes, batch size 1 and 16 accumulation steps the
  # effective batch is presumably 32 - confirm against train_dpo.py.
  --learning-rate 5e-7
  --num-train-epochs 1.0
  --per-device-train-batch-size 1
  --per-device-eval-batch-size 1
  --gradient-accumulation-steps 16

  # Sequence-length limits and truncation.
  --max-length 2048
  --max-prompt-length 1536
  --max-completion-length 256
  --truncation-mode keep_end

  # DPO-specific switches.
  --reference-free
  --use-logits-to-keep
  --beta 0.1

  # Evaluation, checkpointing, logging and cache-clearing cadence (in steps).
  --eval-steps 200
  --save-steps 200
  --logging-steps 10
  --torch-empty-cache-steps 5

  # LoRA adapter hyper-parameters.
  --lora-r 8
  --lora-alpha 16
  --lora-dropout 0.05

  # Optimizer, attention backend and numeric precision.
  --optim paged_adamw_8bit
  --attn-implementation sdpa
  --bf16
)

torchrun --nproc_per_node=2 scripts/train_dpo.py "${train_args[@]}"