Files
llmiotsafe/qwen35_dpo_ultralowmem_ref_free/checkpoint-77/trainer_state.json
2026-05-12 17:01:39 +08:00

180 lines
5.9 KiB
JSON

{
"best_global_step": 77,
"best_metric": 0.6231179237365723,
"best_model_checkpoint": "outputs/qwen35_dpo_ultralowmem_ref_free/checkpoint-77",
"epoch": 1.0,
"eval_steps": 200,
"global_step": 77,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 0.9454727958887815,
"epoch": 0.13136288998357964,
"grad_norm": 7.8125,
"learning_rate": 4.919332367333748e-07,
"logits/chosen": -1.8099737944940273,
"logits/rejected": -1.832894790703956,
"logps/chosen": -353.753306388855,
"logps/rejected": -344.2661180496216,
"loss": 0.6934457302093506,
"mean_token_accuracy": 0.7538095749914646,
"num_tokens": 1287126.0,
"rewards/accuracies": 0.409375,
"rewards/chosen": -0.00509019878518302,
"rewards/margins": 0.0012955188169144094,
"rewards/rejected": -0.006385717548255343,
"step": 10
},
{
"entropy": 0.960740290209651,
"epoch": 0.2627257799671593,
"grad_norm": 7.0625,
"learning_rate": 4.4450893857960984e-07,
"logits/chosen": -1.8059819363814396,
"logits/rejected": -1.8327529228301944,
"logps/chosen": -383.5576331138611,
"logps/rejected": -348.3659210205078,
"loss": 0.6661795139312744,
"mean_token_accuracy": 0.7455122817307711,
"num_tokens": 2584081.0,
"rewards/accuracies": 0.696875,
"rewards/chosen": 0.01022649770602584,
"rewards/margins": 0.05804813946597278,
"rewards/rejected": -0.04782164186508453,
"step": 20
},
{
"entropy": 0.9594713591039181,
"epoch": 0.39408866995073893,
"grad_norm": 6.34375,
"learning_rate": 3.625509362044183e-07,
"logits/chosen": -1.8090848916866544,
"logits/rejected": -1.839708000919806,
"logps/chosen": -367.3285415649414,
"logps/rejected": -347.3939818382263,
"loss": 0.6498698234558106,
"mean_token_accuracy": 0.7490173149853945,
"num_tokens": 3873129.0,
"rewards/accuracies": 0.76875,
"rewards/chosen": 0.01563858055451419,
"rewards/margins": 0.09292944613844156,
"rewards/rejected": -0.0772908657156222,
"step": 30
},
{
"entropy": 0.9579024501144886,
"epoch": 0.5254515599343186,
"grad_norm": 7.84375,
"learning_rate": 2.606103007990371e-07,
"logits/chosen": -1.822963703035332,
"logits/rejected": -1.8331565552576632,
"logps/chosen": -365.0903636932373,
"logps/rejected": -355.14615325927736,
"loss": 0.6380878925323487,
"mean_token_accuracy": 0.749091599136591,
"num_tokens": 5164485.0,
"rewards/accuracies": 0.7625,
"rewards/chosen": 0.013401065368088893,
"rewards/margins": 0.12074792645871639,
"rewards/rejected": -0.10734686049545417,
"step": 40
},
{
"entropy": 0.9612626571208238,
"epoch": 0.6568144499178982,
"grad_norm": 6.09375,
"learning_rate": 1.5678588055492286e-07,
"logits/chosen": -1.8040991478766433,
"logits/rejected": -1.833159321438822,
"logps/chosen": -402.17723083496094,
"logps/rejected": -356.4602531433105,
"loss": 0.6306396484375,
"mean_token_accuracy": 0.7441606149077415,
"num_tokens": 6453542.0,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.014387460224679672,
"rewards/margins": 0.13786569883814082,
"rewards/rejected": -0.12347823897434865,
"step": 50
},
{
"entropy": 0.9673831064254046,
"epoch": 0.7881773399014779,
"grad_norm": 6.8125,
"learning_rate": 6.951097651136889e-08,
"logits/chosen": -1.8032587367384516,
"logits/rejected": -1.8365809013216652,
"logps/chosen": -381.1871561050415,
"logps/rejected": -348.20880632400514,
"loss": 0.6237552642822266,
"mean_token_accuracy": 0.7467762563377619,
"num_tokens": 7742778.0,
"rewards/accuracies": 0.79375,
"rewards/chosen": 0.02942158783553168,
"rewards/margins": 0.15444288045400753,
"rewards/rejected": -0.12502129255008185,
"step": 60
},
{
"entropy": 0.9571243241429329,
"epoch": 0.9195402298850575,
"grad_norm": 6.78125,
"learning_rate": 1.4280638634728948e-08,
"logits/chosen": -1.8158756551653077,
"logits/rejected": -1.8351591651305355,
"logps/chosen": -369.4532477378845,
"logps/rejected": -360.18324394226073,
"loss": 0.6174001693725586,
"mean_token_accuracy": 0.7488324739038944,
"num_tokens": 9038143.0,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.02844569750013761,
"rewards/margins": 0.1687723191542318,
"rewards/rejected": -0.14032662139070454,
"step": 70
},
{
"epoch": 1.0,
"eval_entropy": 0.9807351431617998,
"eval_logits/chosen": -1.8414135470736719,
"eval_logits/rejected": -1.8220581632231228,
"eval_logps/chosen": -395.92160922533844,
"eval_logps/rejected": -371.32895587241813,
"eval_loss": 0.6231179237365723,
"eval_mean_token_accuracy": 0.7525672818699928,
"eval_num_tokens": 9825739.0,
"eval_rewards/accuracies": 0.7602739726027398,
"eval_rewards/chosen": 0.013094972891650125,
"eval_rewards/margins": 0.15859890370693516,
"eval_rewards/rejected": -0.1455039322377846,
"eval_runtime": 256.8556,
"eval_samples_per_second": 1.137,
"eval_steps_per_second": 0.568,
"step": 77
}
],
"logging_steps": 10,
"max_steps": 77,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.73228507514667e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}