{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 279, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03590664272890485, "grad_norm": 4.158227443695068, "learning_rate": 5.142857142857143e-06, "loss": 1.4155162811279296, "step": 10 }, { "epoch": 0.0718132854578097, "grad_norm": 1.651964545249939, "learning_rate": 7.992974927057286e-06, "loss": 0.805302906036377, "step": 20 }, { "epoch": 0.10771992818671454, "grad_norm": 1.1457046270370483, "learning_rate": 7.936922311790387e-06, "loss": 0.6863783836364746, "step": 30 }, { "epoch": 0.1436265709156194, "grad_norm": 1.0957046747207642, "learning_rate": 7.825603937106089e-06, "loss": 0.6478697299957276, "step": 40 }, { "epoch": 0.17953321364452424, "grad_norm": 1.0592354536056519, "learning_rate": 7.660582468972073e-06, "loss": 0.630420446395874, "step": 50 }, { "epoch": 0.21543985637342908, "grad_norm": 1.193621277809143, "learning_rate": 7.444174447069421e-06, "loss": 0.6120198726654053, "step": 60 }, { "epoch": 0.2513464991023339, "grad_norm": 1.2658783197402954, "learning_rate": 7.179417765653413e-06, "loss": 0.6067015647888183, "step": 70 }, { "epoch": 0.2872531418312388, "grad_norm": 1.2949614524841309, "learning_rate": 6.870029028177324e-06, "loss": 0.5879001617431641, "step": 80 }, { "epoch": 0.3231597845601436, "grad_norm": 1.060629963874817, "learning_rate": 6.520351374326845e-06, "loss": 0.5899921417236328, "step": 90 }, { "epoch": 0.3590664272890485, "grad_norm": 1.0478813648223877, "learning_rate": 6.1352935118591635e-06, "loss": 0.5808853149414063, "step": 100 }, { "epoch": 0.39497307001795334, "grad_norm": 1.261673092842102, "learning_rate": 5.720260809106082e-06, "loss": 0.5790524482727051, "step": 110 }, { "epoch": 0.43087971274685816, "grad_norm": 0.9995132088661194, "learning_rate": 5.28107941545135e-06, "loss": 0.5731555938720703, "step": 120 }, { "epoch": 0.466786355475763, "grad_norm": 1.0616153478622437, "learning_rate": 4.823914474964393e-06, "loss": 0.5686985969543457, "step": 130 }, { "epoch": 0.5026929982046678, "grad_norm": 1.0009936094284058, "learning_rate": 4.355183581291739e-06, "loss": 0.5693955898284913, "step": 140 }, { "epoch": 0.5385996409335727, "grad_norm": 0.9809271097183228, "learning_rate": 3.8814666887097606e-06, "loss": 0.5598339080810547, "step": 150 }, { "epoch": 0.5745062836624776, "grad_norm": 0.9185540080070496, "learning_rate": 3.4094137439900755e-06, "loss": 0.5563373565673828, "step": 160 }, { "epoch": 0.6104129263913824, "grad_norm": 0.9326531887054443, "learning_rate": 2.94565133572373e-06, "loss": 0.5573836326599121, "step": 170 }, { "epoch": 0.6463195691202872, "grad_norm": 0.9872980117797852, "learning_rate": 2.496689671543048e-06, "loss": 0.5560619354248046, "step": 180 }, { "epoch": 0.6822262118491921, "grad_norm": 0.8914107084274292, "learning_rate": 2.068831189077021e-06, "loss": 0.5496598720550537, "step": 190 }, { "epoch": 0.718132854578097, "grad_norm": 0.9620088338851929, "learning_rate": 1.6680820835421124e-06, "loss": 0.5505017757415771, "step": 200 }, { "epoch": 0.7540394973070018, "grad_norm": 0.8976511359214783, "learning_rate": 1.3000679939271586e-06, "loss": 0.5497483253479004, "step": 210 }, { "epoch": 0.7899461400359067, "grad_norm": 0.8932470679283142, "learning_rate": 9.699550313535194e-07, "loss": 0.5485612392425537, "step": 220 }, { "epoch": 0.8258527827648114, "grad_norm": 0.9887930154800415, "learning_rate": 6.823772581991929e-07, "loss": 0.5506965160369873, "step": 230 }, { "epoch": 0.8617594254937163, "grad_norm": 0.9191465973854065, "learning_rate": 4.413716360210111e-07, "loss": 0.5589346885681152, "step": 240 }, { "epoch": 0.8976660682226212, "grad_norm": 0.8461963534355164, "learning_rate": 2.503213554635164e-07, "loss": 0.5451088905334472, "step": 250 }, { "epoch": 0.933572710951526, "grad_norm": 0.8567756414413452, "learning_rate": 1.11908343678357e-07, "loss": 0.5363705635070801, "step": 260 }, { "epoch": 0.9694793536804309, "grad_norm": 0.8579103946685791, "learning_rate": 2.8075615945932328e-08, "loss": 0.5481849670410156, "step": 270 }, { "epoch": 1.0, "step": 279, "total_flos": 1.3482902468228547e+18, "train_loss": 0.6133034630915597, "train_runtime": 1004.8538, "train_samples_per_second": 35.45, "train_steps_per_second": 0.278 } ], "logging_steps": 10, "max_steps": 279, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3482902468228547e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }