diff --git "a/mistral_safe_unlearning/trainer_state.json" "b/mistral_safe_unlearning/trainer_state.json" new file mode 100644--- /dev/null +++ "b/mistral_safe_unlearning/trainer_state.json" @@ -0,0 +1,3861 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.9402985074626864, + "eval_steps": 500, + "global_step": 132, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 1.230027198791504, + "Normal prob": -1.230027198791504, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0, + "step": 0 + }, + { + "DPO Loss": 0.34657145664029176, + "Negative Geometric Mean": -0.9493766827771892, + "Negative prob": -0.9493766827771892, + "Normal Loss": 1.0090343952178955, + "Normal prob": -1.0090343952178955, + "Positive Loss": 0.37890636920928955, + "Positive prob": -0.37890636920928955, + "epoch": 0, + "step": 0 + }, + { + "epoch": 0.029850746268656716, + "grad_norm": 52.804205501572916, + "learning_rate": 2.9772727272727274e-06, + "loss": 1.4155, + "step": 1 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 1.092234492301941, + "Normal prob": -1.092234492301941, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.029850746268656716, + "step": 1 + }, + { + "DPO Loss": 0.3577462635421841, + "Negative Geometric Mean": -0.3783103787169165, + "Negative prob": -0.3783103787169165, + "Normal Loss": 0.8107529282569885, + "Normal prob": -0.8107529282569885, + "Positive Loss": 0.07748764753341675, + "Positive prob": -0.07748764753341675, + "epoch": 0.029850746268656716, + "step": 1 + }, + { + "epoch": 0.05970149253731343, + "grad_norm": 22.382695244020965, + "learning_rate": 2.9545454545454547e-06, + "loss": 1.1517, + "step": 2 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.8582519292831421, + "Normal prob": -0.8582519292831421, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.05970149253731343, + "step": 2 + }, + { + "DPO Loss": 0.3623313108408915, + "Negative Geometric Mean": -1.0158045398410653, + "Negative prob": -1.0158045398410653, + "Normal Loss": 0.7299004793167114, + "Normal prob": -0.7299004793167114, + "Positive Loss": 0.07864368706941605, + "Positive prob": -0.07864368706941605, + "epoch": 0.05970149253731343, + "step": 2 + }, + { + "epoch": 0.08955223880597014, + "grad_norm": 12.355752020889257, + "learning_rate": 2.931818181818182e-06, + "loss": 1.0958, + "step": 3 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.9027985334396362, + "Normal prob": -0.9027985334396362, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.08955223880597014, + "step": 3 + }, + { + "DPO Loss": 0.37659024799225, + "Negative Geometric Mean": -0.6431755968319472, + "Negative prob": -0.6431755968319472, + "Normal Loss": 0.8552955389022827, + "Normal prob": -0.8552955389022827, + "Positive Loss": 0.0777173787355423, + "Positive prob": -0.0777173787355423, + "epoch": 0.08955223880597014, + "step": 3 + }, + { + "epoch": 0.11940298507462686, + "grad_norm": 13.671836146708186, + "learning_rate": 2.9090909090909093e-06, + "loss": 1.065, + "step": 4 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.9766985177993774, + "Normal prob": -0.9766985177993774, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.11940298507462686, + "step": 4 + }, + { + "DPO Loss": 0.35204142219855233, + "Negative Geometric Mean": -0.8325932820638021, + "Negative prob": -0.8325932820638021, + "Normal Loss": 1.0553878545761108, + "Normal prob": -1.0553878545761108, + "Positive Loss": 0.09103336185216904, + "Positive prob": -0.09103336185216904, + "epoch": 0.11940298507462686, + "step": 4 + }, + { + "epoch": 0.14925373134328357, + "grad_norm": 9.500922911135007, + "learning_rate": 2.8863636363636366e-06, + "loss": 1.2053, + "step": 5 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.9637267589569092, + "Normal prob": -0.9637267589569092, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.14925373134328357, + "step": 5 + }, + { + "DPO Loss": 0.3369350566486365, + "Negative Geometric Mean": -0.8089229916566171, + "Negative prob": -0.8089229916566171, + "Normal Loss": 0.809516429901123, + "Normal prob": -0.809516429901123, + "Positive Loss": 0.08676248043775558, + "Positive prob": -0.08676248043775558, + "epoch": 0.14925373134328357, + "step": 5 + }, + { + "epoch": 0.1791044776119403, + "grad_norm": 9.50990015460849, + "learning_rate": 2.863636363636364e-06, + "loss": 1.1133, + "step": 6 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.832047700881958, + "Normal prob": -0.832047700881958, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.1791044776119403, + "step": 6 + }, + { + "DPO Loss": 0.3370728580941308, + "Negative Geometric Mean": -0.8685310105770683, + "Negative prob": -0.8685310105770683, + "Normal Loss": 0.8815029263496399, + "Normal prob": -0.8815029263496399, + "Positive Loss": 0.07432825863361359, + "Positive prob": -0.07432825863361359, + "epoch": 0.1791044776119403, + "step": 6 + }, + { + "epoch": 0.208955223880597, + "grad_norm": 8.039098978654936, + "learning_rate": 2.840909090909091e-06, + "loss": 1.2064, + "step": 7 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.9996135830879211, + "Normal prob": -0.9996135830879211, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.208955223880597, + "step": 7 + }, + { + "DPO Loss": 0.35429090257439527, + "Negative Geometric Mean": -0.610354295417444, + "Negative prob": -0.610354295417444, + "Normal Loss": 0.716498851776123, + "Normal prob": -0.716498851776123, + "Positive Loss": 0.09210512042045593, + "Positive prob": -0.09210512042045593, + "epoch": 0.208955223880597, + "step": 7 + }, + { + "epoch": 0.23880597014925373, + "grad_norm": 9.566983237882555, + "learning_rate": 2.8181818181818185e-06, + "loss": 1.067, + "step": 8 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.8073037266731262, + "Normal prob": -0.8073037266731262, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.23880597014925373, + "step": 8 + }, + { + "DPO Loss": 0.31094894833121156, + "Negative Geometric Mean": -0.8462985688606195, + "Negative prob": -0.8462985688606195, + "Normal Loss": 0.5099501013755798, + "Normal prob": -0.5099501013755798, + "Positive Loss": 0.09047375619411469, + "Positive prob": -0.09047375619411469, + "epoch": 0.23880597014925373, + "step": 8 + }, + { + "epoch": 0.26865671641791045, + "grad_norm": 7.87857077645135, + "learning_rate": 2.7954545454545453e-06, + "loss": 1.0028, + "step": 9 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.850799024105072, + "Normal prob": -0.850799024105072, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.26865671641791045, + "step": 9 + }, + { + "DPO Loss": 0.30296717585093275, + "Negative Geometric Mean": -0.9634897133738725, + "Negative prob": -0.9634897133738725, + "Normal Loss": 1.2854121923446655, + "Normal prob": -1.2854121923446655, + "Positive Loss": 0.07252619415521622, + "Positive prob": -0.07252619415521622, + "epoch": 0.26865671641791045, + "step": 9 + }, + { + "epoch": 0.29850746268656714, + "grad_norm": 9.469089262773355, + "learning_rate": 2.7727272727272726e-06, + "loss": 1.1173, + "step": 10 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.8608676195144653, + "Normal prob": -0.8608676195144653, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.29850746268656714, + "step": 10 + }, + { + "DPO Loss": 0.26851745737184274, + "Negative Geometric Mean": -1.0714603639998526, + "Negative prob": -1.0714603639998526, + "Normal Loss": 0.7984556555747986, + "Normal prob": -0.7984556555747986, + "Positive Loss": 0.05822120979428291, + "Positive prob": -0.05822120979428291, + "epoch": 0.29850746268656714, + "step": 10 + }, + { + "epoch": 0.3283582089552239, + "grad_norm": 7.287903263195378, + "learning_rate": 2.75e-06, + "loss": 1.013, + "step": 11 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.8310959339141846, + "Normal prob": -0.8310959339141846, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.3283582089552239, + "step": 11 + }, + { + "DPO Loss": 0.26161430097353844, + "Negative Geometric Mean": -1.1722790406482055, + "Negative prob": -1.1722790406482055, + "Normal Loss": 0.8640764355659485, + "Normal prob": -0.8640764355659485, + "Positive Loss": 0.07576876878738403, + "Positive prob": -0.07576876878738403, + "epoch": 0.3283582089552239, + "step": 11 + }, + { + "epoch": 0.3582089552238806, + "grad_norm": 12.679434481744659, + "learning_rate": 2.7272727272727272e-06, + "loss": 1.1083, + "step": 12 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.9239760637283325, + "Normal prob": -0.9239760637283325, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.3582089552238806, + "step": 12 + }, + { + "DPO Loss": 0.3063912470938362, + "Negative Geometric Mean": -0.7924091110304511, + "Negative prob": -0.7924091110304511, + "Normal Loss": 0.8149660229682922, + "Normal prob": -0.8149660229682922, + "Positive Loss": 0.06200961023569107, + "Positive prob": -0.06200961023569107, + "epoch": 0.3582089552238806, + "step": 12 + }, + { + "epoch": 0.3880597014925373, + "grad_norm": 8.690620794915942, + "learning_rate": 2.7045454545454545e-06, + "loss": 1.0424, + "step": 13 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.7188159227371216, + "Normal prob": -0.7188159227371216, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.3880597014925373, + "step": 13 + }, + { + "DPO Loss": 0.2937756023461591, + "Negative Geometric Mean": -0.4400056991696919, + "Negative prob": -0.4400056991696919, + "Normal Loss": 0.8943111300468445, + "Normal prob": -0.8943111300468445, + "Positive Loss": 0.09851977974176407, + "Positive prob": -0.09851977974176407, + "epoch": 0.3880597014925373, + "step": 13 + }, + { + "epoch": 0.417910447761194, + "grad_norm": 8.116923440818201, + "learning_rate": 2.681818181818182e-06, + "loss": 1.019, + "step": 14 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.7557149529457092, + "Normal prob": -0.7557149529457092, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.417910447761194, + "step": 14 + }, + { + "DPO Loss": 0.3186842313017755, + "Negative Geometric Mean": -1.1004963759258488, + "Negative prob": -1.1004963759258488, + "Normal Loss": 0.8593255877494812, + "Normal prob": -0.8593255877494812, + "Positive Loss": 0.04907805845141411, + "Positive prob": -0.04907805845141411, + "epoch": 0.417910447761194, + "step": 14 + }, + { + "epoch": 0.44776119402985076, + "grad_norm": 8.007172349603502, + "learning_rate": 2.659090909090909e-06, + "loss": 1.0353, + "step": 15 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.897316038608551, + "Normal prob": -0.897316038608551, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.44776119402985076, + "step": 15 + }, + { + "DPO Loss": 0.1223304973432013, + "Negative Geometric Mean": -1.9276966850878456, + "Negative prob": -1.9276966850878456, + "Normal Loss": 0.6334409117698669, + "Normal prob": -0.6334409117698669, + "Positive Loss": 0.05224687606096268, + "Positive prob": -0.05224687606096268, + "epoch": 0.44776119402985076, + "step": 15 + }, + { + "epoch": 0.47761194029850745, + "grad_norm": 8.24043100452026, + "learning_rate": 2.6363636363636364e-06, + "loss": 0.9134, + "step": 16 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.9187679886817932, + "Normal prob": -0.9187679886817932, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.47761194029850745, + "step": 16 + }, + { + "DPO Loss": 0.055564984135369914, + "Negative Geometric Mean": -3.0230499623024127, + "Negative prob": -3.0230499623024127, + "Normal Loss": 1.1529364585876465, + "Normal prob": -1.1529364585876465, + "Positive Loss": 0.08235401660203934, + "Positive prob": -0.08235401660203934, + "epoch": 0.47761194029850745, + "step": 16 + }, + { + "epoch": 0.5074626865671642, + "grad_norm": 8.688253054467179, + "learning_rate": 2.6136363636363637e-06, + "loss": 1.0468, + "step": 17 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.7582242488861084, + "Normal prob": -0.7582242488861084, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.5074626865671642, + "step": 17 + }, + { + "DPO Loss": 0.12820626634228, + "Negative Geometric Mean": -2.269475635729338, + "Negative prob": -2.269475635729338, + "Normal Loss": 0.3805200755596161, + "Normal prob": -0.3805200755596161, + "Positive Loss": 0.07097562402486801, + "Positive prob": -0.07097562402486801, + "epoch": 0.5074626865671642, + "step": 17 + }, + { + "epoch": 0.5373134328358209, + "grad_norm": 8.727946309173646, + "learning_rate": 2.590909090909091e-06, + "loss": 0.8819, + "step": 18 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 1.0369868278503418, + "Normal prob": -1.0369868278503418, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.5373134328358209, + "step": 18 + }, + { + "DPO Loss": 0.06657070366779086, + "Negative Geometric Mean": -2.8913005754059435, + "Negative prob": -2.8913005754059435, + "Normal Loss": 0.7784830331802368, + "Normal prob": -0.7784830331802368, + "Positive Loss": 0.04361863434314728, + "Positive prob": -0.04361863434314728, + "epoch": 0.5373134328358209, + "step": 18 + }, + { + "epoch": 0.5671641791044776, + "grad_norm": 8.562076920129734, + "learning_rate": 2.5681818181818183e-06, + "loss": 0.8626, + "step": 19 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.9392335414886475, + "Normal prob": -0.9392335414886475, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.5671641791044776, + "step": 19 + }, + { + "DPO Loss": 0.004452790854185025, + "Negative Geometric Mean": -5.43384802094139, + "Negative prob": -5.43384802094139, + "Normal Loss": 0.8345220685005188, + "Normal prob": -0.8345220685005188, + "Positive Loss": 0.07906413078308105, + "Positive prob": -0.07906413078308105, + "epoch": 0.5671641791044776, + "step": 19 + }, + { + "epoch": 0.5970149253731343, + "grad_norm": 9.154040073172876, + "learning_rate": 2.5454545454545456e-06, + "loss": 0.9584, + "step": 20 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.8151698112487793, + "Normal prob": -0.8151698112487793, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.5970149253731343, + "step": 20 + }, + { + "DPO Loss": 0.0007850244175145432, + "Negative Geometric Mean": -7.495313533399471, + "Negative prob": -7.495313533399471, + "Normal Loss": 0.8681256771087646, + "Normal prob": -0.8681256771087646, + "Positive Loss": 0.05661068111658096, + "Positive prob": -0.05661068111658096, + "epoch": 0.5970149253731343, + "step": 20 + }, + { + "epoch": 0.6268656716417911, + "grad_norm": 19.32249471061477, + "learning_rate": 2.522727272727273e-06, + "loss": 0.9313, + "step": 21 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.8350412249565125, + "Normal prob": -0.8350412249565125, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.6268656716417911, + "step": 21 + }, + { + "DPO Loss": 0.013323540066809244, + "Negative Geometric Mean": -4.539878773912091, + "Negative prob": -4.539878773912091, + "Normal Loss": 0.8549312949180603, + "Normal prob": -0.8549312949180603, + "Positive Loss": 0.0937165841460228, + "Positive prob": -0.0937165841460228, + "epoch": 0.6268656716417911, + "step": 21 + }, + { + "epoch": 0.6567164179104478, + "grad_norm": 18.4720664879043, + "learning_rate": 2.5e-06, + "loss": 0.9518, + "step": 22 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.83185875415802, + "Normal prob": -0.83185875415802, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.6567164179104478, + "step": 22 + }, + { + "DPO Loss": 7.441585176947715e-05, + "Negative Geometric Mean": -9.482926878210616, + "Negative prob": -9.482926878210616, + "Normal Loss": 1.0035439729690552, + "Normal prob": -1.0035439729690552, + "Positive Loss": 0.1059131771326065, + "Positive prob": -0.1059131771326065, + "epoch": 0.6567164179104478, + "step": 22 + }, + { + "epoch": 0.6865671641791045, + "grad_norm": 7.4403596397354015, + "learning_rate": 2.4772727272727275e-06, + "loss": 0.9741, + "step": 23 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 1.1941570043563843, + "Normal prob": -1.1941570043563843, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.6865671641791045, + "step": 23 + }, + { + "DPO Loss": 5.767926370817813e-06, + "Negative Geometric Mean": -12.173237529280238, + "Negative prob": -12.173237529280238, + "Normal Loss": 0.7278223037719727, + "Normal prob": -0.7278223037719727, + "Positive Loss": 0.07192976027727127, + "Positive prob": -0.07192976027727127, + "epoch": 0.6865671641791045, + "step": 23 + }, + { + "epoch": 0.7164179104477612, + "grad_norm": 7.330688093396721, + "learning_rate": 2.454545454545455e-06, + "loss": 0.9927, + "step": 24 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.9865759611129761, + "Normal prob": -0.9865759611129761, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.7164179104477612, + "step": 24 + }, + { + "DPO Loss": 1.0947351633426092e-05, + "Negative Geometric Mean": -11.567188243575508, + "Negative prob": -11.567188243575508, + "Normal Loss": 0.8570675253868103, + "Normal prob": -0.8570675253868103, + "Positive Loss": 0.06724032014608383, + "Positive prob": -0.06724032014608383, + "epoch": 0.7164179104477612, + "step": 24 + }, + { + "epoch": 0.746268656716418, + "grad_norm": 7.557781355223681, + "learning_rate": 2.4318181818181817e-06, + "loss": 0.9221, + "step": 25 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.9384709000587463, + "Normal prob": -0.9384709000587463, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.746268656716418, + "step": 25 + }, + { + "DPO Loss": 0.0003560853089358333, + "Negative Geometric Mean": -8.138521756169094, + "Negative prob": -8.138521756169094, + "Normal Loss": 0.8964889049530029, + "Normal prob": -0.8964889049530029, + "Positive Loss": 0.06566499173641205, + "Positive prob": -0.06566499173641205, + "epoch": 0.746268656716418, + "step": 25 + }, + { + "epoch": 0.7761194029850746, + "grad_norm": 7.046101597469539, + "learning_rate": 2.409090909090909e-06, + "loss": 0.9733, + "step": 26 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.9506588578224182, + "Normal prob": -0.9506588578224182, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.7761194029850746, + "step": 26 + }, + { + "DPO Loss": 8.255091540571396e-06, + "Negative Geometric Mean": -11.822831423895254, + "Negative prob": -11.822831423895254, + "Normal Loss": 0.9992624521255493, + "Normal prob": -0.9992624521255493, + "Positive Loss": 0.093373604118824, + "Positive prob": -0.093373604118824, + "epoch": 0.7761194029850746, + "step": 26 + }, + { + "epoch": 0.8059701492537313, + "grad_norm": 6.576814099531671, + "learning_rate": 2.3863636363636363e-06, + "loss": 0.9531, + "step": 27 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.964348316192627, + "Normal prob": -0.964348316192627, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.8059701492537313, + "step": 27 + }, + { + "DPO Loss": 1.038318288302413e-05, + "Negative Geometric Mean": -12.70634533110119, + "Negative prob": -12.70634533110119, + "Normal Loss": 0.8761529326438904, + "Normal prob": -0.8761529326438904, + "Positive Loss": 0.048190467059612274, + "Positive prob": -0.048190467059612274, + "epoch": 0.8059701492537313, + "step": 27 + }, + { + "epoch": 0.835820895522388, + "grad_norm": 7.041162886647003, + "learning_rate": 2.3636363636363636e-06, + "loss": 0.9254, + "step": 28 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 1.0659770965576172, + "Normal prob": -1.0659770965576172, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.835820895522388, + "step": 28 + }, + { + "DPO Loss": 4.745458003421849e-06, + "Negative Geometric Mean": -12.22632121816452, + "Negative prob": -12.22632121816452, + "Normal Loss": 0.877432644367218, + "Normal prob": -0.877432644367218, + "Positive Loss": 0.07441161572933197, + "Positive prob": -0.07441161572933197, + "epoch": 0.835820895522388, + "step": 28 + }, + { + "epoch": 0.8656716417910447, + "grad_norm": 6.123530075257292, + "learning_rate": 2.340909090909091e-06, + "loss": 0.9906, + "step": 29 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.9131932854652405, + "Normal prob": -0.9131932854652405, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.8656716417910447, + "step": 29 + }, + { + "DPO Loss": 1.6460282004165447e-05, + "Negative Geometric Mean": -11.04015827178955, + "Negative prob": -11.04015827178955, + "Normal Loss": 1.063417673110962, + "Normal prob": -1.063417673110962, + "Positive Loss": 0.050095487385988235, + "Positive prob": -0.050095487385988235, + "epoch": 0.8656716417910447, + "step": 29 + }, + { + "epoch": 0.8955223880597015, + "grad_norm": 6.118956679067234, + "learning_rate": 2.318181818181818e-06, + "loss": 0.9193, + "step": 30 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.8643659949302673, + "Normal prob": -0.8643659949302673, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.8955223880597015, + "step": 30 + }, + { + "DPO Loss": 9.904635203296299e-06, + "Negative Geometric Mean": -11.622193305258186, + "Negative prob": -11.622193305258186, + "Normal Loss": 0.7562206983566284, + "Normal prob": -0.7562206983566284, + "Positive Loss": 0.05644884705543518, + "Positive prob": -0.05644884705543518, + "epoch": 0.8955223880597015, + "step": 30 + }, + { + "epoch": 0.9253731343283582, + "grad_norm": 6.536101223333384, + "learning_rate": 2.2954545454545455e-06, + "loss": 0.8934, + "step": 31 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.8608868718147278, + "Normal prob": -0.8608868718147278, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.9253731343283582, + "step": 31 + }, + { + "DPO Loss": 2.137509260823493e-05, + "Negative Geometric Mean": -10.705368454391891, + "Negative prob": -10.705368454391891, + "Normal Loss": 0.71401047706604, + "Normal prob": -0.71401047706604, + "Positive Loss": 0.06425323337316513, + "Positive prob": -0.06425323337316513, + "epoch": 0.9253731343283582, + "step": 31 + }, + { + "epoch": 0.9552238805970149, + "grad_norm": 7.040623160546982, + "learning_rate": 2.2727272727272728e-06, + "loss": 0.8664, + "step": 32 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.7989053130149841, + "Normal prob": -0.7989053130149841, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.9552238805970149, + "step": 32 + }, + { + "DPO Loss": 2.7078393532067353e-05, + "Negative Geometric Mean": -10.612409125434027, + "Negative prob": -10.612409125434027, + "Normal Loss": 0.9129906892776489, + "Normal prob": -0.9129906892776489, + "Positive Loss": 0.10737287253141403, + "Positive prob": -0.10737287253141403, + "epoch": 0.9552238805970149, + "step": 32 + }, + { + "epoch": 0.9850746268656716, + "grad_norm": 6.585654111220224, + "learning_rate": 2.25e-06, + "loss": 0.842, + "step": 33 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.8037691712379456, + "Normal prob": -0.8037691712379456, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.9850746268656716, + "step": 33 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.7143898606300354, + "Normal prob": -0.7143898606300354, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 0.9850746268656716, + "step": 33 + }, + { + "epoch": 1.0149253731343284, + "grad_norm": 9.914782438598868, + "learning_rate": 2.2272727272727274e-06, + "loss": 0.6917, + "step": 34 + }, + { + "DPO Loss": 3.654100751997121e-05, + "Negative Geometric Mean": -10.674591619318182, + "Negative prob": -10.674591619318182, + "Normal Loss": 0.48714742064476013, + "Normal prob": -0.48714742064476013, + "Positive Loss": 0.03236498683691025, + "Positive prob": -0.03236498683691025, + "epoch": 1.0149253731343284, + "step": 34 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.5524182915687561, + "Normal prob": -0.5524182915687561, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.0149253731343284, + "step": 34 + }, + { + "epoch": 1.044776119402985, + "grad_norm": 7.2083835894058375, + "learning_rate": 2.2045454545454547e-06, + "loss": 0.6615, + "step": 35 + }, + { + "DPO Loss": 5.3735510809371045e-05, + "Negative Geometric Mean": -10.087603725282486, + "Negative prob": -10.087603725282486, + "Normal Loss": 0.47449687123298645, + "Normal prob": -0.47449687123298645, + "Positive Loss": 0.02946843020617962, + "Positive prob": -0.02946843020617962, + "epoch": 1.044776119402985, + "step": 35 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.5653090476989746, + "Normal prob": -0.5653090476989746, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.044776119402985, + "step": 35 + }, + { + "epoch": 1.0746268656716418, + "grad_norm": 7.509154772376704, + "learning_rate": 2.181818181818182e-06, + "loss": 0.5463, + "step": 36 + }, + { + "DPO Loss": 4.57076718186167e-05, + "Negative Geometric Mean": -10.369059509873779, + "Negative prob": -10.369059509873779, + "Normal Loss": 0.7275592684745789, + "Normal prob": -0.7275592684745789, + "Positive Loss": 0.022183816879987717, + "Positive prob": -0.022183816879987717, + "epoch": 1.0746268656716418, + "step": 36 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.48975300788879395, + "Normal prob": -0.48975300788879395, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.0746268656716418, + "step": 36 + }, + { + "epoch": 1.1044776119402986, + "grad_norm": 5.921019640825061, + "learning_rate": 2.1590909090909092e-06, + "loss": 0.6523, + "step": 37 + }, + { + "DPO Loss": 7.289560432171723e-05, + "Negative Geometric Mean": -9.601848503888467, + "Negative prob": -9.601848503888467, + "Normal Loss": 0.8984713554382324, + "Normal prob": -0.8984713554382324, + "Positive Loss": 0.029801441356539726, + "Positive prob": -0.029801441356539726, + "epoch": 1.1044776119402986, + "step": 37 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.709186851978302, + "Normal prob": -0.709186851978302, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.1044776119402986, + "step": 37 + }, + { + "epoch": 1.1343283582089552, + "grad_norm": 7.446657265486741, + "learning_rate": 2.1363636363636365e-06, + "loss": 0.6967, + "step": 38 + }, + { + "DPO Loss": 2.6440661378327594e-05, + "Negative Geometric Mean": -10.957384672619048, + "Negative prob": -10.957384672619048, + "Normal Loss": 0.44052013754844666, + "Normal prob": -0.44052013754844666, + "Positive Loss": 0.02077590487897396, + "Positive prob": -0.02077590487897396, + "epoch": 1.1343283582089552, + "step": 38 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.39025214314460754, + "Normal prob": -0.39025214314460754, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.1343283582089552, + "step": 38 + }, + { + "epoch": 1.164179104477612, + "grad_norm": 6.65773645557663, + "learning_rate": 2.113636363636364e-06, + "loss": 0.5978, + "step": 39 + }, + { + "DPO Loss": 2.123153925438824e-05, + "Negative Geometric Mean": -10.680970389887971, + "Negative prob": -10.680970389887971, + "Normal Loss": 0.6101383566856384, + "Normal prob": -0.6101383566856384, + "Positive Loss": 0.03984152898192406, + "Positive prob": -0.03984152898192406, + "epoch": 1.164179104477612, + "step": 39 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.5792780518531799, + "Normal prob": -0.5792780518531799, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.164179104477612, + "step": 39 + }, + { + "epoch": 1.1940298507462686, + "grad_norm": 6.924093024562789, + "learning_rate": 2.090909090909091e-06, + "loss": 0.6592, + "step": 40 + }, + { + "DPO Loss": 1.6680911890968927e-05, + "Negative Geometric Mean": -10.952719974078342, + "Negative prob": -10.952719974078342, + "Normal Loss": 0.4338739216327667, + "Normal prob": -0.4338739216327667, + "Positive Loss": 0.03350961208343506, + "Positive prob": -0.03350961208343506, + "epoch": 1.1940298507462686, + "step": 40 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.3462405800819397, + "Normal prob": -0.3462405800819397, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.1940298507462686, + "step": 40 + }, + { + "epoch": 1.2238805970149254, + "grad_norm": 6.295634713144118, + "learning_rate": 2.0681818181818184e-06, + "loss": 0.5281, + "step": 41 + }, + { + "DPO Loss": 1.9216125147544902e-05, + "Negative Geometric Mean": -10.949885493970315, + "Negative prob": -10.949885493970315, + "Normal Loss": 0.6209268569946289, + "Normal prob": -0.6209268569946289, + "Positive Loss": 0.010221516713500023, + "Positive prob": -0.010221516713500023, + "epoch": 1.2238805970149254, + "step": 41 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.4472298324108124, + "Normal prob": -0.4472298324108124, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.2238805970149254, + "step": 41 + }, + { + "epoch": 1.2537313432835822, + "grad_norm": 5.857596369043, + "learning_rate": 2.0454545454545453e-06, + "loss": 0.5837, + "step": 42 + }, + { + "DPO Loss": 3.1575882722812355e-05, + "Negative Geometric Mean": -10.842009715544872, + "Negative prob": -10.842009715544872, + "Normal Loss": 0.3952675461769104, + "Normal prob": -0.3952675461769104, + "Positive Loss": 0.06998435407876968, + "Positive prob": -0.06998435407876968, + "epoch": 1.2537313432835822, + "step": 42 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.3682936131954193, + "Normal prob": -0.3682936131954193, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.2537313432835822, + "step": 42 + }, + { + "epoch": 1.2835820895522387, + "grad_norm": 6.19946111675221, + "learning_rate": 2.0227272727272726e-06, + "loss": 0.4489, + "step": 43 + }, + { + "DPO Loss": 7.918896147509772e-06, + "Negative Geometric Mean": -11.86809765625, + "Negative prob": -11.86809765625, + "Normal Loss": 0.7341710329055786, + "Normal prob": -0.7341710329055786, + "Positive Loss": 0.023408204317092896, + "Positive prob": -0.023408204317092896, + "epoch": 1.2835820895522387, + "step": 43 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.5852903723716736, + "Normal prob": -0.5852903723716736, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.2835820895522387, + "step": 43 + }, + { + "epoch": 1.3134328358208955, + "grad_norm": 7.299737993528941, + "learning_rate": 2e-06, + "loss": 0.6287, + "step": 44 + }, + { + "DPO Loss": 1.81222332665437e-05, + "Negative Geometric Mean": -10.647718364689625, + "Negative prob": -10.647718364689625, + "Normal Loss": 0.4992733597755432, + "Normal prob": -0.4992733597755432, + "Positive Loss": 0.05666818842291832, + "Positive prob": -0.05666818842291832, + "epoch": 1.3134328358208955, + "step": 44 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.7956355810165405, + "Normal prob": -0.7956355810165405, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.3134328358208955, + "step": 44 + }, + { + "epoch": 1.3432835820895521, + "grad_norm": 7.900970686658878, + "learning_rate": 1.977272727272727e-06, + "loss": 0.7441, + "step": 45 + }, + { + "DPO Loss": 6.092015148833826e-05, + "Negative Geometric Mean": -9.997283935546875, + "Negative prob": -9.997283935546875, + "Normal Loss": 0.5293800830841064, + "Normal prob": -0.5293800830841064, + "Positive Loss": 0.012493799440562725, + "Positive prob": -0.012493799440562725, + "epoch": 1.3432835820895521, + "step": 45 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.6335676908493042, + "Normal prob": -0.6335676908493042, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.3432835820895521, + "step": 45 + }, + { + "epoch": 1.373134328358209, + "grad_norm": 6.033587067188048, + "learning_rate": 1.9545454545454545e-06, + "loss": 0.6688, + "step": 46 + }, + { + "DPO Loss": 2.075863324394268e-05, + "Negative Geometric Mean": -11.443209795884684, + "Negative prob": -11.443209795884684, + "Normal Loss": 0.9459198713302612, + "Normal prob": -0.9459198713302612, + "Positive Loss": 0.019672967493534088, + "Positive prob": -0.019672967493534088, + "epoch": 1.373134328358209, + "step": 46 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.5753485560417175, + "Normal prob": -0.5753485560417175, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.373134328358209, + "step": 46 + }, + { + "epoch": 1.4029850746268657, + "grad_norm": 6.437116667065512, + "learning_rate": 1.931818181818182e-06, + "loss": 0.7138, + "step": 47 + }, + { + "DPO Loss": 5.24218732737661e-05, + "Negative Geometric Mean": -10.045061616056572, + "Negative prob": -10.045061616056572, + "Normal Loss": 0.6808024644851685, + "Normal prob": -0.6808024644851685, + "Positive Loss": 0.023415615782141685, + "Positive prob": -0.023415615782141685, + "epoch": 1.4029850746268657, + "step": 47 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.6357601284980774, + "Normal prob": -0.6357601284980774, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.4029850746268657, + "step": 47 + }, + { + "epoch": 1.4328358208955223, + "grad_norm": 7.152119639795567, + "learning_rate": 1.909090909090909e-06, + "loss": 0.6079, + "step": 48 + }, + { + "DPO Loss": 7.2306889216542525e-06, + "Negative Geometric Mean": -12.073476457210242, + "Negative prob": -12.073476457210242, + "Normal Loss": 0.5705257058143616, + "Normal prob": -0.5705257058143616, + "Positive Loss": 0.02072186954319477, + "Positive prob": -0.02072186954319477, + "epoch": 1.4328358208955223, + "step": 48 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.5705331563949585, + "Normal prob": -0.5705331563949585, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.4328358208955223, + "step": 48 + }, + { + "epoch": 1.462686567164179, + "grad_norm": 8.435206603146995, + "learning_rate": 1.8863636363636364e-06, + "loss": 0.4921, + "step": 49 + }, + { + "DPO Loss": 1.2005791148960418e-05, + "Negative Geometric Mean": -11.335293660121682, + "Negative prob": -11.335293660121682, + "Normal Loss": 0.4985297918319702, + "Normal prob": -0.4985297918319702, + "Positive Loss": 0.03511481359601021, + "Positive prob": -0.03511481359601021, + "epoch": 1.462686567164179, + "step": 49 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.5165051221847534, + "Normal prob": -0.5165051221847534, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.462686567164179, + "step": 49 + }, + { + "epoch": 1.4925373134328357, + "grad_norm": 6.663071553118176, + "learning_rate": 1.8636363636363637e-06, + "loss": 0.582, + "step": 50 + }, + { + "DPO Loss": 8.121549918893668e-06, + "Negative Geometric Mean": -11.646775242426388, + "Negative prob": -11.646775242426388, + "Normal Loss": 0.7396381497383118, + "Normal prob": -0.7396381497383118, + "Positive Loss": 0.046656664460897446, + "Positive prob": -0.046656664460897446, + "epoch": 1.4925373134328357, + "step": 50 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.46003180742263794, + "Normal prob": -0.46003180742263794, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.4925373134328357, + "step": 50 + }, + { + "epoch": 1.5223880597014925, + "grad_norm": 6.8924034055431225, + "learning_rate": 1.840909090909091e-06, + "loss": 0.5667, + "step": 51 + }, + { + "DPO Loss": 3.960602457920955e-05, + "Negative Geometric Mean": -10.694715555003613, + "Negative prob": -10.694715555003613, + "Normal Loss": 0.32585108280181885, + "Normal prob": -0.32585108280181885, + "Positive Loss": 0.04867149889469147, + "Positive prob": -0.04867149889469147, + "epoch": 1.5223880597014925, + "step": 51 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.4816523492336273, + "Normal prob": -0.4816523492336273, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.5223880597014925, + "step": 51 + }, + { + "epoch": 1.5522388059701493, + "grad_norm": 7.4924421694754075, + "learning_rate": 1.8181818181818183e-06, + "loss": 0.4917, + "step": 52 + }, + { + "DPO Loss": 5.1645393655010374e-05, + "Negative Geometric Mean": -10.168975942689114, + "Negative prob": -10.168975942689114, + "Normal Loss": 0.30011507868766785, + "Normal prob": -0.30011507868766785, + "Positive Loss": 0.02231639437377453, + "Positive prob": -0.02231639437377453, + "epoch": 1.5522388059701493, + "step": 52 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.4937782287597656, + "Normal prob": -0.4937782287597656, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.5522388059701493, + "step": 52 + }, + { + "epoch": 1.582089552238806, + "grad_norm": 7.184478655478447, + "learning_rate": 1.7954545454545456e-06, + "loss": 0.5295, + "step": 53 + }, + { + "DPO Loss": 5.356822072205326e-06, + "Negative Geometric Mean": -12.203828545026882, + "Negative prob": -12.203828545026882, + "Normal Loss": 0.5068655014038086, + "Normal prob": -0.5068655014038086, + "Positive Loss": 0.02591904066503048, + "Positive prob": -0.02591904066503048, + "epoch": 1.582089552238806, + "step": 53 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.44835716485977173, + "Normal prob": -0.44835716485977173, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.582089552238806, + "step": 53 + }, + { + "epoch": 1.6119402985074627, + "grad_norm": 6.665246283694876, + "learning_rate": 1.7727272727272729e-06, + "loss": 0.5862, + "step": 54 + }, + { + "DPO Loss": 2.7645910456594184e-05, + "Negative Geometric Mean": -10.52684736755279, + "Negative prob": -10.52684736755279, + "Normal Loss": 0.4901617169380188, + "Normal prob": -0.4901617169380188, + "Positive Loss": 0.031082332134246826, + "Positive prob": -0.031082332134246826, + "epoch": 1.6119402985074627, + "step": 54 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.3990895450115204, + "Normal prob": -0.3990895450115204, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.6119402985074627, + "step": 54 + }, + { + "epoch": 1.6417910447761193, + "grad_norm": 7.251995962906654, + "learning_rate": 1.7500000000000002e-06, + "loss": 0.5512, + "step": 55 + }, + { + "DPO Loss": 5.845775193481474e-06, + "Negative Geometric Mean": -11.981290714110127, + "Negative prob": -11.981290714110127, + "Normal Loss": 0.36946558952331543, + "Normal prob": -0.36946558952331543, + "Positive Loss": 0.03706742450594902, + "Positive prob": -0.03706742450594902, + "epoch": 1.6417910447761193, + "step": 55 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.6298558712005615, + "Normal prob": -0.6298558712005615, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.6417910447761193, + "step": 55 + }, + { + "epoch": 1.671641791044776, + "grad_norm": 6.5830406614829995, + "learning_rate": 1.7272727272727275e-06, + "loss": 0.5497, + "step": 56 + }, + { + "DPO Loss": 3.2969348642918384e-05, + "Negative Geometric Mean": -10.456912128245772, + "Negative prob": -10.456912128245772, + "Normal Loss": 0.6241900324821472, + "Normal prob": -0.6241900324821472, + "Positive Loss": 0.027565686032176018, + "Positive prob": -0.027565686032176018, + "epoch": 1.671641791044776, + "step": 56 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.49294447898864746, + "Normal prob": -0.49294447898864746, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.671641791044776, + "step": 56 + }, + { + "epoch": 1.7014925373134329, + "grad_norm": 7.489375090765791, + "learning_rate": 1.7045454545454548e-06, + "loss": 0.5473, + "step": 57 + }, + { + "DPO Loss": 9.641101792233715e-06, + "Negative Geometric Mean": -11.58503936609456, + "Negative prob": -11.58503936609456, + "Normal Loss": 0.4547930359840393, + "Normal prob": -0.4547930359840393, + "Positive Loss": 0.02409125678241253, + "Positive prob": -0.02409125678241253, + "epoch": 1.7014925373134329, + "step": 57 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.5437726378440857, + "Normal prob": -0.5437726378440857, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.7014925373134329, + "step": 57 + }, + { + "epoch": 1.7313432835820897, + "grad_norm": 7.870395979704569, + "learning_rate": 1.6818181818181817e-06, + "loss": 0.6139, + "step": 58 + }, + { + "DPO Loss": 1.2307788643174536e-05, + "Negative Geometric Mean": -11.400927734375, + "Negative prob": -11.400927734375, + "Normal Loss": 0.4675034284591675, + "Normal prob": -0.4675034284591675, + "Positive Loss": 0.02847522310912609, + "Positive prob": -0.02847522310912609, + "epoch": 1.7313432835820897, + "step": 58 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.5951191186904907, + "Normal prob": -0.5951191186904907, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.7313432835820897, + "step": 58 + }, + { + "epoch": 1.7611940298507462, + "grad_norm": 7.248327621413268, + "learning_rate": 1.659090909090909e-06, + "loss": 0.5501, + "step": 59 + }, + { + "DPO Loss": 6.131353933599495e-06, + "Negative Geometric Mean": -12.089666559278351, + "Negative prob": -12.089666559278351, + "Normal Loss": 0.6625760793685913, + "Normal prob": -0.6625760793685913, + "Positive Loss": 0.024925949051976204, + "Positive prob": -0.024925949051976204, + "epoch": 1.7611940298507462, + "step": 59 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.574043333530426, + "Normal prob": -0.574043333530426, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.7611940298507462, + "step": 59 + }, + { + "epoch": 1.7910447761194028, + "grad_norm": 5.45423423175427, + "learning_rate": 1.6363636363636363e-06, + "loss": 0.5803, + "step": 60 + }, + { + "DPO Loss": 6.456255345351767e-06, + "Negative Geometric Mean": -12.335179908988403, + "Negative prob": -12.335179908988403, + "Normal Loss": 0.5476536750793457, + "Normal prob": -0.5476536750793457, + "Positive Loss": 0.03484680876135826, + "Positive prob": -0.03484680876135826, + "epoch": 1.7910447761194028, + "step": 60 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.4749366343021393, + "Normal prob": -0.4749366343021393, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.7910447761194028, + "step": 60 + }, + { + "epoch": 1.8208955223880596, + "grad_norm": 5.8299300029602845, + "learning_rate": 1.6136363636363635e-06, + "loss": 0.5286, + "step": 61 + }, + { + "DPO Loss": 6.855680101123193e-06, + "Negative Geometric Mean": -12.133075664569805, + "Negative prob": -12.133075664569805, + "Normal Loss": 0.49556368589401245, + "Normal prob": -0.49556368589401245, + "Positive Loss": 0.031043315306305885, + "Positive prob": -0.031043315306305885, + "epoch": 1.8208955223880596, + "step": 61 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.7167157530784607, + "Normal prob": -0.7167157530784607, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.8208955223880596, + "step": 61 + }, + { + "epoch": 1.8507462686567164, + "grad_norm": 6.451470188285151, + "learning_rate": 1.5909090909090908e-06, + "loss": 0.5513, + "step": 62 + }, + { + "DPO Loss": 1.236436099783623e-05, + "Negative Geometric Mean": -11.445466172271574, + "Negative prob": -11.445466172271574, + "Normal Loss": 0.5918139219284058, + "Normal prob": -0.5918139219284058, + "Positive Loss": 0.026092026382684708, + "Positive prob": -0.026092026382684708, + "epoch": 1.8507462686567164, + "step": 62 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.4669744670391083, + "Normal prob": -0.4669744670391083, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.8507462686567164, + "step": 62 + }, + { + "epoch": 1.8805970149253732, + "grad_norm": 6.794192852617203, + "learning_rate": 1.5681818181818181e-06, + "loss": 0.4827, + "step": 63 + }, + { + "DPO Loss": 1.3348207631132665e-05, + "Negative Geometric Mean": -11.5704201146176, + "Negative prob": -11.5704201146176, + "Normal Loss": 0.7752443552017212, + "Normal prob": -0.7752443552017212, + "Positive Loss": 0.03313179686665535, + "Positive prob": -0.03313179686665535, + "epoch": 1.8805970149253732, + "step": 63 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.4319833815097809, + "Normal prob": -0.4319833815097809, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.8805970149253732, + "step": 63 + }, + { + "epoch": 1.9104477611940298, + "grad_norm": 6.583453632012116, + "learning_rate": 1.5454545454545454e-06, + "loss": 0.5891, + "step": 64 + }, + { + "DPO Loss": 4.963582076406908e-06, + "Negative Geometric Mean": -12.19005351163903, + "Negative prob": -12.19005351163903, + "Normal Loss": 0.7006582617759705, + "Normal prob": -0.7006582617759705, + "Positive Loss": 0.05256428197026253, + "Positive prob": -0.05256428197026253, + "epoch": 1.9104477611940298, + "step": 64 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.5088911652565002, + "Normal prob": -0.5088911652565002, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.9104477611940298, + "step": 64 + }, + { + "epoch": 1.9402985074626866, + "grad_norm": 6.441815309507991, + "learning_rate": 1.5227272727272727e-06, + "loss": 0.676, + "step": 65 + }, + { + "DPO Loss": 3.909155035241524e-06, + "Negative Geometric Mean": -12.52902815645973, + "Negative prob": -12.52902815645973, + "Normal Loss": 0.4071587920188904, + "Normal prob": -0.4071587920188904, + "Positive Loss": 0.029172131791710854, + "Positive prob": -0.029172131791710854, + "epoch": 1.9402985074626866, + "step": 65 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.5863581299781799, + "Normal prob": -0.5863581299781799, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.9402985074626866, + "step": 65 + }, + { + "epoch": 1.9701492537313432, + "grad_norm": 6.3544200742459935, + "learning_rate": 1.5e-06, + "loss": 0.4844, + "step": 66 + }, + { + "DPO Loss": 4.394697707115605e-06, + "Negative Geometric Mean": -12.445152789608176, + "Negative prob": -12.445152789608176, + "Normal Loss": 0.5812058448791504, + "Normal prob": -0.5812058448791504, + "Positive Loss": 0.025852346792817116, + "Positive prob": -0.025852346792817116, + "epoch": 1.9701492537313432, + "step": 66 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.7593735456466675, + "Normal prob": -0.7593735456466675, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 1.9701492537313432, + "step": 66 + }, + { + "epoch": 2.0, + "grad_norm": 7.0108110525699985, + "learning_rate": 1.4772727272727273e-06, + "loss": 0.6144, + "step": 67 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.34294071793556213, + "Normal prob": -0.34294071793556213, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.0, + "step": 67 + }, + { + "DPO Loss": 2.4070561719272564e-06, + "Negative Geometric Mean": -13.018313531479217, + "Negative prob": -13.018313531479217, + "Normal Loss": 0.3513112962245941, + "Normal prob": -0.3513112962245941, + "Positive Loss": 0.013781579211354256, + "Positive prob": -0.013781579211354256, + "epoch": 2.0, + "step": 67 + }, + { + "epoch": 2.029850746268657, + "grad_norm": 7.07209986229336, + "learning_rate": 1.4545454545454546e-06, + "loss": 0.3488, + "step": 68 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.17317090928554535, + "Normal prob": -0.17317090928554535, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.029850746268657, + "step": 68 + }, + { + "DPO Loss": 2.6232480269051795e-05, + "Negative Geometric Mean": -10.84640401579797, + "Negative prob": -10.84640401579797, + "Normal Loss": 0.2502392828464508, + "Normal prob": -0.2502392828464508, + "Positive Loss": 0.009016763418912888, + "Positive prob": -0.009016763418912888, + "epoch": 2.029850746268657, + "step": 68 + }, + { + "epoch": 2.0597014925373136, + "grad_norm": 6.718484882180734, + "learning_rate": 1.431818181818182e-06, + "loss": 0.3109, + "step": 69 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.25603172183036804, + "Normal prob": -0.25603172183036804, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.0597014925373136, + "step": 69 + }, + { + "DPO Loss": 2.4722913198806987e-06, + "Negative Geometric Mean": -13.571044921875, + "Negative prob": -13.571044921875, + "Normal Loss": 0.6462356448173523, + "Normal prob": -0.6462356448173523, + "Positive Loss": 0.004487407859414816, + "Positive prob": -0.004487407859414816, + "epoch": 2.0597014925373136, + "step": 69 + }, + { + "epoch": 2.08955223880597, + "grad_norm": 6.391521213804556, + "learning_rate": 1.4090909090909092e-06, + "loss": 0.4603, + "step": 70 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.2792622148990631, + "Normal prob": -0.2792622148990631, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.08955223880597, + "step": 70 + }, + { + "DPO Loss": 2.30209765905298e-06, + "Negative Geometric Mean": -13.15788681702044, + "Negative prob": -13.15788681702044, + "Normal Loss": 0.297980934381485, + "Normal prob": -0.297980934381485, + "Positive Loss": 0.011687587015330791, + "Positive prob": -0.011687587015330791, + "epoch": 2.08955223880597, + "step": 70 + }, + { + "epoch": 2.1194029850746268, + "grad_norm": 6.000476341412616, + "learning_rate": 1.3863636363636363e-06, + "loss": 0.426, + "step": 71 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.31911832094192505, + "Normal prob": -0.31911832094192505, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.1194029850746268, + "step": 71 + }, + { + "DPO Loss": 9.623063611478237e-07, + "Negative Geometric Mean": -13.79306566782845, + "Negative prob": -13.79306566782845, + "Normal Loss": 0.1256338506937027, + "Normal prob": -0.1256338506937027, + "Positive Loss": 0.014073642902076244, + "Positive prob": -0.014073642902076244, + "epoch": 2.1194029850746268, + "step": 71 + }, + { + "epoch": 2.1492537313432836, + "grad_norm": 5.665938957087509, + "learning_rate": 1.3636363636363636e-06, + "loss": 0.31, + "step": 72 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.5485053062438965, + "Normal prob": -0.5485053062438965, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.1492537313432836, + "step": 72 + }, + { + "DPO Loss": 5.508823305986437e-06, + "Negative Geometric Mean": -12.17880211034751, + "Negative prob": -12.17880211034751, + "Normal Loss": 0.18863847851753235, + "Normal prob": -0.18863847851753235, + "Positive Loss": 0.011334001086652279, + "Positive prob": -0.011334001086652279, + "epoch": 2.1492537313432836, + "step": 72 + }, + { + "epoch": 2.1791044776119404, + "grad_norm": 6.60404865468319, + "learning_rate": 1.340909090909091e-06, + "loss": 0.3371, + "step": 73 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.2698725759983063, + "Normal prob": -0.2698725759983063, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.1791044776119404, + "step": 73 + }, + { + "DPO Loss": 2.5343320997566906e-06, + "Negative Geometric Mean": -13.084115531452266, + "Negative prob": -13.084115531452266, + "Normal Loss": 0.3101830780506134, + "Normal prob": -0.3101830780506134, + "Positive Loss": 0.007303276099264622, + "Positive prob": -0.007303276099264622, + "epoch": 2.1791044776119404, + "step": 73 + }, + { + "epoch": 2.208955223880597, + "grad_norm": 7.537056674857057, + "learning_rate": 1.3181818181818182e-06, + "loss": 0.3896, + "step": 74 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.2088720202445984, + "Normal prob": -0.2088720202445984, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.208955223880597, + "step": 74 + }, + { + "DPO Loss": 6.722595604209246e-06, + "Negative Geometric Mean": -12.173159354073661, + "Negative prob": -12.173159354073661, + "Normal Loss": 0.40398481488227844, + "Normal prob": -0.40398481488227844, + "Positive Loss": 0.01838095672428608, + "Positive prob": -0.01838095672428608, + "epoch": 2.208955223880597, + "step": 74 + }, + { + "epoch": 2.2388059701492535, + "grad_norm": 7.724539990601786, + "learning_rate": 1.2954545454545455e-06, + "loss": 0.3009, + "step": 75 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.48262086510658264, + "Normal prob": -0.48262086510658264, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.2388059701492535, + "step": 75 + }, + { + "DPO Loss": 5.197786322988637e-07, + "Negative Geometric Mean": -14.51572339888308, + "Negative prob": -14.51572339888308, + "Normal Loss": 0.23035627603530884, + "Normal prob": -0.23035627603530884, + "Positive Loss": 0.008795712143182755, + "Positive prob": -0.008795712143182755, + "epoch": 2.2388059701492535, + "step": 75 + }, + { + "epoch": 2.2686567164179103, + "grad_norm": 8.221187128676613, + "learning_rate": 1.2727272727272728e-06, + "loss": 0.3589, + "step": 76 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.2785874009132385, + "Normal prob": -0.2785874009132385, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.2686567164179103, + "step": 76 + }, + { + "DPO Loss": 1.621094342365068e-06, + "Negative Geometric Mean": -13.028692859266869, + "Negative prob": -13.028692859266869, + "Normal Loss": 0.29848527908325195, + "Normal prob": -0.29848527908325195, + "Positive Loss": 0.004369077738374472, + "Positive prob": -0.004369077738374472, + "epoch": 2.2686567164179103, + "step": 76 + }, + { + "epoch": 2.298507462686567, + "grad_norm": 6.89968090148801, + "learning_rate": 1.25e-06, + "loss": 0.3104, + "step": 77 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.31930315494537354, + "Normal prob": -0.31930315494537354, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.298507462686567, + "step": 77 + }, + { + "DPO Loss": 1.3040399279024244e-05, + "Negative Geometric Mean": -11.366265677550448, + "Negative prob": -11.366265677550448, + "Normal Loss": 0.4552519917488098, + "Normal prob": -0.4552519917488098, + "Positive Loss": 0.020085470750927925, + "Positive prob": -0.020085470750927925, + "epoch": 2.298507462686567, + "step": 77 + }, + { + "epoch": 2.328358208955224, + "grad_norm": 7.22395223128197, + "learning_rate": 1.2272727272727274e-06, + "loss": 0.3699, + "step": 78 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.4527321457862854, + "Normal prob": -0.4527321457862854, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.328358208955224, + "step": 78 + }, + { + "DPO Loss": 3.5797003005450865e-06, + "Negative Geometric Mean": -12.924953185405927, + "Negative prob": -12.924953185405927, + "Normal Loss": 0.49810460209846497, + "Normal prob": -0.49810460209846497, + "Positive Loss": 0.0035452607553452253, + "Positive prob": -0.0035452607553452253, + "epoch": 2.328358208955224, + "step": 78 + }, + { + "epoch": 2.3582089552238807, + "grad_norm": 7.0300487933358, + "learning_rate": 1.2045454545454545e-06, + "loss": 0.3254, + "step": 79 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.3027646243572235, + "Normal prob": -0.3027646243572235, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.3582089552238807, + "step": 79 + }, + { + "DPO Loss": 5.243361513598499e-06, + "Negative Geometric Mean": -12.248686441906308, + "Negative prob": -12.248686441906308, + "Normal Loss": 0.19559913873672485, + "Normal prob": -0.19559913873672485, + "Positive Loss": 0.0036265316884964705, + "Positive prob": -0.0036265316884964705, + "epoch": 2.3582089552238807, + "step": 79 + }, + { + "epoch": 2.388059701492537, + "grad_norm": 7.018808301104353, + "learning_rate": 1.1818181818181818e-06, + "loss": 0.2924, + "step": 80 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.37363290786743164, + "Normal prob": -0.37363290786743164, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.388059701492537, + "step": 80 + }, + { + "DPO Loss": 8.039128102474587e-06, + "Negative Geometric Mean": -12.245501740608809, + "Negative prob": -12.245501740608809, + "Normal Loss": 0.38694456219673157, + "Normal prob": -0.38694456219673157, + "Positive Loss": 0.007913284935057163, + "Positive prob": -0.007913284935057163, + "epoch": 2.388059701492537, + "step": 80 + }, + { + "epoch": 2.417910447761194, + "grad_norm": 7.1316719605682595, + "learning_rate": 1.159090909090909e-06, + "loss": 0.373, + "step": 81 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.23960407078266144, + "Normal prob": -0.23960407078266144, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.417910447761194, + "step": 81 + }, + { + "DPO Loss": 3.643317578642959e-06, + "Negative Geometric Mean": -12.694272748161765, + "Negative prob": -12.694272748161765, + "Normal Loss": 0.3460986912250519, + "Normal prob": -0.3460986912250519, + "Positive Loss": 0.03607124090194702, + "Positive prob": -0.03607124090194702, + "epoch": 2.417910447761194, + "step": 81 + }, + { + "epoch": 2.4477611940298507, + "grad_norm": 6.568519556302587, + "learning_rate": 1.1363636363636364e-06, + "loss": 0.2968, + "step": 82 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.3786263167858124, + "Normal prob": -0.3786263167858124, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.4477611940298507, + "step": 82 + }, + { + "DPO Loss": 1.17810282746522e-05, + "Negative Geometric Mean": -11.290989731297348, + "Negative prob": -11.290989731297348, + "Normal Loss": 0.3233850598335266, + "Normal prob": -0.3233850598335266, + "Positive Loss": 0.014756398275494576, + "Positive prob": -0.014756398275494576, + "epoch": 2.4477611940298507, + "step": 82 + }, + { + "epoch": 2.4776119402985075, + "grad_norm": 6.961788241099841, + "learning_rate": 1.1136363636363637e-06, + "loss": 0.3067, + "step": 83 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.7298503518104553, + "Normal prob": -0.7298503518104553, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.4776119402985075, + "step": 83 + }, + { + "DPO Loss": 6.489746401144139e-06, + "Negative Geometric Mean": -12.0409423828125, + "Negative prob": -12.0409423828125, + "Normal Loss": 0.36162418127059937, + "Normal prob": -0.36162418127059937, + "Positive Loss": 0.007005380000919104, + "Positive prob": -0.007005380000919104, + "epoch": 2.4776119402985075, + "step": 83 + }, + { + "epoch": 2.5074626865671643, + "grad_norm": 6.528351208906881, + "learning_rate": 1.090909090909091e-06, + "loss": 0.4822, + "step": 84 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.43608808517456055, + "Normal prob": -0.43608808517456055, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.5074626865671643, + "step": 84 + }, + { + "DPO Loss": 1.8647181536166908e-06, + "Negative Geometric Mean": -13.310558063113747, + "Negative prob": -13.310558063113747, + "Normal Loss": 0.2762463092803955, + "Normal prob": -0.2762463092803955, + "Positive Loss": 0.015207285061478615, + "Positive prob": -0.015207285061478615, + "epoch": 2.5074626865671643, + "step": 84 + }, + { + "epoch": 2.5373134328358207, + "grad_norm": 6.696386369118086, + "learning_rate": 1.0681818181818183e-06, + "loss": 0.3106, + "step": 85 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.3587005138397217, + "Normal prob": -0.3587005138397217, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.5373134328358207, + "step": 85 + }, + { + "DPO Loss": 1.0392724887777463e-05, + "Negative Geometric Mean": -11.663321547420965, + "Negative prob": -11.663321547420965, + "Normal Loss": 0.38271617889404297, + "Normal prob": -0.38271617889404297, + "Positive Loss": 0.007094533648341894, + "Positive prob": -0.007094533648341894, + "epoch": 2.5373134328358207, + "step": 85 + }, + { + "epoch": 2.5671641791044775, + "grad_norm": 7.213029254290765, + "learning_rate": 1.0454545454545456e-06, + "loss": 0.317, + "step": 86 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.3641352355480194, + "Normal prob": -0.3641352355480194, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.5671641791044775, + "step": 86 + }, + { + "DPO Loss": 3.5987715729513327e-06, + "Negative Geometric Mean": -12.57204106168927, + "Negative prob": -12.57204106168927, + "Normal Loss": 0.46394774317741394, + "Normal prob": -0.46394774317741394, + "Positive Loss": 0.0050806887447834015, + "Positive prob": -0.0050806887447834015, + "epoch": 2.5671641791044775, + "step": 86 + }, + { + "epoch": 2.5970149253731343, + "grad_norm": 7.901045462084173, + "learning_rate": 1.0227272727272727e-06, + "loss": 0.3702, + "step": 87 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.38809868693351746, + "Normal prob": -0.38809868693351746, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.5970149253731343, + "step": 87 + }, + { + "DPO Loss": 1.6651211340411204e-06, + "Negative Geometric Mean": -13.372305265036962, + "Negative prob": -13.372305265036962, + "Normal Loss": 0.27421802282333374, + "Normal prob": -0.27421802282333374, + "Positive Loss": 0.017523737624287605, + "Positive prob": -0.017523737624287605, + "epoch": 2.5970149253731343, + "step": 87 + }, + { + "epoch": 2.626865671641791, + "grad_norm": 6.883433596486567, + "learning_rate": 1e-06, + "loss": 0.3336, + "step": 88 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.28301262855529785, + "Normal prob": -0.28301262855529785, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.626865671641791, + "step": 88 + }, + { + "DPO Loss": 2.9454008504157696e-06, + "Negative Geometric Mean": -12.71242241010274, + "Negative prob": -12.71242241010274, + "Normal Loss": 0.35078540444374084, + "Normal prob": -0.35078540444374084, + "Positive Loss": 0.01793888583779335, + "Positive prob": -0.01793888583779335, + "epoch": 2.626865671641791, + "step": 88 + }, + { + "epoch": 2.656716417910448, + "grad_norm": 6.603176465896816, + "learning_rate": 9.772727272727273e-07, + "loss": 0.4153, + "step": 89 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.29014265537261963, + "Normal prob": -0.29014265537261963, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.656716417910448, + "step": 89 + }, + { + "DPO Loss": 1.4884178069432536e-05, + "Negative Geometric Mean": -11.291460420642494, + "Negative prob": -11.291460420642494, + "Normal Loss": 0.27552318572998047, + "Normal prob": -0.27552318572998047, + "Positive Loss": 0.006887962110340595, + "Positive prob": -0.006887962110340595, + "epoch": 2.656716417910448, + "step": 89 + }, + { + "epoch": 2.6865671641791042, + "grad_norm": 7.071652347635012, + "learning_rate": 9.545454545454546e-07, + "loss": 0.3525, + "step": 90 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.3731546103954315, + "Normal prob": -0.3731546103954315, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.6865671641791042, + "step": 90 + }, + { + "DPO Loss": 6.442647626957985e-06, + "Negative Geometric Mean": -12.235858669051204, + "Negative prob": -12.235858669051204, + "Normal Loss": 0.21569418907165527, + "Normal prob": -0.21569418907165527, + "Positive Loss": 0.005633717868477106, + "Positive prob": -0.005633717868477106, + "epoch": 2.6865671641791042, + "step": 90 + }, + { + "epoch": 2.716417910447761, + "grad_norm": 6.08004163712123, + "learning_rate": 9.318181818181818e-07, + "loss": 0.3038, + "step": 91 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.35742098093032837, + "Normal prob": -0.35742098093032837, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.716417910447761, + "step": 91 + }, + { + "DPO Loss": 4.149841651615268e-06, + "Negative Geometric Mean": -12.598418855144757, + "Negative prob": -12.598418855144757, + "Normal Loss": 0.28535205125808716, + "Normal prob": -0.28535205125808716, + "Positive Loss": 0.012952926568686962, + "Positive prob": -0.012952926568686962, + "epoch": 2.716417910447761, + "step": 91 + }, + { + "epoch": 2.746268656716418, + "grad_norm": 7.098164063144904, + "learning_rate": 9.090909090909091e-07, + "loss": 0.3625, + "step": 92 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.28534939885139465, + "Normal prob": -0.28534939885139465, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.746268656716418, + "step": 92 + }, + { + "DPO Loss": 1.2248438490094142e-06, + "Negative Geometric Mean": -13.854903100242078, + "Negative prob": -13.854903100242078, + "Normal Loss": 0.36843106150627136, + "Normal prob": -0.36843106150627136, + "Positive Loss": 0.03612969443202019, + "Positive prob": -0.03612969443202019, + "epoch": 2.746268656716418, + "step": 92 + }, + { + "epoch": 2.7761194029850746, + "grad_norm": 7.081778856883454, + "learning_rate": 8.863636363636364e-07, + "loss": 0.4371, + "step": 93 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.4569069743156433, + "Normal prob": -0.4569069743156433, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.7761194029850746, + "step": 93 + }, + { + "DPO Loss": 4.104862759699108e-06, + "Negative Geometric Mean": -12.369599921518265, + "Negative prob": -12.369599921518265, + "Normal Loss": 0.2255462110042572, + "Normal prob": -0.2255462110042572, + "Positive Loss": 0.018570953980088234, + "Positive prob": -0.018570953980088234, + "epoch": 2.7761194029850746, + "step": 93 + }, + { + "epoch": 2.8059701492537314, + "grad_norm": 6.9825157063188374, + "learning_rate": 8.636363636363637e-07, + "loss": 0.3096, + "step": 94 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.2608332633972168, + "Normal prob": -0.2608332633972168, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.8059701492537314, + "step": 94 + }, + { + "DPO Loss": 1.6469150536061094e-06, + "Negative Geometric Mean": -13.248110250737463, + "Negative prob": -13.248110250737463, + "Normal Loss": 0.24679048359394073, + "Normal prob": -0.24679048359394073, + "Positive Loss": 0.014129209332168102, + "Positive prob": -0.014129209332168102, + "epoch": 2.8059701492537314, + "step": 94 + }, + { + "epoch": 2.835820895522388, + "grad_norm": 7.604649982159979, + "learning_rate": 8.409090909090908e-07, + "loss": 0.2674, + "step": 95 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.2900543212890625, + "Normal prob": -0.2900543212890625, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.835820895522388, + "step": 95 + }, + { + "DPO Loss": 7.456183395726831e-07, + "Negative Geometric Mean": -13.86903901734104, + "Negative prob": -13.86903901734104, + "Normal Loss": 0.4645146429538727, + "Normal prob": -0.4645146429538727, + "Positive Loss": 0.006549107376486063, + "Positive prob": -0.006549107376486063, + "epoch": 2.835820895522388, + "step": 95 + }, + { + "epoch": 2.8656716417910446, + "grad_norm": 6.428480514761495, + "learning_rate": 8.181818181818181e-07, + "loss": 0.3561, + "step": 96 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.17966699600219727, + "Normal prob": -0.17966699600219727, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.8656716417910446, + "step": 96 + }, + { + "DPO Loss": 1.1826854588610623e-05, + "Negative Geometric Mean": -11.482138813405797, + "Negative prob": -11.482138813405797, + "Normal Loss": 0.4035150110721588, + "Normal prob": -0.4035150110721588, + "Positive Loss": 0.01260466780513525, + "Positive prob": -0.01260466780513525, + "epoch": 2.8656716417910446, + "step": 96 + }, + { + "epoch": 2.8955223880597014, + "grad_norm": 6.355093862289104, + "learning_rate": 7.954545454545454e-07, + "loss": 0.3552, + "step": 97 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.1942460834980011, + "Normal prob": -0.1942460834980011, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.8955223880597014, + "step": 97 + }, + { + "DPO Loss": 2.1722275431802666e-06, + "Negative Geometric Mean": -12.907205766876064, + "Negative prob": -12.907205766876064, + "Normal Loss": 0.3104533553123474, + "Normal prob": -0.3104533553123474, + "Positive Loss": 0.004998633172363043, + "Positive prob": -0.004998633172363043, + "epoch": 2.8955223880597014, + "step": 97 + }, + { + "epoch": 2.925373134328358, + "grad_norm": 6.623833463639339, + "learning_rate": 7.727272727272727e-07, + "loss": 0.3507, + "step": 98 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.33913183212280273, + "Normal prob": -0.33913183212280273, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.925373134328358, + "step": 98 + }, + { + "DPO Loss": 1.1818778170094944e-06, + "Negative Geometric Mean": -13.596246585154585, + "Negative prob": -13.596246585154585, + "Normal Loss": 0.3668951988220215, + "Normal prob": -0.3668951988220215, + "Positive Loss": 0.016438201069831848, + "Positive prob": -0.016438201069831848, + "epoch": 2.925373134328358, + "step": 98 + }, + { + "epoch": 2.955223880597015, + "grad_norm": 7.342348492585064, + "learning_rate": 7.5e-07, + "loss": 0.4162, + "step": 99 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.2540174424648285, + "Normal prob": -0.2540174424648285, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.955223880597015, + "step": 99 + }, + { + "DPO Loss": 2.3274892548031074e-06, + "Negative Geometric Mean": -13.38752170138889, + "Negative prob": -13.38752170138889, + "Normal Loss": 0.6933973431587219, + "Normal prob": -0.6933973431587219, + "Positive Loss": 0.004921761341392994, + "Positive prob": -0.004921761341392994, + "epoch": 2.955223880597015, + "step": 99 + }, + { + "epoch": 2.9850746268656714, + "grad_norm": 6.242023928985393, + "learning_rate": 7.272727272727273e-07, + "loss": 0.4423, + "step": 100 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.32996082305908203, + "Normal prob": -0.32996082305908203, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.9850746268656714, + "step": 100 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.27658382058143616, + "Normal prob": -0.27658382058143616, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 2.9850746268656714, + "step": 100 + }, + { + "epoch": 3.014925373134328, + "grad_norm": 5.853608572027528, + "learning_rate": 7.045454545454546e-07, + "loss": 0.288, + "step": 101 + }, + { + "DPO Loss": 1.1678178546410005e-06, + "Negative Geometric Mean": -14.891405087425595, + "Negative prob": -14.891405087425595, + "Normal Loss": 0.11027539521455765, + "Normal prob": -0.11027539521455765, + "Positive Loss": 0.0027892631478607655, + "Positive prob": -0.0027892631478607655, + "epoch": 3.014925373134328, + "step": 101 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.26463809609413147, + "Normal prob": -0.26463809609413147, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.014925373134328, + "step": 101 + }, + { + "epoch": 3.044776119402985, + "grad_norm": 6.514265106044286, + "learning_rate": 6.818181818181818e-07, + "loss": 0.1912, + "step": 102 + }, + { + "DPO Loss": 3.6430315872713267e-06, + "Negative Geometric Mean": -13.001615084134615, + "Negative prob": -13.001615084134615, + "Normal Loss": 0.10679034143686295, + "Normal prob": -0.10679034143686295, + "Positive Loss": 0.01767848990857601, + "Positive prob": -0.01767848990857601, + "epoch": 3.044776119402985, + "step": 102 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.1798650622367859, + "Normal prob": -0.1798650622367859, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.044776119402985, + "step": 102 + }, + { + "epoch": 3.074626865671642, + "grad_norm": 6.098513301658777, + "learning_rate": 6.590909090909091e-07, + "loss": 0.2014, + "step": 103 + }, + { + "DPO Loss": 1.1857304744373281e-05, + "Negative Geometric Mean": -11.364407111528822, + "Negative prob": -11.364407111528822, + "Normal Loss": 0.5280313491821289, + "Normal prob": -0.5280313491821289, + "Positive Loss": 0.0045397402718663216, + "Positive prob": -0.0045397402718663216, + "epoch": 3.074626865671642, + "step": 103 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.12902340292930603, + "Normal prob": -0.12902340292930603, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.074626865671642, + "step": 103 + }, + { + "epoch": 3.1044776119402986, + "grad_norm": 6.268435906008225, + "learning_rate": 6.363636363636364e-07, + "loss": 0.2413, + "step": 104 + }, + { + "DPO Loss": 8.258820908422388e-07, + "Negative Geometric Mean": -13.974816351361241, + "Negative prob": -13.974816351361241, + "Normal Loss": 0.08596272766590118, + "Normal prob": -0.08596272766590118, + "Positive Loss": 0.0037321026902645826, + "Positive prob": -0.0037321026902645826, + "epoch": 3.1044776119402986, + "step": 104 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.12673968076705933, + "Normal prob": -0.12673968076705933, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.1044776119402986, + "step": 104 + }, + { + "epoch": 3.1343283582089554, + "grad_norm": 4.849865946932611, + "learning_rate": 6.136363636363637e-07, + "loss": 0.2016, + "step": 105 + }, + { + "DPO Loss": 1.2510054247133794e-05, + "Negative Geometric Mean": -11.465228908237913, + "Negative prob": -11.465228908237913, + "Normal Loss": 0.35748380422592163, + "Normal prob": -0.35748380422592163, + "Positive Loss": 0.0027046226896345615, + "Positive prob": -0.0027046226896345615, + "epoch": 3.1343283582089554, + "step": 105 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.2695090174674988, + "Normal prob": -0.2695090174674988, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.1343283582089554, + "step": 105 + }, + { + "epoch": 3.1641791044776117, + "grad_norm": 5.314522862984474, + "learning_rate": 5.909090909090909e-07, + "loss": 0.3061, + "step": 106 + }, + { + "DPO Loss": 1.1451636416948107e-06, + "Negative Geometric Mean": -13.922169000330106, + "Negative prob": -13.922169000330106, + "Normal Loss": 0.15261346101760864, + "Normal prob": -0.15261346101760864, + "Positive Loss": 0.004130078945308924, + "Positive prob": -0.004130078945308924, + "epoch": 3.1641791044776117, + "step": 106 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.3225187063217163, + "Normal prob": -0.3225187063217163, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.1641791044776117, + "step": 106 + }, + { + "epoch": 3.1940298507462686, + "grad_norm": 5.845924095415361, + "learning_rate": 5.681818181818182e-07, + "loss": 0.2219, + "step": 107 + }, + { + "DPO Loss": 3.406975256320534e-06, + "Negative Geometric Mean": -13.250364491637324, + "Negative prob": -13.250364491637324, + "Normal Loss": 0.2123008817434311, + "Normal prob": -0.2123008817434311, + "Positive Loss": 0.00209601828828454, + "Positive prob": -0.00209601828828454, + "epoch": 3.1940298507462686, + "step": 107 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.2501071095466614, + "Normal prob": -0.2501071095466614, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.1940298507462686, + "step": 107 + }, + { + "epoch": 3.2238805970149254, + "grad_norm": 6.273812820779774, + "learning_rate": 5.454545454545455e-07, + "loss": 0.2085, + "step": 108 + }, + { + "DPO Loss": 7.219691741892465e-06, + "Negative Geometric Mean": -12.184994006283068, + "Negative prob": -12.184994006283068, + "Normal Loss": 0.11331921815872192, + "Normal prob": -0.11331921815872192, + "Positive Loss": 0.00535797793418169, + "Positive prob": -0.00535797793418169, + "epoch": 3.2238805970149254, + "step": 108 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.2601730525493622, + "Normal prob": -0.2601730525493622, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.2238805970149254, + "step": 108 + }, + { + "epoch": 3.253731343283582, + "grad_norm": 6.61609605527567, + "learning_rate": 5.227272727272728e-07, + "loss": 0.2738, + "step": 109 + }, + { + "DPO Loss": 3.8013957323267827e-06, + "Negative Geometric Mean": -12.432244078240172, + "Negative prob": -12.432244078240172, + "Normal Loss": 0.212859645485878, + "Normal prob": -0.212859645485878, + "Positive Loss": 0.0058334325440227985, + "Positive prob": -0.0058334325440227985, + "epoch": 3.253731343283582, + "step": 109 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.1951064020395279, + "Normal prob": -0.1951064020395279, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.253731343283582, + "step": 109 + }, + { + "epoch": 3.283582089552239, + "grad_norm": 6.461685783109346, + "learning_rate": 5e-07, + "loss": 0.1872, + "step": 110 + }, + { + "DPO Loss": 9.760423619643666e-07, + "Negative Geometric Mean": -13.36294397566719, + "Negative prob": -13.36294397566719, + "Normal Loss": 0.16994960606098175, + "Normal prob": -0.16994960606098175, + "Positive Loss": 0.007101885508745909, + "Positive prob": -0.007101885508745909, + "epoch": 3.283582089552239, + "step": 110 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.25234583020210266, + "Normal prob": -0.25234583020210266, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.283582089552239, + "step": 110 + }, + { + "epoch": 3.3134328358208958, + "grad_norm": 6.064062487418674, + "learning_rate": 4.772727272727273e-07, + "loss": 0.1877, + "step": 111 + }, + { + "DPO Loss": 3.0380582299826617e-06, + "Negative Geometric Mean": -12.93929797021028, + "Negative prob": -12.93929797021028, + "Normal Loss": 0.07496587187051773, + "Normal prob": -0.07496587187051773, + "Positive Loss": 0.003302493365481496, + "Positive prob": -0.003302493365481496, + "epoch": 3.3134328358208958, + "step": 111 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.116237573325634, + "Normal prob": -0.116237573325634, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.3134328358208958, + "step": 111 + }, + { + "epoch": 3.343283582089552, + "grad_norm": 11.774507449917868, + "learning_rate": 4.5454545454545457e-07, + "loss": 0.1455, + "step": 112 + }, + { + "DPO Loss": 1.8040673343906892e-06, + "Negative Geometric Mean": -13.262591667895046, + "Negative prob": -13.262591667895046, + "Normal Loss": 0.20686665177345276, + "Normal prob": -0.20686665177345276, + "Positive Loss": 0.0036297321785241365, + "Positive prob": -0.0036297321785241365, + "epoch": 3.343283582089552, + "step": 112 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.07100074738264084, + "Normal prob": -0.07100074738264084, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.343283582089552, + "step": 112 + }, + { + "epoch": 3.373134328358209, + "grad_norm": 6.197973269249537, + "learning_rate": 4.3181818181818187e-07, + "loss": 0.2737, + "step": 113 + }, + { + "DPO Loss": 1.8399912201802113e-05, + "Negative Geometric Mean": -11.161076035610465, + "Negative prob": -11.161076035610465, + "Normal Loss": 0.3017271161079407, + "Normal prob": -0.3017271161079407, + "Positive Loss": 0.0018219746416434646, + "Positive prob": -0.0018219746416434646, + "epoch": 3.373134328358209, + "step": 113 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.40800532698631287, + "Normal prob": -0.40800532698631287, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.373134328358209, + "step": 113 + }, + { + "epoch": 3.4029850746268657, + "grad_norm": 6.686699543045222, + "learning_rate": 4.0909090909090906e-07, + "loss": 0.2495, + "step": 114 + }, + { + "DPO Loss": 7.636819721194287e-06, + "Negative Geometric Mean": -11.901342007076794, + "Negative prob": -11.901342007076794, + "Normal Loss": 0.12182455509901047, + "Normal prob": -0.12182455509901047, + "Positive Loss": 0.008607598952949047, + "Positive prob": -0.008607598952949047, + "epoch": 3.4029850746268657, + "step": 114 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.18401654064655304, + "Normal prob": -0.18401654064655304, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.4029850746268657, + "step": 114 + }, + { + "epoch": 3.4328358208955225, + "grad_norm": 6.81794821888112, + "learning_rate": 3.8636363636363636e-07, + "loss": 0.2007, + "step": 115 + }, + { + "DPO Loss": 6.554748941648e-06, + "Negative Geometric Mean": -12.335026873289234, + "Negative prob": -12.335026873289234, + "Normal Loss": 0.21853935718536377, + "Normal prob": -0.21853935718536377, + "Positive Loss": 0.004862755537033081, + "Positive prob": -0.004862755537033081, + "epoch": 3.4328358208955225, + "step": 115 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.135187566280365, + "Normal prob": -0.135187566280365, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.4328358208955225, + "step": 115 + }, + { + "epoch": 3.4626865671641793, + "grad_norm": 6.189730848953201, + "learning_rate": 3.6363636363636366e-07, + "loss": 0.2086, + "step": 116 + }, + { + "DPO Loss": 3.6050653288985906e-06, + "Negative Geometric Mean": -12.796296909877233, + "Negative prob": -12.796296909877233, + "Normal Loss": 0.2358456403017044, + "Normal prob": -0.2358456403017044, + "Positive Loss": 0.010835876688361168, + "Positive prob": -0.010835876688361168, + "epoch": 3.4626865671641793, + "step": 116 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.14767657220363617, + "Normal prob": -0.14767657220363617, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.4626865671641793, + "step": 116 + }, + { + "epoch": 3.4925373134328357, + "grad_norm": 6.979155772945575, + "learning_rate": 3.409090909090909e-07, + "loss": 0.2849, + "step": 117 + }, + { + "DPO Loss": 6.702416418876966e-06, + "Negative Geometric Mean": -12.413108648255815, + "Negative prob": -12.413108648255815, + "Normal Loss": 0.14713706076145172, + "Normal prob": -0.14713706076145172, + "Positive Loss": 0.003462533000856638, + "Positive prob": -0.003462533000856638, + "epoch": 3.4925373134328357, + "step": 117 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.23745860159397125, + "Normal prob": -0.23745860159397125, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.4925373134328357, + "step": 117 + }, + { + "epoch": 3.5223880597014925, + "grad_norm": 6.728854419168043, + "learning_rate": 3.181818181818182e-07, + "loss": 0.1677, + "step": 118 + }, + { + "DPO Loss": 2.470218665968806e-06, + "Negative Geometric Mean": -13.20369715379494, + "Negative prob": -13.20369715379494, + "Normal Loss": 0.4334864020347595, + "Normal prob": -0.4334864020347595, + "Positive Loss": 0.005433392245322466, + "Positive prob": -0.005433392245322466, + "epoch": 3.5223880597014925, + "step": 118 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.1629062443971634, + "Normal prob": -0.1629062443971634, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.5223880597014925, + "step": 118 + }, + { + "epoch": 3.5522388059701493, + "grad_norm": 5.9093975782845645, + "learning_rate": 2.9545454545454545e-07, + "loss": 0.2375, + "step": 119 + }, + { + "DPO Loss": 1.9495445800859506e-06, + "Negative Geometric Mean": -13.439311124840561, + "Negative prob": -13.439311124840561, + "Normal Loss": 0.3102337718009949, + "Normal prob": -0.3102337718009949, + "Positive Loss": 0.001397938933223486, + "Positive prob": -0.001397938933223486, + "epoch": 3.5522388059701493, + "step": 119 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.10538414120674133, + "Normal prob": -0.10538414120674133, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.5522388059701493, + "step": 119 + }, + { + "epoch": 3.582089552238806, + "grad_norm": 5.432045886830493, + "learning_rate": 2.7272727272727274e-07, + "loss": 0.1749, + "step": 120 + }, + { + "DPO Loss": 2.111671823116432e-05, + "Negative Geometric Mean": -11.482684536637931, + "Negative prob": -11.482684536637931, + "Normal Loss": 0.09432564675807953, + "Normal prob": -0.09432564675807953, + "Positive Loss": 0.003968758508563042, + "Positive prob": -0.003968758508563042, + "epoch": 3.582089552238806, + "step": 120 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.2578660249710083, + "Normal prob": -0.2578660249710083, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.582089552238806, + "step": 120 + }, + { + "epoch": 3.611940298507463, + "grad_norm": 6.329624233904299, + "learning_rate": 2.5e-07, + "loss": 0.1958, + "step": 121 + }, + { + "DPO Loss": 2.2736615595795564e-06, + "Negative Geometric Mean": -13.00193465573286, + "Negative prob": -13.00193465573286, + "Normal Loss": 0.11790954321622849, + "Normal prob": -0.11790954321622849, + "Positive Loss": 0.015944618731737137, + "Positive prob": -0.015944618731737137, + "epoch": 3.611940298507463, + "step": 121 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.2980431020259857, + "Normal prob": -0.2980431020259857, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.611940298507463, + "step": 121 + }, + { + "epoch": 3.6417910447761193, + "grad_norm": 6.390125283101109, + "learning_rate": 2.2727272727272729e-07, + "loss": 0.2578, + "step": 122 + }, + { + "DPO Loss": 1.8224405365341362e-06, + "Negative Geometric Mean": -13.443804791865459, + "Negative prob": -13.443804791865459, + "Normal Loss": 0.2082529067993164, + "Normal prob": -0.2082529067993164, + "Positive Loss": 0.0020329623948782682, + "Positive prob": -0.0020329623948782682, + "epoch": 3.6417910447761193, + "step": 122 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.27499350905418396, + "Normal prob": -0.27499350905418396, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.6417910447761193, + "step": 122 + }, + { + "epoch": 3.671641791044776, + "grad_norm": 7.334639331815002, + "learning_rate": 2.0454545454545453e-07, + "loss": 0.1928, + "step": 123 + }, + { + "DPO Loss": 3.887408166527688e-06, + "Negative Geometric Mean": -12.602550216132615, + "Negative prob": -12.602550216132615, + "Normal Loss": 0.3751141130924225, + "Normal prob": -0.3751141130924225, + "Positive Loss": 0.0016299609560519457, + "Positive prob": -0.0016299609560519457, + "epoch": 3.671641791044776, + "step": 123 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.14575666189193726, + "Normal prob": -0.14575666189193726, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.671641791044776, + "step": 123 + }, + { + "epoch": 3.701492537313433, + "grad_norm": 5.602697068663161, + "learning_rate": 1.8181818181818183e-07, + "loss": 0.1898, + "step": 124 + }, + { + "DPO Loss": 1.1130948677477009e-06, + "Negative Geometric Mean": -13.74802903824201, + "Negative prob": -13.74802903824201, + "Normal Loss": 0.09506483376026154, + "Normal prob": -0.09506483376026154, + "Positive Loss": 0.0032382213976234198, + "Positive prob": -0.0032382213976234198, + "epoch": 3.701492537313433, + "step": 124 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.15355288982391357, + "Normal prob": -0.15355288982391357, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.701492537313433, + "step": 124 + }, + { + "epoch": 3.7313432835820897, + "grad_norm": 7.94396763225081, + "learning_rate": 1.590909090909091e-07, + "loss": 0.153, + "step": 125 + }, + { + "DPO Loss": 4.4835976933222324e-07, + "Negative Geometric Mean": -14.556803077741021, + "Negative prob": -14.556803077741021, + "Normal Loss": 0.09771548211574554, + "Normal prob": -0.09771548211574554, + "Positive Loss": 0.012089760042726994, + "Positive prob": -0.012089760042726994, + "epoch": 3.7313432835820897, + "step": 125 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.17557981610298157, + "Normal prob": -0.17557981610298157, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.7313432835820897, + "step": 125 + }, + { + "epoch": 3.7611940298507465, + "grad_norm": 5.746760679085967, + "learning_rate": 1.3636363636363637e-07, + "loss": 0.2027, + "step": 126 + }, + { + "DPO Loss": 2.5731311695920285e-06, + "Negative Geometric Mean": -12.94003257909751, + "Negative prob": -12.94003257909751, + "Normal Loss": 0.2374420166015625, + "Normal prob": -0.2374420166015625, + "Positive Loss": 0.006095151882618666, + "Positive prob": -0.006095151882618666, + "epoch": 3.7611940298507465, + "step": 126 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.12284944206476212, + "Normal prob": -0.12284944206476212, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.7611940298507465, + "step": 126 + }, + { + "epoch": 3.791044776119403, + "grad_norm": 6.172054742054878, + "learning_rate": 1.1363636363636364e-07, + "loss": 0.1587, + "step": 127 + }, + { + "DPO Loss": 3.012714219508236e-06, + "Negative Geometric Mean": -12.808327907986111, + "Negative prob": -12.808327907986111, + "Normal Loss": 0.07416192442178726, + "Normal prob": -0.07416192442178726, + "Positive Loss": 0.0024134027771651745, + "Positive prob": -0.0024134027771651745, + "epoch": 3.791044776119403, + "step": 127 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.292096883058548, + "Normal prob": -0.292096883058548, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.791044776119403, + "step": 127 + }, + { + "epoch": 3.8208955223880596, + "grad_norm": 6.689741596768201, + "learning_rate": 9.090909090909091e-08, + "loss": 0.1959, + "step": 128 + }, + { + "DPO Loss": 3.060298655777367e-06, + "Negative Geometric Mean": -12.873194280660377, + "Negative prob": -12.873194280660377, + "Normal Loss": 0.33571678400039673, + "Normal prob": -0.33571678400039673, + "Positive Loss": 0.0060377782210707664, + "Positive prob": -0.0060377782210707664, + "epoch": 3.8208955223880596, + "step": 128 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.22519126534461975, + "Normal prob": -0.22519126534461975, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.8208955223880596, + "step": 128 + }, + { + "epoch": 3.8507462686567164, + "grad_norm": 7.002612239614997, + "learning_rate": 6.818181818181819e-08, + "loss": 0.2147, + "step": 129 + }, + { + "DPO Loss": 1.0818999409674698e-06, + "Negative Geometric Mean": -13.897989908854166, + "Negative prob": -13.897989908854166, + "Normal Loss": 0.2647945284843445, + "Normal prob": -0.2647945284843445, + "Positive Loss": 0.005775726865977049, + "Positive prob": -0.005775726865977049, + "epoch": 3.8507462686567164, + "step": 129 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.14088931679725647, + "Normal prob": -0.14088931679725647, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.8507462686567164, + "step": 129 + }, + { + "epoch": 3.8805970149253732, + "grad_norm": 7.106870960626619, + "learning_rate": 4.545454545454546e-08, + "loss": 0.2408, + "step": 130 + }, + { + "DPO Loss": 3.805466487272458e-06, + "Negative Geometric Mean": -12.980504410990168, + "Negative prob": -12.980504410990168, + "Normal Loss": 0.3013966977596283, + "Normal prob": -0.3013966977596283, + "Positive Loss": 0.003041935386136174, + "Positive prob": -0.003041935386136174, + "epoch": 3.8805970149253732, + "step": 130 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.27534565329551697, + "Normal prob": -0.27534565329551697, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.8805970149253732, + "step": 130 + }, + { + "epoch": 3.91044776119403, + "grad_norm": 5.966195743329273, + "learning_rate": 2.272727272727273e-08, + "loss": 0.2029, + "step": 131 + }, + { + "DPO Loss": 1.5273080011035291e-06, + "Negative Geometric Mean": -13.34349348358295, + "Negative prob": -13.34349348358295, + "Normal Loss": 0.20480337738990784, + "Normal prob": -0.20480337738990784, + "Positive Loss": 0.0037362114526331425, + "Positive prob": -0.0037362114526331425, + "epoch": 3.91044776119403, + "step": 131 + }, + { + "DPO Loss": 0.0, + "Negative Geometric Mean": 0.0, + "Negative prob": 0.0, + "Normal Loss": 0.19383595883846283, + "Normal prob": -0.19383595883846283, + "Positive Loss": 0.0, + "Positive prob": 0.0, + "epoch": 3.91044776119403, + "step": 131 + }, + { + "epoch": 3.9402985074626864, + "grad_norm": 6.0018477972181445, + "learning_rate": 0.0, + "loss": 0.1864, + "step": 132 + } + ], + "logging_steps": 1, + "max_steps": 132, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 11615663554560.0, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +}