| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.998837479655894, |
| "eval_steps": 500, |
| "global_step": 2148, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0013950244129272262, |
| "grad_norm": 9.06177574123373, |
| "learning_rate": 0.0, |
| "loss": 1.2425, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0027900488258544524, |
| "grad_norm": 7.74250980936599, |
| "learning_rate": 4.6511627906976744e-08, |
| "loss": 1.1023, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0041850732387816785, |
| "grad_norm": 6.388418368134404, |
| "learning_rate": 9.302325581395349e-08, |
| "loss": 0.9672, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.005580097651708905, |
| "grad_norm": 8.412567905846219, |
| "learning_rate": 1.3953488372093024e-07, |
| "loss": 1.1729, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.006975122064636131, |
| "grad_norm": 7.573918067233279, |
| "learning_rate": 1.8604651162790698e-07, |
| "loss": 1.0899, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.008370146477563357, |
| "grad_norm": 8.091442848700519, |
| "learning_rate": 2.3255813953488374e-07, |
| "loss": 1.1316, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.009765170890490584, |
| "grad_norm": 8.167879080412323, |
| "learning_rate": 2.790697674418605e-07, |
| "loss": 1.218, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01116019530341781, |
| "grad_norm": 8.519414634553403, |
| "learning_rate": 3.2558139534883724e-07, |
| "loss": 1.2305, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.012555219716345037, |
| "grad_norm": 7.003865916552692, |
| "learning_rate": 3.7209302325581396e-07, |
| "loss": 1.0201, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.013950244129272262, |
| "grad_norm": 7.644602533034688, |
| "learning_rate": 4.186046511627907e-07, |
| "loss": 1.0511, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.015345268542199489, |
| "grad_norm": 8.054509366220413, |
| "learning_rate": 4.651162790697675e-07, |
| "loss": 1.1006, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.016740292955126714, |
| "grad_norm": 6.828467947829633, |
| "learning_rate": 5.116279069767442e-07, |
| "loss": 1.0077, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.01813531736805394, |
| "grad_norm": 7.451234628274601, |
| "learning_rate": 5.58139534883721e-07, |
| "loss": 1.1322, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.01953034178098117, |
| "grad_norm": 8.34154278843471, |
| "learning_rate": 6.046511627906977e-07, |
| "loss": 1.185, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.020925366193908394, |
| "grad_norm": 6.204510873867492, |
| "learning_rate": 6.511627906976745e-07, |
| "loss": 0.9731, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.02232039060683562, |
| "grad_norm": 7.03015750664511, |
| "learning_rate": 6.976744186046513e-07, |
| "loss": 1.0501, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.023715415019762844, |
| "grad_norm": 5.239488986570632, |
| "learning_rate": 7.441860465116279e-07, |
| "loss": 0.8553, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.025110439432690073, |
| "grad_norm": 7.842221666246864, |
| "learning_rate": 7.906976744186047e-07, |
| "loss": 1.1987, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0265054638456173, |
| "grad_norm": 5.010590976687375, |
| "learning_rate": 8.372093023255814e-07, |
| "loss": 0.9201, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.027900488258544524, |
| "grad_norm": 6.810234105335915, |
| "learning_rate": 8.837209302325582e-07, |
| "loss": 1.044, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.029295512671471752, |
| "grad_norm": 5.719852480088899, |
| "learning_rate": 9.30232558139535e-07, |
| "loss": 0.9848, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.030690537084398978, |
| "grad_norm": 5.804104402736001, |
| "learning_rate": 9.767441860465117e-07, |
| "loss": 1.0273, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.03208556149732621, |
| "grad_norm": 5.698265140721901, |
| "learning_rate": 1.0232558139534884e-06, |
| "loss": 1.0212, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.03348058591025343, |
| "grad_norm": 3.5836689034500155, |
| "learning_rate": 1.0697674418604653e-06, |
| "loss": 0.8341, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.03487561032318066, |
| "grad_norm": 4.9843301829941495, |
| "learning_rate": 1.116279069767442e-06, |
| "loss": 0.9715, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.03627063473610788, |
| "grad_norm": 4.0192024122191174, |
| "learning_rate": 1.1627906976744188e-06, |
| "loss": 0.9561, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.03766565914903511, |
| "grad_norm": 5.0428069070438815, |
| "learning_rate": 1.2093023255813954e-06, |
| "loss": 1.0269, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.03906068356196234, |
| "grad_norm": 3.9250973474293027, |
| "learning_rate": 1.2558139534883723e-06, |
| "loss": 0.9129, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.04045570797488956, |
| "grad_norm": 3.5444590391314015, |
| "learning_rate": 1.302325581395349e-06, |
| "loss": 0.889, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.04185073238781679, |
| "grad_norm": 3.4307236283710103, |
| "learning_rate": 1.3488372093023258e-06, |
| "loss": 0.8861, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.043245756800744016, |
| "grad_norm": 4.025994100913768, |
| "learning_rate": 1.3953488372093025e-06, |
| "loss": 0.8176, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.04464078121367124, |
| "grad_norm": 4.1408375259554075, |
| "learning_rate": 1.4418604651162794e-06, |
| "loss": 0.8987, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.04603580562659847, |
| "grad_norm": 3.8731866291196444, |
| "learning_rate": 1.4883720930232558e-06, |
| "loss": 0.9591, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.04743083003952569, |
| "grad_norm": 3.989912476418152, |
| "learning_rate": 1.534883720930233e-06, |
| "loss": 0.9171, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.04882585445245292, |
| "grad_norm": 3.7064707308962936, |
| "learning_rate": 1.5813953488372093e-06, |
| "loss": 0.9138, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.050220878865380146, |
| "grad_norm": 3.6202184488771993, |
| "learning_rate": 1.6279069767441862e-06, |
| "loss": 0.9461, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.05161590327830737, |
| "grad_norm": 3.126403206575003, |
| "learning_rate": 1.6744186046511629e-06, |
| "loss": 0.8543, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0530109276912346, |
| "grad_norm": 3.02742858939562, |
| "learning_rate": 1.7209302325581397e-06, |
| "loss": 0.7944, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.054405952104161825, |
| "grad_norm": 3.0983156242512324, |
| "learning_rate": 1.7674418604651164e-06, |
| "loss": 0.9039, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.05580097651708905, |
| "grad_norm": 3.3601546255070347, |
| "learning_rate": 1.8139534883720933e-06, |
| "loss": 0.8522, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.057196000930016276, |
| "grad_norm": 3.170617165847687, |
| "learning_rate": 1.86046511627907e-06, |
| "loss": 0.8171, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.058591025342943505, |
| "grad_norm": 3.163047232937274, |
| "learning_rate": 1.9069767441860468e-06, |
| "loss": 0.8097, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.05998604975587073, |
| "grad_norm": 3.2539566987966526, |
| "learning_rate": 1.9534883720930235e-06, |
| "loss": 0.7917, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.061381074168797956, |
| "grad_norm": 3.0851698620768935, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.8263, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.06277609858172518, |
| "grad_norm": 3.3988761007126183, |
| "learning_rate": 2.0465116279069768e-06, |
| "loss": 0.8794, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.06417112299465241, |
| "grad_norm": 2.9613711343959785, |
| "learning_rate": 2.0930232558139536e-06, |
| "loss": 0.7589, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.06556614740757963, |
| "grad_norm": 3.093943745703619, |
| "learning_rate": 2.1395348837209305e-06, |
| "loss": 0.8537, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.06696117182050686, |
| "grad_norm": 2.7983992330490532, |
| "learning_rate": 2.1860465116279074e-06, |
| "loss": 0.8453, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.06835619623343409, |
| "grad_norm": 2.6204585607252375, |
| "learning_rate": 2.232558139534884e-06, |
| "loss": 0.8777, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.06975122064636131, |
| "grad_norm": 2.81413032613818, |
| "learning_rate": 2.2790697674418607e-06, |
| "loss": 0.8138, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07114624505928854, |
| "grad_norm": 2.7779036842721703, |
| "learning_rate": 2.3255813953488376e-06, |
| "loss": 0.8686, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.07254126947221576, |
| "grad_norm": 2.5802389971258557, |
| "learning_rate": 2.3720930232558144e-06, |
| "loss": 0.8089, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.07393629388514299, |
| "grad_norm": 2.8171256077152527, |
| "learning_rate": 2.418604651162791e-06, |
| "loss": 0.8212, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.07533131829807022, |
| "grad_norm": 2.9444479391684997, |
| "learning_rate": 2.4651162790697678e-06, |
| "loss": 0.8565, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.07672634271099744, |
| "grad_norm": 2.6944161956139596, |
| "learning_rate": 2.5116279069767446e-06, |
| "loss": 0.7982, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.07812136712392467, |
| "grad_norm": 2.7051824317676005, |
| "learning_rate": 2.558139534883721e-06, |
| "loss": 0.8422, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.07951639153685189, |
| "grad_norm": 2.6720226291628637, |
| "learning_rate": 2.604651162790698e-06, |
| "loss": 0.8189, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.08091141594977912, |
| "grad_norm": 2.5400408658615565, |
| "learning_rate": 2.6511627906976744e-06, |
| "loss": 0.7868, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.08230644036270635, |
| "grad_norm": 2.72174082098465, |
| "learning_rate": 2.6976744186046517e-06, |
| "loss": 0.8908, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.08370146477563357, |
| "grad_norm": 2.493197204847241, |
| "learning_rate": 2.744186046511628e-06, |
| "loss": 0.7486, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0850964891885608, |
| "grad_norm": 2.590372903491486, |
| "learning_rate": 2.790697674418605e-06, |
| "loss": 0.7484, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.08649151360148803, |
| "grad_norm": 2.5390193429571877, |
| "learning_rate": 2.8372093023255815e-06, |
| "loss": 0.8414, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.08788653801441525, |
| "grad_norm": 2.455465710423539, |
| "learning_rate": 2.8837209302325587e-06, |
| "loss": 0.7389, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.08928156242734248, |
| "grad_norm": 2.6820415416133208, |
| "learning_rate": 2.930232558139535e-06, |
| "loss": 0.8335, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0906765868402697, |
| "grad_norm": 2.3815607596683543, |
| "learning_rate": 2.9767441860465116e-06, |
| "loss": 0.7524, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.09207161125319693, |
| "grad_norm": 2.5996024857047537, |
| "learning_rate": 3.0232558139534885e-06, |
| "loss": 0.8056, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.09346663566612416, |
| "grad_norm": 2.5309785387428927, |
| "learning_rate": 3.069767441860466e-06, |
| "loss": 0.8627, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.09486166007905138, |
| "grad_norm": 2.539797914962022, |
| "learning_rate": 3.1162790697674423e-06, |
| "loss": 0.8127, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.0962566844919786, |
| "grad_norm": 2.472187333798063, |
| "learning_rate": 3.1627906976744187e-06, |
| "loss": 0.757, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.09765170890490583, |
| "grad_norm": 2.326102202172242, |
| "learning_rate": 3.2093023255813956e-06, |
| "loss": 0.722, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.09904673331783306, |
| "grad_norm": 2.515731147365297, |
| "learning_rate": 3.2558139534883724e-06, |
| "loss": 0.7583, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.10044175773076029, |
| "grad_norm": 2.509293997344943, |
| "learning_rate": 3.3023255813953493e-06, |
| "loss": 0.7642, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.10183678214368752, |
| "grad_norm": 2.507853588356, |
| "learning_rate": 3.3488372093023258e-06, |
| "loss": 0.7746, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.10323180655661474, |
| "grad_norm": 2.3923574692972136, |
| "learning_rate": 3.3953488372093026e-06, |
| "loss": 0.7831, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.10462683096954196, |
| "grad_norm": 2.5608099260959634, |
| "learning_rate": 3.4418604651162795e-06, |
| "loss": 0.7607, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1060218553824692, |
| "grad_norm": 2.487504578194893, |
| "learning_rate": 3.4883720930232564e-06, |
| "loss": 0.6625, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.10741687979539642, |
| "grad_norm": 2.1209002756919726, |
| "learning_rate": 3.534883720930233e-06, |
| "loss": 0.6498, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.10881190420832365, |
| "grad_norm": 2.4857824461829163, |
| "learning_rate": 3.5813953488372093e-06, |
| "loss": 0.7939, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.11020692862125087, |
| "grad_norm": 2.3220344228660745, |
| "learning_rate": 3.6279069767441866e-06, |
| "loss": 0.763, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.1116019530341781, |
| "grad_norm": 2.605269469509474, |
| "learning_rate": 3.674418604651163e-06, |
| "loss": 0.8045, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.11299697744710532, |
| "grad_norm": 2.4819518552638486, |
| "learning_rate": 3.72093023255814e-06, |
| "loss": 0.7621, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.11439200186003255, |
| "grad_norm": 2.8984252770783128, |
| "learning_rate": 3.7674418604651163e-06, |
| "loss": 0.8571, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.11578702627295978, |
| "grad_norm": 2.473293325148463, |
| "learning_rate": 3.8139534883720936e-06, |
| "loss": 0.7575, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.11718205068588701, |
| "grad_norm": 2.3794798368298493, |
| "learning_rate": 3.86046511627907e-06, |
| "loss": 0.7546, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.11857707509881422, |
| "grad_norm": 2.337615082729649, |
| "learning_rate": 3.906976744186047e-06, |
| "loss": 0.7393, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.11997209951174145, |
| "grad_norm": 2.336365843765245, |
| "learning_rate": 3.953488372093024e-06, |
| "loss": 0.6678, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.12136712392466868, |
| "grad_norm": 2.5123928536454434, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.8062, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.12276214833759591, |
| "grad_norm": 2.5461458978648115, |
| "learning_rate": 4.0465116279069775e-06, |
| "loss": 0.821, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.12415717275052314, |
| "grad_norm": 2.546220602321451, |
| "learning_rate": 4.0930232558139536e-06, |
| "loss": 0.8603, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.12555219716345037, |
| "grad_norm": 2.4356199052550744, |
| "learning_rate": 4.1395348837209304e-06, |
| "loss": 0.7435, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.12694722157637758, |
| "grad_norm": 2.4024492143380054, |
| "learning_rate": 4.186046511627907e-06, |
| "loss": 0.7425, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.12834224598930483, |
| "grad_norm": 2.4927376817528017, |
| "learning_rate": 4.232558139534884e-06, |
| "loss": 0.7943, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.12973727040223204, |
| "grad_norm": 2.546222385189929, |
| "learning_rate": 4.279069767441861e-06, |
| "loss": 0.7444, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.13113229481515926, |
| "grad_norm": 2.3638464894566, |
| "learning_rate": 4.325581395348837e-06, |
| "loss": 0.7103, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.1325273192280865, |
| "grad_norm": 2.0116912405977367, |
| "learning_rate": 4.372093023255815e-06, |
| "loss": 0.6561, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.1339223436410137, |
| "grad_norm": 2.3569671450914402, |
| "learning_rate": 4.418604651162791e-06, |
| "loss": 0.7494, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.13531736805394096, |
| "grad_norm": 2.455174006078999, |
| "learning_rate": 4.465116279069768e-06, |
| "loss": 0.7291, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.13671239246686817, |
| "grad_norm": 2.3259679224878895, |
| "learning_rate": 4.5116279069767445e-06, |
| "loss": 0.6947, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.13810741687979539, |
| "grad_norm": 2.2889925038801358, |
| "learning_rate": 4.558139534883721e-06, |
| "loss": 0.7622, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.13950244129272263, |
| "grad_norm": 2.34754065529055, |
| "learning_rate": 4.604651162790698e-06, |
| "loss": 0.6875, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.14089746570564984, |
| "grad_norm": 2.4553141008398893, |
| "learning_rate": 4.651162790697675e-06, |
| "loss": 0.7712, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.1422924901185771, |
| "grad_norm": 2.4054475492234153, |
| "learning_rate": 4.697674418604651e-06, |
| "loss": 0.7644, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.1436875145315043, |
| "grad_norm": 2.5744411230055153, |
| "learning_rate": 4.744186046511629e-06, |
| "loss": 0.7202, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.14508253894443152, |
| "grad_norm": 2.3847788121371774, |
| "learning_rate": 4.790697674418605e-06, |
| "loss": 0.7174, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.14647756335735876, |
| "grad_norm": 2.3377822690454444, |
| "learning_rate": 4.837209302325582e-06, |
| "loss": 0.7038, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.14787258777028597, |
| "grad_norm": 2.428457278015599, |
| "learning_rate": 4.883720930232559e-06, |
| "loss": 0.6988, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.14926761218321322, |
| "grad_norm": 2.3799317230323846, |
| "learning_rate": 4.9302325581395355e-06, |
| "loss": 0.7673, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.15066263659614043, |
| "grad_norm": 2.35083503319128, |
| "learning_rate": 4.976744186046512e-06, |
| "loss": 0.7376, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.15205766100906765, |
| "grad_norm": 2.805407049871798, |
| "learning_rate": 5.023255813953489e-06, |
| "loss": 0.9254, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.1534526854219949, |
| "grad_norm": 2.1905803113537403, |
| "learning_rate": 5.069767441860466e-06, |
| "loss": 0.6927, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1548477098349221, |
| "grad_norm": 2.2807494953381706, |
| "learning_rate": 5.116279069767442e-06, |
| "loss": 0.7171, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.15624273424784935, |
| "grad_norm": 2.3977975510102922, |
| "learning_rate": 5.162790697674419e-06, |
| "loss": 0.7676, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.15763775866077656, |
| "grad_norm": 2.374163440061089, |
| "learning_rate": 5.209302325581396e-06, |
| "loss": 0.7407, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.15903278307370378, |
| "grad_norm": 2.5800289116181085, |
| "learning_rate": 5.255813953488372e-06, |
| "loss": 0.7664, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.16042780748663102, |
| "grad_norm": 2.430187403796108, |
| "learning_rate": 5.302325581395349e-06, |
| "loss": 0.751, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.16182283189955823, |
| "grad_norm": 2.343909179556326, |
| "learning_rate": 5.348837209302326e-06, |
| "loss": 0.7237, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.16321785631248548, |
| "grad_norm": 2.548797829296356, |
| "learning_rate": 5.395348837209303e-06, |
| "loss": 0.7932, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.1646128807254127, |
| "grad_norm": 2.5320929256806823, |
| "learning_rate": 5.44186046511628e-06, |
| "loss": 0.7457, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.16600790513833993, |
| "grad_norm": 2.5449094558951266, |
| "learning_rate": 5.488372093023256e-06, |
| "loss": 0.7163, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.16740292955126715, |
| "grad_norm": 2.504891081408036, |
| "learning_rate": 5.534883720930233e-06, |
| "loss": 0.7197, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.16879795396419436, |
| "grad_norm": 2.475849061902109, |
| "learning_rate": 5.58139534883721e-06, |
| "loss": 0.7522, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.1701929783771216, |
| "grad_norm": 2.4821935283018584, |
| "learning_rate": 5.627906976744186e-06, |
| "loss": 0.7701, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.17158800279004882, |
| "grad_norm": 2.642205698934932, |
| "learning_rate": 5.674418604651163e-06, |
| "loss": 0.7893, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.17298302720297606, |
| "grad_norm": 2.5207552171233187, |
| "learning_rate": 5.72093023255814e-06, |
| "loss": 0.7317, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.17437805161590328, |
| "grad_norm": 2.3092477828749645, |
| "learning_rate": 5.7674418604651175e-06, |
| "loss": 0.7706, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.1757730760288305, |
| "grad_norm": 2.489926710299257, |
| "learning_rate": 5.8139534883720935e-06, |
| "loss": 0.7555, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.17716810044175774, |
| "grad_norm": 2.656527553681413, |
| "learning_rate": 5.86046511627907e-06, |
| "loss": 0.7651, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.17856312485468495, |
| "grad_norm": 2.3563815058683923, |
| "learning_rate": 5.906976744186047e-06, |
| "loss": 0.7595, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.1799581492676122, |
| "grad_norm": 2.3679438646515596, |
| "learning_rate": 5.953488372093023e-06, |
| "loss": 0.8296, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.1813531736805394, |
| "grad_norm": 2.4057010731210307, |
| "learning_rate": 6e-06, |
| "loss": 0.745, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.18274819809346662, |
| "grad_norm": 2.522120968034962, |
| "learning_rate": 6.046511627906977e-06, |
| "loss": 0.6755, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.18414322250639387, |
| "grad_norm": 2.4421735038455683, |
| "learning_rate": 6.093023255813954e-06, |
| "loss": 0.7682, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.18553824691932108, |
| "grad_norm": 2.549138792539662, |
| "learning_rate": 6.139534883720932e-06, |
| "loss": 0.7427, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.18693327133224832, |
| "grad_norm": 2.2976453414137628, |
| "learning_rate": 6.186046511627908e-06, |
| "loss": 0.7355, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.18832829574517554, |
| "grad_norm": 2.612827929707925, |
| "learning_rate": 6.2325581395348845e-06, |
| "loss": 0.8027, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.18972332015810275, |
| "grad_norm": 2.5538000384173296, |
| "learning_rate": 6.279069767441861e-06, |
| "loss": 0.7745, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.19111834457103, |
| "grad_norm": 2.5739544308712565, |
| "learning_rate": 6.325581395348837e-06, |
| "loss": 0.7536, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.1925133689839572, |
| "grad_norm": 2.4900615363384473, |
| "learning_rate": 6.372093023255814e-06, |
| "loss": 0.7661, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.19390839339688445, |
| "grad_norm": 2.72347356864473, |
| "learning_rate": 6.418604651162791e-06, |
| "loss": 0.7113, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.19530341780981167, |
| "grad_norm": 2.4124895218072173, |
| "learning_rate": 6.465116279069767e-06, |
| "loss": 0.7844, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1966984422227389, |
| "grad_norm": 2.470332588836169, |
| "learning_rate": 6.511627906976745e-06, |
| "loss": 0.7984, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.19809346663566613, |
| "grad_norm": 2.634184887029152, |
| "learning_rate": 6.558139534883722e-06, |
| "loss": 0.749, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.19948849104859334, |
| "grad_norm": 2.3551296639045742, |
| "learning_rate": 6.604651162790699e-06, |
| "loss": 0.7404, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.20088351546152058, |
| "grad_norm": 2.318054989604665, |
| "learning_rate": 6.651162790697675e-06, |
| "loss": 0.7011, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.2022785398744478, |
| "grad_norm": 2.394650549678866, |
| "learning_rate": 6.6976744186046515e-06, |
| "loss": 0.7989, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.20367356428737504, |
| "grad_norm": 2.3289567515126226, |
| "learning_rate": 6.744186046511628e-06, |
| "loss": 0.7381, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.20506858870030226, |
| "grad_norm": 2.533330784474734, |
| "learning_rate": 6.790697674418605e-06, |
| "loss": 0.8333, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.20646361311322947, |
| "grad_norm": 2.3789567467158474, |
| "learning_rate": 6.837209302325581e-06, |
| "loss": 0.7275, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.20785863752615671, |
| "grad_norm": 2.5367630078041827, |
| "learning_rate": 6.883720930232559e-06, |
| "loss": 0.8271, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.20925366193908393, |
| "grad_norm": 2.4354594911133263, |
| "learning_rate": 6.930232558139536e-06, |
| "loss": 0.7624, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.21064868635201117, |
| "grad_norm": 2.5775059164500758, |
| "learning_rate": 6.976744186046513e-06, |
| "loss": 0.7584, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.2120437107649384, |
| "grad_norm": 2.190325083282746, |
| "learning_rate": 7.023255813953489e-06, |
| "loss": 0.7009, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.2134387351778656, |
| "grad_norm": 2.4921469453291345, |
| "learning_rate": 7.069767441860466e-06, |
| "loss": 0.7554, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.21483375959079284, |
| "grad_norm": 2.3569726016132635, |
| "learning_rate": 7.1162790697674425e-06, |
| "loss": 0.727, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.21622878400372006, |
| "grad_norm": 2.3994767087861897, |
| "learning_rate": 7.1627906976744185e-06, |
| "loss": 0.7333, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.2176238084166473, |
| "grad_norm": 2.316378530911968, |
| "learning_rate": 7.209302325581395e-06, |
| "loss": 0.755, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.21901883282957452, |
| "grad_norm": 2.1898592032828033, |
| "learning_rate": 7.255813953488373e-06, |
| "loss": 0.6771, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.22041385724250173, |
| "grad_norm": 2.6149357411219154, |
| "learning_rate": 7.30232558139535e-06, |
| "loss": 0.7357, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.22180888165542897, |
| "grad_norm": 2.2792049067557216, |
| "learning_rate": 7.348837209302326e-06, |
| "loss": 0.7254, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.2232039060683562, |
| "grad_norm": 2.4877232933622775, |
| "learning_rate": 7.395348837209303e-06, |
| "loss": 0.7488, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.22459893048128343, |
| "grad_norm": 2.4560817023680097, |
| "learning_rate": 7.44186046511628e-06, |
| "loss": 0.7413, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.22599395489421065, |
| "grad_norm": 2.4221544352169975, |
| "learning_rate": 7.488372093023256e-06, |
| "loss": 0.7363, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.22738897930713786, |
| "grad_norm": 2.4563224434742805, |
| "learning_rate": 7.534883720930233e-06, |
| "loss": 0.7673, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.2287840037200651, |
| "grad_norm": 2.366320987925906, |
| "learning_rate": 7.5813953488372095e-06, |
| "loss": 0.6675, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.23017902813299232, |
| "grad_norm": 2.5227544509508517, |
| "learning_rate": 7.627906976744187e-06, |
| "loss": 0.7483, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.23157405254591956, |
| "grad_norm": 2.1154831131584975, |
| "learning_rate": 7.674418604651164e-06, |
| "loss": 0.6732, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.23296907695884678, |
| "grad_norm": 2.4673285889919927, |
| "learning_rate": 7.72093023255814e-06, |
| "loss": 0.7009, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.23436410137177402, |
| "grad_norm": 2.43878962840609, |
| "learning_rate": 7.767441860465116e-06, |
| "loss": 0.7252, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.23575912578470123, |
| "grad_norm": 2.31935934137348, |
| "learning_rate": 7.813953488372094e-06, |
| "loss": 0.733, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.23715415019762845, |
| "grad_norm": 2.1995960807975603, |
| "learning_rate": 7.86046511627907e-06, |
| "loss": 0.6615, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2385491746105557, |
| "grad_norm": 2.168278254449867, |
| "learning_rate": 7.906976744186048e-06, |
| "loss": 0.7284, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.2399441990234829, |
| "grad_norm": 2.4173940989857847, |
| "learning_rate": 7.953488372093024e-06, |
| "loss": 0.7081, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.24133922343641015, |
| "grad_norm": 2.51400863859905, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.7747, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.24273424784933736, |
| "grad_norm": 2.3463603446571857, |
| "learning_rate": 8.046511627906977e-06, |
| "loss": 0.7386, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.24412927226226458, |
| "grad_norm": 2.3501001279570897, |
| "learning_rate": 8.093023255813955e-06, |
| "loss": 0.7658, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.24552429667519182, |
| "grad_norm": 2.390612949278623, |
| "learning_rate": 8.139534883720931e-06, |
| "loss": 0.7733, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.24691932108811904, |
| "grad_norm": 2.5310428833750325, |
| "learning_rate": 8.186046511627907e-06, |
| "loss": 0.7699, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.24831434550104628, |
| "grad_norm": 2.5096094301774383, |
| "learning_rate": 8.232558139534885e-06, |
| "loss": 0.774, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.2497093699139735, |
| "grad_norm": 2.3076005947523646, |
| "learning_rate": 8.279069767441861e-06, |
| "loss": 0.7115, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.25110439432690074, |
| "grad_norm": 2.407603733719916, |
| "learning_rate": 8.325581395348837e-06, |
| "loss": 0.7587, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.25249941873982795, |
| "grad_norm": 2.5252107383278233, |
| "learning_rate": 8.372093023255815e-06, |
| "loss": 0.75, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.25389444315275517, |
| "grad_norm": 2.309734616227639, |
| "learning_rate": 8.418604651162792e-06, |
| "loss": 0.7088, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.2552894675656824, |
| "grad_norm": 2.426978371777092, |
| "learning_rate": 8.465116279069768e-06, |
| "loss": 0.7468, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.25668449197860965, |
| "grad_norm": 2.4432053380067065, |
| "learning_rate": 8.511627906976744e-06, |
| "loss": 0.7142, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.25807951639153687, |
| "grad_norm": 2.3740041197703023, |
| "learning_rate": 8.558139534883722e-06, |
| "loss": 0.7071, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.2594745408044641, |
| "grad_norm": 2.0601901871327377, |
| "learning_rate": 8.604651162790698e-06, |
| "loss": 0.6276, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.2608695652173913, |
| "grad_norm": 2.364861360993871, |
| "learning_rate": 8.651162790697674e-06, |
| "loss": 0.7856, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.2622645896303185, |
| "grad_norm": 2.3561545953781597, |
| "learning_rate": 8.697674418604652e-06, |
| "loss": 0.7252, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.2636596140432458, |
| "grad_norm": 2.282838512331675, |
| "learning_rate": 8.74418604651163e-06, |
| "loss": 0.6964, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.265054638456173, |
| "grad_norm": 2.38101153955705, |
| "learning_rate": 8.790697674418606e-06, |
| "loss": 0.7203, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.2664496628691002, |
| "grad_norm": 2.4046315413267605, |
| "learning_rate": 8.837209302325582e-06, |
| "loss": 0.7592, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.2678446872820274, |
| "grad_norm": 2.3462917230346885, |
| "learning_rate": 8.88372093023256e-06, |
| "loss": 0.701, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.26923971169495464, |
| "grad_norm": 2.36595710790288, |
| "learning_rate": 8.930232558139535e-06, |
| "loss": 0.7542, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.2706347361078819, |
| "grad_norm": 2.3866511967235944, |
| "learning_rate": 8.976744186046511e-06, |
| "loss": 0.7001, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.2720297605208091, |
| "grad_norm": 2.3552326990004517, |
| "learning_rate": 9.023255813953489e-06, |
| "loss": 0.7146, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.27342478493373634, |
| "grad_norm": 2.234288729152599, |
| "learning_rate": 9.069767441860465e-06, |
| "loss": 0.7804, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.27481980934666356, |
| "grad_norm": 2.2137571036217722, |
| "learning_rate": 9.116279069767443e-06, |
| "loss": 0.709, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.27621483375959077, |
| "grad_norm": 2.362054981862146, |
| "learning_rate": 9.162790697674419e-06, |
| "loss": 0.7079, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.27760985817251804, |
| "grad_norm": 2.3776725722324925, |
| "learning_rate": 9.209302325581397e-06, |
| "loss": 0.7115, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.27900488258544526, |
| "grad_norm": 2.360369888299576, |
| "learning_rate": 9.255813953488373e-06, |
| "loss": 0.7238, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2803999069983725, |
| "grad_norm": 2.2507389312676462, |
| "learning_rate": 9.30232558139535e-06, |
| "loss": 0.7214, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.2817949314112997, |
| "grad_norm": 2.279748703497406, |
| "learning_rate": 9.348837209302326e-06, |
| "loss": 0.7474, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.2831899558242269, |
| "grad_norm": 2.3429648273532937, |
| "learning_rate": 9.395348837209302e-06, |
| "loss": 0.7681, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.2845849802371542, |
| "grad_norm": 2.418888049162119, |
| "learning_rate": 9.44186046511628e-06, |
| "loss": 0.727, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.2859800046500814, |
| "grad_norm": 2.399989246612679, |
| "learning_rate": 9.488372093023258e-06, |
| "loss": 0.7719, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.2873750290630086, |
| "grad_norm": 2.660020683455921, |
| "learning_rate": 9.534883720930234e-06, |
| "loss": 0.7441, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.2887700534759358, |
| "grad_norm": 2.143497203846691, |
| "learning_rate": 9.58139534883721e-06, |
| "loss": 0.6955, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.29016507788886303, |
| "grad_norm": 2.4890129433808843, |
| "learning_rate": 9.627906976744188e-06, |
| "loss": 0.7716, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.2915601023017903, |
| "grad_norm": 2.320446723006289, |
| "learning_rate": 9.674418604651164e-06, |
| "loss": 0.7583, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.2929551267147175, |
| "grad_norm": 2.411341999029131, |
| "learning_rate": 9.72093023255814e-06, |
| "loss": 0.7681, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.29435015112764473, |
| "grad_norm": 2.301874213978857, |
| "learning_rate": 9.767441860465117e-06, |
| "loss": 0.7208, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.29574517554057195, |
| "grad_norm": 2.640983189271861, |
| "learning_rate": 9.813953488372093e-06, |
| "loss": 0.7693, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.29714019995349916, |
| "grad_norm": 2.4184498532898564, |
| "learning_rate": 9.860465116279071e-06, |
| "loss": 0.7405, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.29853522436642643, |
| "grad_norm": 2.3297820871229202, |
| "learning_rate": 9.906976744186047e-06, |
| "loss": 0.7111, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.29993024877935365, |
| "grad_norm": 2.4217191627013626, |
| "learning_rate": 9.953488372093025e-06, |
| "loss": 0.7665, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.30132527319228086, |
| "grad_norm": 2.401188958751426, |
| "learning_rate": 1e-05, |
| "loss": 0.7454, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3027202976052081, |
| "grad_norm": 2.4499106462941094, |
| "learning_rate": 9.999993396473114e-06, |
| "loss": 0.7878, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.3041153220181353, |
| "grad_norm": 2.2276901973554777, |
| "learning_rate": 9.999973585909898e-06, |
| "loss": 0.6551, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.30551034643106256, |
| "grad_norm": 2.3733342447896417, |
| "learning_rate": 9.99994056836268e-06, |
| "loss": 0.7145, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.3069053708439898, |
| "grad_norm": 2.29189829429208, |
| "learning_rate": 9.999894343918674e-06, |
| "loss": 0.7239, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.308300395256917, |
| "grad_norm": 2.217573234027078, |
| "learning_rate": 9.999834912699974e-06, |
| "loss": 0.7756, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.3096954196698442, |
| "grad_norm": 2.2514531625857677, |
| "learning_rate": 9.999762274863567e-06, |
| "loss": 0.7255, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.3110904440827714, |
| "grad_norm": 2.4225676302200942, |
| "learning_rate": 9.999676430601318e-06, |
| "loss": 0.7809, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.3124854684956987, |
| "grad_norm": 2.167234001414, |
| "learning_rate": 9.999577380139976e-06, |
| "loss": 0.702, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.3138804929086259, |
| "grad_norm": 2.3885170041413004, |
| "learning_rate": 9.999465123741172e-06, |
| "loss": 0.7084, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.3152755173215531, |
| "grad_norm": 2.3894680417925604, |
| "learning_rate": 9.999339661701424e-06, |
| "loss": 0.7033, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.31667054173448034, |
| "grad_norm": 2.2270776846137013, |
| "learning_rate": 9.99920099435213e-06, |
| "loss": 0.7521, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.31806556614740755, |
| "grad_norm": 2.517415559204921, |
| "learning_rate": 9.999049122059565e-06, |
| "loss": 0.7706, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.3194605905603348, |
| "grad_norm": 2.1313262835401288, |
| "learning_rate": 9.998884045224886e-06, |
| "loss": 0.7236, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.32085561497326204, |
| "grad_norm": 2.2375739677464965, |
| "learning_rate": 9.998705764284132e-06, |
| "loss": 0.6958, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.32225063938618925, |
| "grad_norm": 2.4499599731198214, |
| "learning_rate": 9.998514279708212e-06, |
| "loss": 0.7827, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.32364566379911647, |
| "grad_norm": 2.355065280389441, |
| "learning_rate": 9.998309592002914e-06, |
| "loss": 0.7762, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.32504068821204374, |
| "grad_norm": 2.3578314293622573, |
| "learning_rate": 9.99809170170891e-06, |
| "loss": 0.6852, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.32643571262497095, |
| "grad_norm": 2.259491531550109, |
| "learning_rate": 9.997860609401732e-06, |
| "loss": 0.7778, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.32783073703789817, |
| "grad_norm": 2.2038320551296673, |
| "learning_rate": 9.99761631569179e-06, |
| "loss": 0.7799, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.3292257614508254, |
| "grad_norm": 2.42655922351898, |
| "learning_rate": 9.997358821224365e-06, |
| "loss": 0.801, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.3306207858637526, |
| "grad_norm": 2.3511248366461253, |
| "learning_rate": 9.997088126679607e-06, |
| "loss": 0.7303, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.33201581027667987, |
| "grad_norm": 2.290728698460702, |
| "learning_rate": 9.996804232772528e-06, |
| "loss": 0.7332, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.3334108346896071, |
| "grad_norm": 2.2585293725350226, |
| "learning_rate": 9.996507140253012e-06, |
| "loss": 0.7565, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.3348058591025343, |
| "grad_norm": 2.162605836590699, |
| "learning_rate": 9.9961968499058e-06, |
| "loss": 0.7403, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3362008835154615, |
| "grad_norm": 2.44214305770412, |
| "learning_rate": 9.9958733625505e-06, |
| "loss": 0.7672, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.3375959079283887, |
| "grad_norm": 2.1410609124245257, |
| "learning_rate": 9.995536679041568e-06, |
| "loss": 0.7083, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.338990932341316, |
| "grad_norm": 2.3045558078430917, |
| "learning_rate": 9.99518680026833e-06, |
| "loss": 0.759, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.3403859567542432, |
| "grad_norm": 2.1962856519735063, |
| "learning_rate": 9.994823727154957e-06, |
| "loss": 0.7431, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.34178098116717043, |
| "grad_norm": 2.538182288531259, |
| "learning_rate": 9.994447460660473e-06, |
| "loss": 0.7658, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.34317600558009764, |
| "grad_norm": 2.147870815921298, |
| "learning_rate": 9.994058001778754e-06, |
| "loss": 0.7533, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.34457102999302486, |
| "grad_norm": 2.268535359430784, |
| "learning_rate": 9.99365535153852e-06, |
| "loss": 0.7256, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.34596605440595213, |
| "grad_norm": 2.2131932435600126, |
| "learning_rate": 9.993239511003338e-06, |
| "loss": 0.7366, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.34736107881887934, |
| "grad_norm": 2.4607861240746445, |
| "learning_rate": 9.992810481271611e-06, |
| "loss": 0.7347, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.34875610323180656, |
| "grad_norm": 2.2713964429296007, |
| "learning_rate": 9.992368263476585e-06, |
| "loss": 0.7511, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3501511276447338, |
| "grad_norm": 2.227939017521476, |
| "learning_rate": 9.991912858786335e-06, |
| "loss": 0.7604, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.351546152057661, |
| "grad_norm": 2.354239376083879, |
| "learning_rate": 9.991444268403776e-06, |
| "loss": 0.7632, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 2.312214771993005, |
| "learning_rate": 9.990962493566645e-06, |
| "loss": 0.8016, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.3543362008835155, |
| "grad_norm": 2.431880313811741, |
| "learning_rate": 9.99046753554751e-06, |
| "loss": 0.7021, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.3557312252964427, |
| "grad_norm": 2.2846086141474573, |
| "learning_rate": 9.989959395653756e-06, |
| "loss": 0.7367, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.3571262497093699, |
| "grad_norm": 2.1027600513299674, |
| "learning_rate": 9.989438075227588e-06, |
| "loss": 0.6324, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.3585212741222971, |
| "grad_norm": 2.3013858529857756, |
| "learning_rate": 9.988903575646032e-06, |
| "loss": 0.6831, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.3599162985352244, |
| "grad_norm": 2.232795724746918, |
| "learning_rate": 9.988355898320917e-06, |
| "loss": 0.7296, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.3613113229481516, |
| "grad_norm": 2.5816820477428664, |
| "learning_rate": 9.987795044698885e-06, |
| "loss": 0.7603, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.3627063473610788, |
| "grad_norm": 2.3931287274339623, |
| "learning_rate": 9.98722101626138e-06, |
| "loss": 0.7424, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.36410137177400603, |
| "grad_norm": 2.236682063683957, |
| "learning_rate": 9.986633814524648e-06, |
| "loss": 0.7005, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.36549639618693325, |
| "grad_norm": 2.1826925703002416, |
| "learning_rate": 9.986033441039731e-06, |
| "loss": 0.7353, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.3668914205998605, |
| "grad_norm": 2.4709157488020397, |
| "learning_rate": 9.985419897392459e-06, |
| "loss": 0.7928, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.36828644501278773, |
| "grad_norm": 1.981866149603293, |
| "learning_rate": 9.984793185203456e-06, |
| "loss": 0.6918, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.36968146942571495, |
| "grad_norm": 2.2345445872108045, |
| "learning_rate": 9.984153306128124e-06, |
| "loss": 0.7581, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.37107649383864216, |
| "grad_norm": 2.5403963282899644, |
| "learning_rate": 9.983500261856646e-06, |
| "loss": 0.7616, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.3724715182515694, |
| "grad_norm": 2.2145873038291866, |
| "learning_rate": 9.982834054113982e-06, |
| "loss": 0.6927, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.37386654266449665, |
| "grad_norm": 2.415059957789154, |
| "learning_rate": 9.98215468465986e-06, |
| "loss": 0.7197, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.37526156707742386, |
| "grad_norm": 2.1560691347775913, |
| "learning_rate": 9.981462155288773e-06, |
| "loss": 0.7377, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.3766565914903511, |
| "grad_norm": 2.258433532015071, |
| "learning_rate": 9.980756467829977e-06, |
| "loss": 0.6838, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.3780516159032783, |
| "grad_norm": 2.3329270998443166, |
| "learning_rate": 9.98003762414748e-06, |
| "loss": 0.7265, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.3794466403162055, |
| "grad_norm": 2.1635360885551598, |
| "learning_rate": 9.979305626140046e-06, |
| "loss": 0.6886, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.3808416647291328, |
| "grad_norm": 2.127114059196922, |
| "learning_rate": 9.978560475741181e-06, |
| "loss": 0.7118, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.38223668914206, |
| "grad_norm": 2.217596961579976, |
| "learning_rate": 9.977802174919134e-06, |
| "loss": 0.7936, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.3836317135549872, |
| "grad_norm": 2.136075798958187, |
| "learning_rate": 9.977030725676887e-06, |
| "loss": 0.6947, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.3850267379679144, |
| "grad_norm": 2.271555095939749, |
| "learning_rate": 9.976246130052157e-06, |
| "loss": 0.7561, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.38642176238084164, |
| "grad_norm": 2.256449368924786, |
| "learning_rate": 9.97544839011738e-06, |
| "loss": 0.8082, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.3878167867937689, |
| "grad_norm": 2.089485218424677, |
| "learning_rate": 9.974637507979721e-06, |
| "loss": 0.6938, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.3892118112066961, |
| "grad_norm": 2.2379904394260084, |
| "learning_rate": 9.973813485781045e-06, |
| "loss": 0.7031, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.39060683561962334, |
| "grad_norm": 2.065922207391741, |
| "learning_rate": 9.972976325697938e-06, |
| "loss": 0.6992, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.39200186003255055, |
| "grad_norm": 2.0549800888949528, |
| "learning_rate": 9.972126029941685e-06, |
| "loss": 0.7237, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.3933968844454778, |
| "grad_norm": 2.212763606020422, |
| "learning_rate": 9.97126260075826e-06, |
| "loss": 0.7034, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.39479190885840504, |
| "grad_norm": 2.3190648204003015, |
| "learning_rate": 9.97038604042834e-06, |
| "loss": 0.7811, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.39618693327133225, |
| "grad_norm": 2.2158858186853, |
| "learning_rate": 9.969496351267278e-06, |
| "loss": 0.7176, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.39758195768425947, |
| "grad_norm": 2.3115256506252693, |
| "learning_rate": 9.96859353562511e-06, |
| "loss": 0.7466, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.3989769820971867, |
| "grad_norm": 2.1844162221084864, |
| "learning_rate": 9.967677595886542e-06, |
| "loss": 0.7166, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.40037200651011395, |
| "grad_norm": 2.2684119567671126, |
| "learning_rate": 9.96674853447095e-06, |
| "loss": 0.743, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.40176703092304117, |
| "grad_norm": 2.1345672593832608, |
| "learning_rate": 9.96580635383236e-06, |
| "loss": 0.6622, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4031620553359684, |
| "grad_norm": 1.920613760073464, |
| "learning_rate": 9.964851056459465e-06, |
| "loss": 0.6307, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.4045570797488956, |
| "grad_norm": 2.2670166040543753, |
| "learning_rate": 9.963882644875594e-06, |
| "loss": 0.7169, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.4059521041618228, |
| "grad_norm": 2.2233142865091384, |
| "learning_rate": 9.96290112163872e-06, |
| "loss": 0.7076, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.4073471285747501, |
| "grad_norm": 2.3333906520435455, |
| "learning_rate": 9.961906489341452e-06, |
| "loss": 0.7351, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.4087421529876773, |
| "grad_norm": 2.458417913325325, |
| "learning_rate": 9.960898750611019e-06, |
| "loss": 0.7711, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.4101371774006045, |
| "grad_norm": 2.479058663768207, |
| "learning_rate": 9.959877908109274e-06, |
| "loss": 0.8339, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.41153220181353173, |
| "grad_norm": 2.1552249804400128, |
| "learning_rate": 9.958843964532683e-06, |
| "loss": 0.681, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.41292722622645894, |
| "grad_norm": 2.2069137654327537, |
| "learning_rate": 9.957796922612314e-06, |
| "loss": 0.7126, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.4143222506393862, |
| "grad_norm": 2.536473553585731, |
| "learning_rate": 9.956736785113833e-06, |
| "loss": 0.771, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.41571727505231343, |
| "grad_norm": 2.093714464350585, |
| "learning_rate": 9.955663554837503e-06, |
| "loss": 0.6509, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.41711229946524064, |
| "grad_norm": 2.342384161868706, |
| "learning_rate": 9.954577234618162e-06, |
| "loss": 0.7163, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.41850732387816786, |
| "grad_norm": 2.2305186397795613, |
| "learning_rate": 9.953477827325229e-06, |
| "loss": 0.7352, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4199023482910951, |
| "grad_norm": 2.1442071959324287, |
| "learning_rate": 9.952365335862693e-06, |
| "loss": 0.7667, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.42129737270402234, |
| "grad_norm": 2.214992581838815, |
| "learning_rate": 9.951239763169097e-06, |
| "loss": 0.787, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.42269239711694956, |
| "grad_norm": 2.2020890794485215, |
| "learning_rate": 9.950101112217543e-06, |
| "loss": 0.7207, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.4240874215298768, |
| "grad_norm": 2.1586849736161016, |
| "learning_rate": 9.948949386015677e-06, |
| "loss": 0.6965, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.425482445942804, |
| "grad_norm": 2.223387319666449, |
| "learning_rate": 9.947784587605678e-06, |
| "loss": 0.697, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.4268774703557312, |
| "grad_norm": 2.4008683326131863, |
| "learning_rate": 9.946606720064257e-06, |
| "loss": 0.7564, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.4282724947686585, |
| "grad_norm": 2.388205130653307, |
| "learning_rate": 9.945415786502649e-06, |
| "loss": 0.7356, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.4296675191815857, |
| "grad_norm": 2.167142794736497, |
| "learning_rate": 9.944211790066597e-06, |
| "loss": 0.7234, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.4310625435945129, |
| "grad_norm": 2.355995507133336, |
| "learning_rate": 9.94299473393635e-06, |
| "loss": 0.7773, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.4324575680074401, |
| "grad_norm": 2.255660206292414, |
| "learning_rate": 9.941764621326655e-06, |
| "loss": 0.7175, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.43385259242036733, |
| "grad_norm": 2.123386649465731, |
| "learning_rate": 9.94052145548674e-06, |
| "loss": 0.7542, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.4352476168332946, |
| "grad_norm": 2.371212398389211, |
| "learning_rate": 9.939265239700321e-06, |
| "loss": 0.7845, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.4366426412462218, |
| "grad_norm": 2.375596398430479, |
| "learning_rate": 9.93799597728558e-06, |
| "loss": 0.7494, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.43803766565914903, |
| "grad_norm": 2.3770091900621826, |
| "learning_rate": 9.936713671595158e-06, |
| "loss": 0.7801, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.43943269007207625, |
| "grad_norm": 2.2444664500496674, |
| "learning_rate": 9.935418326016153e-06, |
| "loss": 0.7055, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.44082771448500346, |
| "grad_norm": 2.1949523416601804, |
| "learning_rate": 9.934109943970103e-06, |
| "loss": 0.7864, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.44222273889793073, |
| "grad_norm": 2.234576229734612, |
| "learning_rate": 9.932788528912983e-06, |
| "loss": 0.7558, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.44361776331085795, |
| "grad_norm": 2.248104750900924, |
| "learning_rate": 9.931454084335192e-06, |
| "loss": 0.7549, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.44501278772378516, |
| "grad_norm": 2.036656477248027, |
| "learning_rate": 9.930106613761549e-06, |
| "loss": 0.7004, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.4464078121367124, |
| "grad_norm": 2.1759704388763423, |
| "learning_rate": 9.928746120751275e-06, |
| "loss": 0.7335, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.4478028365496396, |
| "grad_norm": 2.242033270499044, |
| "learning_rate": 9.927372608897992e-06, |
| "loss": 0.7691, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.44919786096256686, |
| "grad_norm": 2.1579786835187305, |
| "learning_rate": 9.925986081829708e-06, |
| "loss": 0.6896, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.4505928853754941, |
| "grad_norm": 2.383688707431281, |
| "learning_rate": 9.924586543208812e-06, |
| "loss": 0.7663, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.4519879097884213, |
| "grad_norm": 2.2137281683181538, |
| "learning_rate": 9.923173996732058e-06, |
| "loss": 0.7102, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.4533829342013485, |
| "grad_norm": 2.2671301590285178, |
| "learning_rate": 9.921748446130564e-06, |
| "loss": 0.7588, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.4547779586142757, |
| "grad_norm": 2.0498917185694925, |
| "learning_rate": 9.920309895169793e-06, |
| "loss": 0.7111, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.456172983027203, |
| "grad_norm": 2.2354446947614623, |
| "learning_rate": 9.91885834764955e-06, |
| "loss": 0.7137, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.4575680074401302, |
| "grad_norm": 2.210652660226588, |
| "learning_rate": 9.917393807403965e-06, |
| "loss": 0.7683, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.4589630318530574, |
| "grad_norm": 2.0554417534099243, |
| "learning_rate": 9.915916278301496e-06, |
| "loss": 0.7056, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.46035805626598464, |
| "grad_norm": 2.2562712079069955, |
| "learning_rate": 9.9144257642449e-06, |
| "loss": 0.6896, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.46175308067891185, |
| "grad_norm": 1.9777979766584077, |
| "learning_rate": 9.91292226917124e-06, |
| "loss": 0.6562, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.4631481050918391, |
| "grad_norm": 2.2861549002868955, |
| "learning_rate": 9.91140579705186e-06, |
| "loss": 0.7529, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.46454312950476634, |
| "grad_norm": 2.1618172913510865, |
| "learning_rate": 9.909876351892388e-06, |
| "loss": 0.7675, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.46593815391769355, |
| "grad_norm": 1.9997881559101407, |
| "learning_rate": 9.908333937732718e-06, |
| "loss": 0.7209, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.46733317833062077, |
| "grad_norm": 2.110744936433282, |
| "learning_rate": 9.906778558647e-06, |
| "loss": 0.71, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.46872820274354804, |
| "grad_norm": 2.0830747263370304, |
| "learning_rate": 9.905210218743626e-06, |
| "loss": 0.7266, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.47012322715647525, |
| "grad_norm": 2.0876262800113374, |
| "learning_rate": 9.903628922165227e-06, |
| "loss": 0.7023, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.47151825156940247, |
| "grad_norm": 2.3098228154162217, |
| "learning_rate": 9.902034673088656e-06, |
| "loss": 0.7472, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.4729132759823297, |
| "grad_norm": 2.1523425641211618, |
| "learning_rate": 9.90042747572498e-06, |
| "loss": 0.6827, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.4743083003952569, |
| "grad_norm": 2.3481769610491954, |
| "learning_rate": 9.898807334319471e-06, |
| "loss": 0.7682, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.47570332480818417, |
| "grad_norm": 2.1587172017907155, |
| "learning_rate": 9.897174253151583e-06, |
| "loss": 0.7217, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.4770983492211114, |
| "grad_norm": 2.2768842079801836, |
| "learning_rate": 9.895528236534957e-06, |
| "loss": 0.7414, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.4784933736340386, |
| "grad_norm": 2.267882603603539, |
| "learning_rate": 9.893869288817397e-06, |
| "loss": 0.7902, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.4798883980469658, |
| "grad_norm": 2.003426082657925, |
| "learning_rate": 9.89219741438087e-06, |
| "loss": 0.7133, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.48128342245989303, |
| "grad_norm": 2.2017001776217704, |
| "learning_rate": 9.890512617641474e-06, |
| "loss": 0.7315, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.4826784468728203, |
| "grad_norm": 2.2874305641494663, |
| "learning_rate": 9.888814903049458e-06, |
| "loss": 0.7551, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.4840734712857475, |
| "grad_norm": 2.2146672202568896, |
| "learning_rate": 9.88710427508918e-06, |
| "loss": 0.7701, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.48546849569867473, |
| "grad_norm": 2.2368298170629473, |
| "learning_rate": 9.885380738279111e-06, |
| "loss": 0.6928, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.48686352011160194, |
| "grad_norm": 2.1128133334522565, |
| "learning_rate": 9.883644297171821e-06, |
| "loss": 0.7264, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.48825854452452916, |
| "grad_norm": 2.0878462954407517, |
| "learning_rate": 9.881894956353963e-06, |
| "loss": 0.7533, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.48965356893745643, |
| "grad_norm": 2.1787208125747384, |
| "learning_rate": 9.880132720446265e-06, |
| "loss": 0.7176, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.49104859335038364, |
| "grad_norm": 2.2222401905237126, |
| "learning_rate": 9.878357594103516e-06, |
| "loss": 0.7732, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.49244361776331086, |
| "grad_norm": 2.164885730565751, |
| "learning_rate": 9.876569582014554e-06, |
| "loss": 0.7192, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.4938386421762381, |
| "grad_norm": 2.176298873741657, |
| "learning_rate": 9.874768688902252e-06, |
| "loss": 0.7583, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.4952336665891653, |
| "grad_norm": 1.9635642084372211, |
| "learning_rate": 9.87295491952351e-06, |
| "loss": 0.7543, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.49662869100209256, |
| "grad_norm": 2.2033913590623633, |
| "learning_rate": 9.871128278669238e-06, |
| "loss": 0.7254, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.4980237154150198, |
| "grad_norm": 2.1462134327569014, |
| "learning_rate": 9.869288771164344e-06, |
| "loss": 0.7201, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.499418739827947, |
| "grad_norm": 2.2380749870735177, |
| "learning_rate": 9.867436401867723e-06, |
| "loss": 0.7316, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.5008137642408742, |
| "grad_norm": 2.3135966107377195, |
| "learning_rate": 9.865571175672245e-06, |
| "loss": 0.7659, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.5022087886538015, |
| "grad_norm": 2.1533066959256155, |
| "learning_rate": 9.863693097504733e-06, |
| "loss": 0.7098, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5036038130667286, |
| "grad_norm": 2.1171023788743644, |
| "learning_rate": 9.86180217232597e-06, |
| "loss": 0.7726, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.5049988374796559, |
| "grad_norm": 2.4377521628329313, |
| "learning_rate": 9.859898405130661e-06, |
| "loss": 0.7916, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.5063938618925832, |
| "grad_norm": 2.2854519749010844, |
| "learning_rate": 9.85798180094744e-06, |
| "loss": 0.7118, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.5077888863055103, |
| "grad_norm": 2.244618640647697, |
| "learning_rate": 9.856052364838846e-06, |
| "loss": 0.8073, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.5091839107184376, |
| "grad_norm": 2.177378244026803, |
| "learning_rate": 9.854110101901308e-06, |
| "loss": 0.6749, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.5105789351313648, |
| "grad_norm": 1.9603446721831779, |
| "learning_rate": 9.852155017265146e-06, |
| "loss": 0.6283, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.511973959544292, |
| "grad_norm": 2.220937568124573, |
| "learning_rate": 9.850187116094538e-06, |
| "loss": 0.721, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.5133689839572193, |
| "grad_norm": 2.1071011951034637, |
| "learning_rate": 9.848206403587521e-06, |
| "loss": 0.7634, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.5147640083701465, |
| "grad_norm": 2.294947828525865, |
| "learning_rate": 9.84621288497597e-06, |
| "loss": 0.7562, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.5161590327830737, |
| "grad_norm": 2.122179199455438, |
| "learning_rate": 9.844206565525585e-06, |
| "loss": 0.7495, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5175540571960009, |
| "grad_norm": 2.15699679179535, |
| "learning_rate": 9.842187450535881e-06, |
| "loss": 0.731, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.5189490816089282, |
| "grad_norm": 2.43922784941149, |
| "learning_rate": 9.840155545340169e-06, |
| "loss": 0.7457, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.5203441060218554, |
| "grad_norm": 2.251971046707549, |
| "learning_rate": 9.838110855305548e-06, |
| "loss": 0.7103, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "grad_norm": 2.0408680111240614, |
| "learning_rate": 9.836053385832881e-06, |
| "loss": 0.6503, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.5231341548477099, |
| "grad_norm": 2.1603103563459105, |
| "learning_rate": 9.833983142356792e-06, |
| "loss": 0.7281, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.524529179260637, |
| "grad_norm": 2.2773618533718563, |
| "learning_rate": 9.831900130345645e-06, |
| "loss": 0.7009, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.5259242036735643, |
| "grad_norm": 2.099468862050019, |
| "learning_rate": 9.829804355301527e-06, |
| "loss": 0.6762, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.5273192280864916, |
| "grad_norm": 2.064470755868956, |
| "learning_rate": 9.827695822760245e-06, |
| "loss": 0.6789, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.5287142524994187, |
| "grad_norm": 2.2842388942976326, |
| "learning_rate": 9.825574538291293e-06, |
| "loss": 0.7384, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.530109276912346, |
| "grad_norm": 2.0597543670014806, |
| "learning_rate": 9.823440507497863e-06, |
| "loss": 0.6774, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5315043013252732, |
| "grad_norm": 2.2151766652128573, |
| "learning_rate": 9.821293736016802e-06, |
| "loss": 0.6933, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.5328993257382004, |
| "grad_norm": 2.281926946410573, |
| "learning_rate": 9.819134229518617e-06, |
| "loss": 0.7449, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.5342943501511277, |
| "grad_norm": 2.1443995307296464, |
| "learning_rate": 9.81696199370745e-06, |
| "loss": 0.7213, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.5356893745640549, |
| "grad_norm": 2.11871178228737, |
| "learning_rate": 9.814777034321069e-06, |
| "loss": 0.7453, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.5370843989769821, |
| "grad_norm": 2.218758315080764, |
| "learning_rate": 9.812579357130848e-06, |
| "loss": 0.7702, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.5384794233899093, |
| "grad_norm": 2.0131323463999444, |
| "learning_rate": 9.810368967941757e-06, |
| "loss": 0.7129, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.5398744478028366, |
| "grad_norm": 2.18529376681278, |
| "learning_rate": 9.808145872592341e-06, |
| "loss": 0.7192, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.5412694722157638, |
| "grad_norm": 2.1408294189371113, |
| "learning_rate": 9.80591007695471e-06, |
| "loss": 0.689, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.542664496628691, |
| "grad_norm": 2.1875152665270075, |
| "learning_rate": 9.803661586934514e-06, |
| "loss": 0.7041, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.5440595210416183, |
| "grad_norm": 2.2494421729469587, |
| "learning_rate": 9.801400408470943e-06, |
| "loss": 0.7558, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 2.1718406110229314, |
| "learning_rate": 9.799126547536695e-06, |
| "loss": 0.7542, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.5468495698674727, |
| "grad_norm": 2.1100570891152524, |
| "learning_rate": 9.796840010137972e-06, |
| "loss": 0.7036, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.5482445942804, |
| "grad_norm": 2.115132254738484, |
| "learning_rate": 9.79454080231446e-06, |
| "loss": 0.8377, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.5496396186933271, |
| "grad_norm": 2.014824175362774, |
| "learning_rate": 9.79222893013931e-06, |
| "loss": 0.6456, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.5510346431062544, |
| "grad_norm": 2.227921249849429, |
| "learning_rate": 9.789904399719124e-06, |
| "loss": 0.7739, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.5524296675191815, |
| "grad_norm": 2.112220428527919, |
| "learning_rate": 9.787567217193944e-06, |
| "loss": 0.6688, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.5538246919321088, |
| "grad_norm": 2.1494591416793636, |
| "learning_rate": 9.785217388737232e-06, |
| "loss": 0.7427, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.5552197163450361, |
| "grad_norm": 2.0642390183703796, |
| "learning_rate": 9.782854920555844e-06, |
| "loss": 0.6962, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.5566147407579632, |
| "grad_norm": 1.9583040952188322, |
| "learning_rate": 9.780479818890032e-06, |
| "loss": 0.7644, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.5580097651708905, |
| "grad_norm": 2.1534478133290844, |
| "learning_rate": 9.778092090013416e-06, |
| "loss": 0.7137, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5594047895838177, |
| "grad_norm": 1.914276540514964, |
| "learning_rate": 9.775691740232966e-06, |
| "loss": 0.6742, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.560799813996745, |
| "grad_norm": 2.016867139681356, |
| "learning_rate": 9.773278775888995e-06, |
| "loss": 0.7475, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.5621948384096722, |
| "grad_norm": 2.1849766754698416, |
| "learning_rate": 9.77085320335513e-06, |
| "loss": 0.7055, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.5635898628225994, |
| "grad_norm": 2.223991761051566, |
| "learning_rate": 9.768415029038304e-06, |
| "loss": 0.7101, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.5649848872355266, |
| "grad_norm": 2.0011991831554954, |
| "learning_rate": 9.76596425937874e-06, |
| "loss": 0.6877, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.5663799116484538, |
| "grad_norm": 2.0542369776804166, |
| "learning_rate": 9.763500900849926e-06, |
| "loss": 0.6662, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.5677749360613811, |
| "grad_norm": 2.3730906372976546, |
| "learning_rate": 9.761024959958605e-06, |
| "loss": 0.7489, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.5691699604743083, |
| "grad_norm": 2.3196262507878775, |
| "learning_rate": 9.75853644324475e-06, |
| "loss": 0.7565, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.5705649848872355, |
| "grad_norm": 2.341441695587702, |
| "learning_rate": 9.756035357281559e-06, |
| "loss": 0.7399, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.5719600093001628, |
| "grad_norm": 2.090797895413644, |
| "learning_rate": 9.753521708675426e-06, |
| "loss": 0.7035, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.5733550337130899, |
| "grad_norm": 2.137767654681621, |
| "learning_rate": 9.75099550406593e-06, |
| "loss": 0.7104, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.5747500581260172, |
| "grad_norm": 2.0270348604623942, |
| "learning_rate": 9.748456750125817e-06, |
| "loss": 0.755, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.5761450825389445, |
| "grad_norm": 2.15807439031817, |
| "learning_rate": 9.745905453560976e-06, |
| "loss": 0.7055, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.5775401069518716, |
| "grad_norm": 1.941910111218879, |
| "learning_rate": 9.74334162111043e-06, |
| "loss": 0.7452, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.5789351313647989, |
| "grad_norm": 2.0347312764060037, |
| "learning_rate": 9.740765259546312e-06, |
| "loss": 0.7233, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.5803301557777261, |
| "grad_norm": 2.250098777709644, |
| "learning_rate": 9.738176375673856e-06, |
| "loss": 0.7386, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.5817251801906533, |
| "grad_norm": 2.152752510392946, |
| "learning_rate": 9.735574976331362e-06, |
| "loss": 0.6596, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.5831202046035806, |
| "grad_norm": 2.1555034248115446, |
| "learning_rate": 9.732961068390199e-06, |
| "loss": 0.7568, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.5845152290165078, |
| "grad_norm": 2.3258624129809107, |
| "learning_rate": 9.730334658754767e-06, |
| "loss": 0.8289, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.585910253429435, |
| "grad_norm": 2.0681332356052233, |
| "learning_rate": 9.727695754362498e-06, |
| "loss": 0.7454, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.5873052778423622, |
| "grad_norm": 2.1429328741531877, |
| "learning_rate": 9.725044362183817e-06, |
| "loss": 0.7189, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.5887003022552895, |
| "grad_norm": 2.035384052327714, |
| "learning_rate": 9.722380489222145e-06, |
| "loss": 0.6795, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.5900953266682167, |
| "grad_norm": 1.9628409776914946, |
| "learning_rate": 9.71970414251386e-06, |
| "loss": 0.6584, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.5914903510811439, |
| "grad_norm": 2.119850722980248, |
| "learning_rate": 9.717015329128294e-06, |
| "loss": 0.7466, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.5928853754940712, |
| "grad_norm": 2.127385339997239, |
| "learning_rate": 9.714314056167711e-06, |
| "loss": 0.711, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.5942803999069983, |
| "grad_norm": 1.8294466072056457, |
| "learning_rate": 9.711600330767278e-06, |
| "loss": 0.6382, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.5956754243199256, |
| "grad_norm": 2.1442575532302737, |
| "learning_rate": 9.708874160095061e-06, |
| "loss": 0.7207, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.5970704487328529, |
| "grad_norm": 2.0715902524872436, |
| "learning_rate": 9.706135551351996e-06, |
| "loss": 0.721, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.59846547314578, |
| "grad_norm": 2.017273861098596, |
| "learning_rate": 9.703384511771874e-06, |
| "loss": 0.6685, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.5998604975587073, |
| "grad_norm": 2.0128870162232326, |
| "learning_rate": 9.700621048621322e-06, |
| "loss": 0.6621, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6012555219716345, |
| "grad_norm": 1.9324919806562177, |
| "learning_rate": 9.697845169199775e-06, |
| "loss": 0.6495, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.6026505463845617, |
| "grad_norm": 2.2593974726575023, |
| "learning_rate": 9.69505688083948e-06, |
| "loss": 0.7673, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.604045570797489, |
| "grad_norm": 1.9629658473435176, |
| "learning_rate": 9.692256190905444e-06, |
| "loss": 0.6567, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.6054405952104162, |
| "grad_norm": 2.1948743143431373, |
| "learning_rate": 9.689443106795442e-06, |
| "loss": 0.7275, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.6068356196233434, |
| "grad_norm": 2.173895338644472, |
| "learning_rate": 9.686617635939988e-06, |
| "loss": 0.6843, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.6082306440362706, |
| "grad_norm": 2.183862902137113, |
| "learning_rate": 9.683779785802306e-06, |
| "loss": 0.7488, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6096256684491979, |
| "grad_norm": 2.247224096170596, |
| "learning_rate": 9.680929563878327e-06, |
| "loss": 0.7009, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.6110206928621251, |
| "grad_norm": 2.2198797071377308, |
| "learning_rate": 9.678066977696656e-06, |
| "loss": 0.7576, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.6124157172750523, |
| "grad_norm": 2.099155911257389, |
| "learning_rate": 9.675192034818561e-06, |
| "loss": 0.7342, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.6138107416879796, |
| "grad_norm": 2.1826988188780456, |
| "learning_rate": 9.672304742837945e-06, |
| "loss": 0.7992, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6152057661009067, |
| "grad_norm": 2.11108770679613, |
| "learning_rate": 9.669405109381335e-06, |
| "loss": 0.7284, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.616600790513834, |
| "grad_norm": 2.1204342794359974, |
| "learning_rate": 9.66649314210785e-06, |
| "loss": 0.7281, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.6179958149267613, |
| "grad_norm": 1.9367375234614417, |
| "learning_rate": 9.663568848709194e-06, |
| "loss": 0.7283, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.6193908393396884, |
| "grad_norm": 2.1130289482285574, |
| "learning_rate": 9.660632236909628e-06, |
| "loss": 0.7242, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.6207858637526157, |
| "grad_norm": 1.9102389109395275, |
| "learning_rate": 9.657683314465948e-06, |
| "loss": 0.6874, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.6221808881655428, |
| "grad_norm": 2.1435520707988314, |
| "learning_rate": 9.65472208916747e-06, |
| "loss": 0.7858, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.6235759125784701, |
| "grad_norm": 2.1624999372909106, |
| "learning_rate": 9.651748568836007e-06, |
| "loss": 0.7646, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.6249709369913974, |
| "grad_norm": 2.065236722576021, |
| "learning_rate": 9.648762761325847e-06, |
| "loss": 0.6898, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.6263659614043245, |
| "grad_norm": 2.0485232389558616, |
| "learning_rate": 9.645764674523732e-06, |
| "loss": 0.7839, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.6277609858172518, |
| "grad_norm": 2.0655621784312235, |
| "learning_rate": 9.642754316348846e-06, |
| "loss": 0.7075, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.629156010230179, |
| "grad_norm": 2.0060672739124152, |
| "learning_rate": 9.639731694752776e-06, |
| "loss": 0.7489, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.6305510346431062, |
| "grad_norm": 2.208261364040988, |
| "learning_rate": 9.636696817719511e-06, |
| "loss": 0.7535, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.6319460590560335, |
| "grad_norm": 2.2089796542819866, |
| "learning_rate": 9.633649693265406e-06, |
| "loss": 0.7482, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.6333410834689607, |
| "grad_norm": 2.2384194618104125, |
| "learning_rate": 9.630590329439169e-06, |
| "loss": 0.7792, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.634736107881888, |
| "grad_norm": 2.2060852158547264, |
| "learning_rate": 9.627518734321837e-06, |
| "loss": 0.7525, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.6361311322948151, |
| "grad_norm": 2.149847852979536, |
| "learning_rate": 9.624434916026752e-06, |
| "loss": 0.7439, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.6375261567077424, |
| "grad_norm": 2.062138416788092, |
| "learning_rate": 9.621338882699547e-06, |
| "loss": 0.7396, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.6389211811206696, |
| "grad_norm": 2.093876836162674, |
| "learning_rate": 9.618230642518117e-06, |
| "loss": 0.6971, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.6403162055335968, |
| "grad_norm": 2.052608597617671, |
| "learning_rate": 9.615110203692602e-06, |
| "loss": 0.7709, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.6417112299465241, |
| "grad_norm": 2.1332375538387676, |
| "learning_rate": 9.61197757446536e-06, |
| "loss": 0.7243, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6431062543594513, |
| "grad_norm": 1.9725195227245578, |
| "learning_rate": 9.608832763110955e-06, |
| "loss": 0.6988, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.6445012787723785, |
| "grad_norm": 2.1827535475246997, |
| "learning_rate": 9.605675777936123e-06, |
| "loss": 0.7206, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.6458963031853058, |
| "grad_norm": 2.0617095984448017, |
| "learning_rate": 9.60250662727976e-06, |
| "loss": 0.7297, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.6472913275982329, |
| "grad_norm": 2.104755014090587, |
| "learning_rate": 9.599325319512893e-06, |
| "loss": 0.7303, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.6486863520111602, |
| "grad_norm": 2.0095406432497924, |
| "learning_rate": 9.596131863038664e-06, |
| "loss": 0.7136, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.6500813764240875, |
| "grad_norm": 2.0521696580726267, |
| "learning_rate": 9.592926266292305e-06, |
| "loss": 0.7085, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.6514764008370146, |
| "grad_norm": 2.216895328470358, |
| "learning_rate": 9.589708537741109e-06, |
| "loss": 0.7338, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.6528714252499419, |
| "grad_norm": 2.0241813457119293, |
| "learning_rate": 9.586478685884424e-06, |
| "loss": 0.6855, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.6542664496628691, |
| "grad_norm": 2.0778866351895293, |
| "learning_rate": 9.583236719253611e-06, |
| "loss": 0.7044, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.6556614740757963, |
| "grad_norm": 2.1863136845278186, |
| "learning_rate": 9.579982646412039e-06, |
| "loss": 0.719, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.6570564984887236, |
| "grad_norm": 1.9539661965522732, |
| "learning_rate": 9.576716475955048e-06, |
| "loss": 0.6416, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.6584515229016508, |
| "grad_norm": 2.0452081518877603, |
| "learning_rate": 9.573438216509937e-06, |
| "loss": 0.6972, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.659846547314578, |
| "grad_norm": 1.995830987986953, |
| "learning_rate": 9.570147876735937e-06, |
| "loss": 0.7378, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.6612415717275052, |
| "grad_norm": 1.8915266992396211, |
| "learning_rate": 9.566845465324185e-06, |
| "loss": 0.6788, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.6626365961404325, |
| "grad_norm": 2.003656922577594, |
| "learning_rate": 9.563530990997707e-06, |
| "loss": 0.7674, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.6640316205533597, |
| "grad_norm": 2.088291822709115, |
| "learning_rate": 9.560204462511392e-06, |
| "loss": 0.7355, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.6654266449662869, |
| "grad_norm": 1.984879297358519, |
| "learning_rate": 9.556865888651965e-06, |
| "loss": 0.7545, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.6668216693792142, |
| "grad_norm": 1.9946933617029812, |
| "learning_rate": 9.553515278237975e-06, |
| "loss": 0.6782, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.6682166937921413, |
| "grad_norm": 2.1899693736873607, |
| "learning_rate": 9.550152640119757e-06, |
| "loss": 0.7334, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.6696117182050686, |
| "grad_norm": 2.1437476826926374, |
| "learning_rate": 9.546777983179421e-06, |
| "loss": 0.7577, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6710067426179959, |
| "grad_norm": 1.9104178259795124, |
| "learning_rate": 9.543391316330822e-06, |
| "loss": 0.636, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.672401767030923, |
| "grad_norm": 2.0148752084647525, |
| "learning_rate": 9.539992648519538e-06, |
| "loss": 0.7451, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.6737967914438503, |
| "grad_norm": 2.2385594955618826, |
| "learning_rate": 9.536581988722848e-06, |
| "loss": 0.8286, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.6751918158567775, |
| "grad_norm": 2.1601886300481414, |
| "learning_rate": 9.533159345949704e-06, |
| "loss": 0.762, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.6765868402697047, |
| "grad_norm": 1.9477636560481595, |
| "learning_rate": 9.529724729240712e-06, |
| "loss": 0.6817, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.677981864682632, |
| "grad_norm": 2.0753277030485857, |
| "learning_rate": 9.526278147668104e-06, |
| "loss": 0.6958, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.6793768890955592, |
| "grad_norm": 2.071829326274995, |
| "learning_rate": 9.522819610335721e-06, |
| "loss": 0.7237, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.6807719135084864, |
| "grad_norm": 1.8341834360542066, |
| "learning_rate": 9.519349126378975e-06, |
| "loss": 0.6825, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.6821669379214136, |
| "grad_norm": 2.2062085487991827, |
| "learning_rate": 9.515866704964846e-06, |
| "loss": 0.7498, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.6835619623343409, |
| "grad_norm": 2.080856174420327, |
| "learning_rate": 9.512372355291838e-06, |
| "loss": 0.7133, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.6849569867472681, |
| "grad_norm": 1.8162007262738658, |
| "learning_rate": 9.50886608658996e-06, |
| "loss": 0.6417, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.6863520111601953, |
| "grad_norm": 2.0373986097463996, |
| "learning_rate": 9.505347908120712e-06, |
| "loss": 0.7397, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.6877470355731226, |
| "grad_norm": 2.0546808459729324, |
| "learning_rate": 9.501817829177046e-06, |
| "loss": 0.7141, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.6891420599860497, |
| "grad_norm": 2.133311384676604, |
| "learning_rate": 9.498275859083353e-06, |
| "loss": 0.7297, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.690537084398977, |
| "grad_norm": 2.045445358883216, |
| "learning_rate": 9.494722007195427e-06, |
| "loss": 0.7056, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.6919321088119043, |
| "grad_norm": 2.032312961897094, |
| "learning_rate": 9.491156282900454e-06, |
| "loss": 0.7126, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.6933271332248314, |
| "grad_norm": 2.0855627156665597, |
| "learning_rate": 9.487578695616974e-06, |
| "loss": 0.7824, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.6947221576377587, |
| "grad_norm": 2.0792422212264734, |
| "learning_rate": 9.483989254794865e-06, |
| "loss": 0.7313, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.6961171820506858, |
| "grad_norm": 1.9424464522170488, |
| "learning_rate": 9.480387969915318e-06, |
| "loss": 0.6236, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.6975122064636131, |
| "grad_norm": 2.101297942682129, |
| "learning_rate": 9.476774850490803e-06, |
| "loss": 0.7401, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6989072308765404, |
| "grad_norm": 2.045369779079581, |
| "learning_rate": 9.47314990606505e-06, |
| "loss": 0.72, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.7003022552894675, |
| "grad_norm": 2.070518186746418, |
| "learning_rate": 9.46951314621303e-06, |
| "loss": 0.6995, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.7016972797023948, |
| "grad_norm": 2.2999734032614234, |
| "learning_rate": 9.465864580540917e-06, |
| "loss": 0.8061, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.703092304115322, |
| "grad_norm": 2.1443964408217626, |
| "learning_rate": 9.462204218686075e-06, |
| "loss": 0.7044, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.7044873285282492, |
| "grad_norm": 2.096007475498279, |
| "learning_rate": 9.458532070317021e-06, |
| "loss": 0.7273, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 2.0041204965854162, |
| "learning_rate": 9.454848145133406e-06, |
| "loss": 0.7055, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.7072773773541037, |
| "grad_norm": 2.2491552759682665, |
| "learning_rate": 9.451152452865991e-06, |
| "loss": 0.7134, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.708672401767031, |
| "grad_norm": 1.9405986617138786, |
| "learning_rate": 9.447445003276618e-06, |
| "loss": 0.6579, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.7100674261799581, |
| "grad_norm": 2.195166493132121, |
| "learning_rate": 9.443725806158182e-06, |
| "loss": 0.7339, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.7114624505928854, |
| "grad_norm": 1.9578915579027396, |
| "learning_rate": 9.439994871334614e-06, |
| "loss": 0.7155, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7128574750058126, |
| "grad_norm": 2.190807561711358, |
| "learning_rate": 9.43625220866084e-06, |
| "loss": 0.7126, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.7142524994187398, |
| "grad_norm": 2.098986891994908, |
| "learning_rate": 9.432497828022775e-06, |
| "loss": 0.7075, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.7156475238316671, |
| "grad_norm": 2.1368418565133664, |
| "learning_rate": 9.428731739337277e-06, |
| "loss": 0.8113, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.7170425482445942, |
| "grad_norm": 2.2290156819613687, |
| "learning_rate": 9.424953952552134e-06, |
| "loss": 0.7307, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.7184375726575215, |
| "grad_norm": 2.04618346548946, |
| "learning_rate": 9.421164477646031e-06, |
| "loss": 0.7031, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.7198325970704488, |
| "grad_norm": 2.2362515222635513, |
| "learning_rate": 9.41736332462853e-06, |
| "loss": 0.7545, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.7212276214833759, |
| "grad_norm": 2.125062136154135, |
| "learning_rate": 9.413550503540039e-06, |
| "loss": 0.7379, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.7226226458963032, |
| "grad_norm": 2.016687835394818, |
| "learning_rate": 9.409726024451781e-06, |
| "loss": 0.7415, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.7240176703092304, |
| "grad_norm": 1.977367133466997, |
| "learning_rate": 9.40588989746578e-06, |
| "loss": 0.6891, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.7254126947221576, |
| "grad_norm": 2.026408972891636, |
| "learning_rate": 9.402042132714817e-06, |
| "loss": 0.7004, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.7268077191350849, |
| "grad_norm": 2.0546938078372183, |
| "learning_rate": 9.398182740362424e-06, |
| "loss": 0.7289, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.7282027435480121, |
| "grad_norm": 1.9953897499935436, |
| "learning_rate": 9.39431173060284e-06, |
| "loss": 0.7276, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.7295977679609393, |
| "grad_norm": 1.986025182877031, |
| "learning_rate": 9.390429113660993e-06, |
| "loss": 0.706, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.7309927923738665, |
| "grad_norm": 2.1046471412286145, |
| "learning_rate": 9.38653489979247e-06, |
| "loss": 0.7602, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.7323878167867938, |
| "grad_norm": 2.068123474852749, |
| "learning_rate": 9.382629099283486e-06, |
| "loss": 0.7071, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.733782841199721, |
| "grad_norm": 1.9994411687174558, |
| "learning_rate": 9.378711722450866e-06, |
| "loss": 0.7304, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.7351778656126482, |
| "grad_norm": 2.033536588615635, |
| "learning_rate": 9.374782779642013e-06, |
| "loss": 0.711, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.7365728900255755, |
| "grad_norm": 2.0264601325168563, |
| "learning_rate": 9.370842281234876e-06, |
| "loss": 0.7279, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.7379679144385026, |
| "grad_norm": 1.9113702618810533, |
| "learning_rate": 9.366890237637932e-06, |
| "loss": 0.6807, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.7393629388514299, |
| "grad_norm": 1.968450453362606, |
| "learning_rate": 9.362926659290149e-06, |
| "loss": 0.6492, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7407579632643572, |
| "grad_norm": 2.09710149964991, |
| "learning_rate": 9.358951556660968e-06, |
| "loss": 0.7139, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.7421529876772843, |
| "grad_norm": 1.9988693594361486, |
| "learning_rate": 9.354964940250269e-06, |
| "loss": 0.6732, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.7435480120902116, |
| "grad_norm": 1.8960612048379188, |
| "learning_rate": 9.35096682058834e-06, |
| "loss": 0.6896, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.7449430365031388, |
| "grad_norm": 2.005833390469395, |
| "learning_rate": 9.346957208235857e-06, |
| "loss": 0.7432, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.746338060916066, |
| "grad_norm": 2.0295928389351476, |
| "learning_rate": 9.342936113783855e-06, |
| "loss": 0.7013, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.7477330853289933, |
| "grad_norm": 2.1433987934073007, |
| "learning_rate": 9.338903547853698e-06, |
| "loss": 0.7294, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.7491281097419205, |
| "grad_norm": 2.1474994409045287, |
| "learning_rate": 9.334859521097046e-06, |
| "loss": 0.7127, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.7505231341548477, |
| "grad_norm": 1.921265945141032, |
| "learning_rate": 9.330804044195836e-06, |
| "loss": 0.6821, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.7519181585677749, |
| "grad_norm": 2.168114835407853, |
| "learning_rate": 9.326737127862249e-06, |
| "loss": 0.711, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.7533131829807022, |
| "grad_norm": 2.1528342823048074, |
| "learning_rate": 9.32265878283868e-06, |
| "loss": 0.7002, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7547082073936294, |
| "grad_norm": 2.0987262205834645, |
| "learning_rate": 9.318569019897713e-06, |
| "loss": 0.7538, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.7561032318065566, |
| "grad_norm": 1.9509575446420426, |
| "learning_rate": 9.314467849842093e-06, |
| "loss": 0.7122, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.7574982562194839, |
| "grad_norm": 2.057793685763029, |
| "learning_rate": 9.310355283504696e-06, |
| "loss": 0.7413, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.758893280632411, |
| "grad_norm": 1.9925598392787818, |
| "learning_rate": 9.306231331748496e-06, |
| "loss": 0.7215, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.7602883050453383, |
| "grad_norm": 1.9985902931925121, |
| "learning_rate": 9.302096005466547e-06, |
| "loss": 0.6459, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.7616833294582656, |
| "grad_norm": 2.2363161994946505, |
| "learning_rate": 9.29794931558194e-06, |
| "loss": 0.7306, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.7630783538711927, |
| "grad_norm": 2.1134851830312016, |
| "learning_rate": 9.29379127304779e-06, |
| "loss": 0.7338, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.76447337828412, |
| "grad_norm": 2.0091531287699778, |
| "learning_rate": 9.289621888847194e-06, |
| "loss": 0.7636, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.7658684026970471, |
| "grad_norm": 1.8526029005583842, |
| "learning_rate": 9.285441173993207e-06, |
| "loss": 0.6376, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.7672634271099744, |
| "grad_norm": 2.076723919864085, |
| "learning_rate": 9.281249139528816e-06, |
| "loss": 0.7514, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.7686584515229017, |
| "grad_norm": 2.024522052995256, |
| "learning_rate": 9.277045796526904e-06, |
| "loss": 0.7281, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.7700534759358288, |
| "grad_norm": 2.07702628967691, |
| "learning_rate": 9.272831156090229e-06, |
| "loss": 0.7294, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.7714485003487561, |
| "grad_norm": 1.9676509130458903, |
| "learning_rate": 9.268605229351387e-06, |
| "loss": 0.7135, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.7728435247616833, |
| "grad_norm": 2.0873320795012993, |
| "learning_rate": 9.264368027472785e-06, |
| "loss": 0.7233, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.7742385491746105, |
| "grad_norm": 2.0744695861945406, |
| "learning_rate": 9.260119561646614e-06, |
| "loss": 0.6836, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.7756335735875378, |
| "grad_norm": 2.021884868718414, |
| "learning_rate": 9.255859843094817e-06, |
| "loss": 0.7258, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.777028598000465, |
| "grad_norm": 2.1099089458381117, |
| "learning_rate": 9.25158888306906e-06, |
| "loss": 0.6828, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.7784236224133922, |
| "grad_norm": 2.0907108018280507, |
| "learning_rate": 9.247306692850705e-06, |
| "loss": 0.7106, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.7798186468263194, |
| "grad_norm": 2.0887522620268766, |
| "learning_rate": 9.243013283750774e-06, |
| "loss": 0.7697, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.7812136712392467, |
| "grad_norm": 1.9955155672685916, |
| "learning_rate": 9.238708667109924e-06, |
| "loss": 0.7257, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.782608695652174, |
| "grad_norm": 2.0014379807399756, |
| "learning_rate": 9.234392854298414e-06, |
| "loss": 0.7381, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.7840037200651011, |
| "grad_norm": 1.9045153376629644, |
| "learning_rate": 9.230065856716081e-06, |
| "loss": 0.6771, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.7853987444780284, |
| "grad_norm": 1.9469069768920204, |
| "learning_rate": 9.225727685792302e-06, |
| "loss": 0.7056, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.7867937688909556, |
| "grad_norm": 2.2031190641694125, |
| "learning_rate": 9.221378352985967e-06, |
| "loss": 0.7734, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.7881887933038828, |
| "grad_norm": 2.0748393924695203, |
| "learning_rate": 9.217017869785453e-06, |
| "loss": 0.7082, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.7895838177168101, |
| "grad_norm": 2.0995502525717864, |
| "learning_rate": 9.212646247708585e-06, |
| "loss": 0.7439, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.7909788421297372, |
| "grad_norm": 2.068581983139408, |
| "learning_rate": 9.208263498302613e-06, |
| "loss": 0.7559, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.7923738665426645, |
| "grad_norm": 1.9531887516172675, |
| "learning_rate": 9.203869633144182e-06, |
| "loss": 0.7143, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.7937688909555918, |
| "grad_norm": 2.0266830492686347, |
| "learning_rate": 9.19946466383929e-06, |
| "loss": 0.6901, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.7951639153685189, |
| "grad_norm": 1.9362491161859336, |
| "learning_rate": 9.19504860202327e-06, |
| "loss": 0.6699, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.7965589397814462, |
| "grad_norm": 1.986999275214, |
| "learning_rate": 9.19062145936076e-06, |
| "loss": 0.6474, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.7979539641943734, |
| "grad_norm": 2.2000153236749402, |
| "learning_rate": 9.186183247545657e-06, |
| "loss": 0.7826, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.7993489886073006, |
| "grad_norm": 2.0941023318573215, |
| "learning_rate": 9.181733978301103e-06, |
| "loss": 0.7143, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.8007440130202279, |
| "grad_norm": 2.071666423970005, |
| "learning_rate": 9.177273663379449e-06, |
| "loss": 0.6763, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.8021390374331551, |
| "grad_norm": 2.2615998191295255, |
| "learning_rate": 9.172802314562214e-06, |
| "loss": 0.7621, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.8035340618460823, |
| "grad_norm": 1.9739199882286245, |
| "learning_rate": 9.16831994366007e-06, |
| "loss": 0.6538, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.8049290862590095, |
| "grad_norm": 2.0341275898427593, |
| "learning_rate": 9.1638265625128e-06, |
| "loss": 0.6651, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.8063241106719368, |
| "grad_norm": 2.212386148497631, |
| "learning_rate": 9.159322182989265e-06, |
| "loss": 0.7095, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.807719135084864, |
| "grad_norm": 2.1309132243222244, |
| "learning_rate": 9.154806816987386e-06, |
| "loss": 0.6603, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.8091141594977912, |
| "grad_norm": 2.054866199242643, |
| "learning_rate": 9.150280476434098e-06, |
| "loss": 0.6942, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.8105091839107185, |
| "grad_norm": 1.9524458748633242, |
| "learning_rate": 9.145743173285325e-06, |
| "loss": 0.6488, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.8119042083236456, |
| "grad_norm": 2.030621418231089, |
| "learning_rate": 9.141194919525949e-06, |
| "loss": 0.7108, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.8132992327365729, |
| "grad_norm": 2.084991031526505, |
| "learning_rate": 9.136635727169776e-06, |
| "loss": 0.7265, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.8146942571495002, |
| "grad_norm": 2.0622263794834224, |
| "learning_rate": 9.132065608259505e-06, |
| "loss": 0.7503, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.8160892815624273, |
| "grad_norm": 2.0982571575326228, |
| "learning_rate": 9.127484574866699e-06, |
| "loss": 0.7235, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.8174843059753546, |
| "grad_norm": 2.0514483972342834, |
| "learning_rate": 9.122892639091748e-06, |
| "loss": 0.7288, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.8188793303882818, |
| "grad_norm": 1.973001994554894, |
| "learning_rate": 9.118289813063842e-06, |
| "loss": 0.6897, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.820274354801209, |
| "grad_norm": 1.983523663982352, |
| "learning_rate": 9.11367610894093e-06, |
| "loss": 0.7193, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.8216693792141363, |
| "grad_norm": 2.011324854017405, |
| "learning_rate": 9.109051538909707e-06, |
| "loss": 0.6593, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.8230644036270635, |
| "grad_norm": 2.046660765022832, |
| "learning_rate": 9.104416115185557e-06, |
| "loss": 0.7679, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.8244594280399907, |
| "grad_norm": 2.1499540624366635, |
| "learning_rate": 9.099769850012539e-06, |
| "loss": 0.757, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.8258544524529179, |
| "grad_norm": 1.9870557541462837, |
| "learning_rate": 9.095112755663349e-06, |
| "loss": 0.6832, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.8272494768658452, |
| "grad_norm": 1.9850961987043347, |
| "learning_rate": 9.090444844439284e-06, |
| "loss": 0.698, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.8286445012787724, |
| "grad_norm": 1.9852851734683514, |
| "learning_rate": 9.085766128670218e-06, |
| "loss": 0.7543, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.8300395256916996, |
| "grad_norm": 2.128351674301273, |
| "learning_rate": 9.08107662071456e-06, |
| "loss": 0.7076, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.8314345501046269, |
| "grad_norm": 2.0178147387617122, |
| "learning_rate": 9.076376332959222e-06, |
| "loss": 0.7241, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.832829574517554, |
| "grad_norm": 2.086053250244484, |
| "learning_rate": 9.071665277819603e-06, |
| "loss": 0.6777, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.8342245989304813, |
| "grad_norm": 2.058823763171869, |
| "learning_rate": 9.066943467739529e-06, |
| "loss": 0.713, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.8356196233434086, |
| "grad_norm": 2.1956205519625835, |
| "learning_rate": 9.06221091519124e-06, |
| "loss": 0.7586, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.8370146477563357, |
| "grad_norm": 2.0804960970930537, |
| "learning_rate": 9.057467632675357e-06, |
| "loss": 0.7694, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.838409672169263, |
| "grad_norm": 1.9765569639718268, |
| "learning_rate": 9.05271363272083e-06, |
| "loss": 0.7285, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.8398046965821901, |
| "grad_norm": 2.046920137660995, |
| "learning_rate": 9.047948927884927e-06, |
| "loss": 0.7139, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.8411997209951174, |
| "grad_norm": 2.0022644204067523, |
| "learning_rate": 9.043173530753196e-06, |
| "loss": 0.7078, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.8425947454080447, |
| "grad_norm": 2.1957522908702862, |
| "learning_rate": 9.038387453939416e-06, |
| "loss": 0.7107, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.8439897698209718, |
| "grad_norm": 2.130770907443561, |
| "learning_rate": 9.033590710085584e-06, |
| "loss": 0.6822, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.8453847942338991, |
| "grad_norm": 1.966006179455826, |
| "learning_rate": 9.028783311861874e-06, |
| "loss": 0.6649, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.8467798186468263, |
| "grad_norm": 2.0759903419882164, |
| "learning_rate": 9.023965271966595e-06, |
| "loss": 0.6753, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.8481748430597535, |
| "grad_norm": 1.9940729760728366, |
| "learning_rate": 9.019136603126171e-06, |
| "loss": 0.7197, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.8495698674726808, |
| "grad_norm": 1.988407232009193, |
| "learning_rate": 9.0142973180951e-06, |
| "loss": 0.7094, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.850964891885608, |
| "grad_norm": 2.035677011161458, |
| "learning_rate": 9.00944742965592e-06, |
| "loss": 0.7328, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.8523599162985352, |
| "grad_norm": 1.8950053977425145, |
| "learning_rate": 9.004586950619182e-06, |
| "loss": 0.6823, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.8537549407114624, |
| "grad_norm": 2.1156907480266605, |
| "learning_rate": 8.999715893823404e-06, |
| "loss": 0.7423, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.8551499651243897, |
| "grad_norm": 1.961252039158567, |
| "learning_rate": 8.994834272135049e-06, |
| "loss": 0.7171, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.856544989537317, |
| "grad_norm": 2.0001585593137303, |
| "learning_rate": 8.989942098448485e-06, |
| "loss": 0.7121, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.8579400139502441, |
| "grad_norm": 1.9949836027553451, |
| "learning_rate": 8.985039385685952e-06, |
| "loss": 0.6975, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.8593350383631714, |
| "grad_norm": 1.9357238900964018, |
| "learning_rate": 8.98012614679753e-06, |
| "loss": 0.6363, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.8607300627760985, |
| "grad_norm": 2.2080743341990576, |
| "learning_rate": 8.975202394761098e-06, |
| "loss": 0.6823, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.8621250871890258, |
| "grad_norm": 2.0593736149801574, |
| "learning_rate": 8.970268142582312e-06, |
| "loss": 0.7101, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.8635201116019531, |
| "grad_norm": 2.0081932189174174, |
| "learning_rate": 8.965323403294553e-06, |
| "loss": 0.672, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.8649151360148802, |
| "grad_norm": 2.0431522588311037, |
| "learning_rate": 8.960368189958913e-06, |
| "loss": 0.7229, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.8663101604278075, |
| "grad_norm": 2.0999018236018636, |
| "learning_rate": 8.955402515664144e-06, |
| "loss": 0.7264, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.8677051848407347, |
| "grad_norm": 2.2421086786694304, |
| "learning_rate": 8.950426393526633e-06, |
| "loss": 0.7121, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.8691002092536619, |
| "grad_norm": 1.9548355595783216, |
| "learning_rate": 8.945439836690359e-06, |
| "loss": 0.7026, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.8704952336665892, |
| "grad_norm": 2.0065422789990333, |
| "learning_rate": 8.940442858326871e-06, |
| "loss": 0.6661, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.8718902580795164, |
| "grad_norm": 1.9311985998592487, |
| "learning_rate": 8.935435471635238e-06, |
| "loss": 0.7059, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.8732852824924436, |
| "grad_norm": 2.0030403022945245, |
| "learning_rate": 8.93041768984203e-06, |
| "loss": 0.6671, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.8746803069053708, |
| "grad_norm": 2.1110226512909716, |
| "learning_rate": 8.925389526201264e-06, |
| "loss": 0.6935, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.8760753313182981, |
| "grad_norm": 1.837637633874893, |
| "learning_rate": 8.920350993994387e-06, |
| "loss": 0.6376, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.8774703557312253, |
| "grad_norm": 2.0605269428260113, |
| "learning_rate": 8.915302106530234e-06, |
| "loss": 0.693, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.8788653801441525, |
| "grad_norm": 2.0408953219727155, |
| "learning_rate": 8.91024287714499e-06, |
| "loss": 0.6685, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.8802604045570798, |
| "grad_norm": 1.9732412676724707, |
| "learning_rate": 8.905173319202159e-06, |
| "loss": 0.696, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.8816554289700069, |
| "grad_norm": 1.9599933746722413, |
| "learning_rate": 8.900093446092523e-06, |
| "loss": 0.6919, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.8830504533829342, |
| "grad_norm": 2.0071687490178736, |
| "learning_rate": 8.895003271234116e-06, |
| "loss": 0.7947, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.8844454777958615, |
| "grad_norm": 2.0368651268041766, |
| "learning_rate": 8.889902808072178e-06, |
| "loss": 0.6618, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.8858405022087886, |
| "grad_norm": 2.006821931119106, |
| "learning_rate": 8.884792070079128e-06, |
| "loss": 0.6986, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.8872355266217159, |
| "grad_norm": 1.953819130025858, |
| "learning_rate": 8.879671070754527e-06, |
| "loss": 0.6885, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.8886305510346431, |
| "grad_norm": 2.077254864377799, |
| "learning_rate": 8.874539823625037e-06, |
| "loss": 0.7307, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.8900255754475703, |
| "grad_norm": 1.9249651603564881, |
| "learning_rate": 8.869398342244387e-06, |
| "loss": 0.6911, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.8914205998604976, |
| "grad_norm": 2.3835538973236616, |
| "learning_rate": 8.86424664019334e-06, |
| "loss": 0.7769, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.8928156242734248, |
| "grad_norm": 1.8989674380166495, |
| "learning_rate": 8.859084731079664e-06, |
| "loss": 0.6892, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.894210648686352, |
| "grad_norm": 2.0401897832182647, |
| "learning_rate": 8.853912628538072e-06, |
| "loss": 0.7131, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.8956056730992792, |
| "grad_norm": 1.9764252068678023, |
| "learning_rate": 8.84873034623022e-06, |
| "loss": 0.6564, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.8970006975122065, |
| "grad_norm": 1.92924996628619, |
| "learning_rate": 8.84353789784464e-06, |
| "loss": 0.7245, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.8983957219251337, |
| "grad_norm": 2.057502035838945, |
| "learning_rate": 8.83833529709672e-06, |
| "loss": 0.71, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.8997907463380609, |
| "grad_norm": 1.886888629633787, |
| "learning_rate": 8.833122557728667e-06, |
| "loss": 0.6765, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.9011857707509882, |
| "grad_norm": 2.105286098847447, |
| "learning_rate": 8.827899693509467e-06, |
| "loss": 0.7547, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.9025807951639153, |
| "grad_norm": 1.9861957912372208, |
| "learning_rate": 8.82266671823485e-06, |
| "loss": 0.7219, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.9039758195768426, |
| "grad_norm": 1.888915458083586, |
| "learning_rate": 8.817423645727252e-06, |
| "loss": 0.6884, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.9053708439897699, |
| "grad_norm": 2.277569646085658, |
| "learning_rate": 8.812170489835784e-06, |
| "loss": 0.7798, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.906765868402697, |
| "grad_norm": 1.9944963507674216, |
| "learning_rate": 8.806907264436183e-06, |
| "loss": 0.6861, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.9081608928156243, |
| "grad_norm": 1.9298477509530116, |
| "learning_rate": 8.801633983430794e-06, |
| "loss": 0.6591, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.9095559172285514, |
| "grad_norm": 1.8705807621214512, |
| "learning_rate": 8.796350660748516e-06, |
| "loss": 0.6462, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.9109509416414787, |
| "grad_norm": 1.781871433076164, |
| "learning_rate": 8.791057310344775e-06, |
| "loss": 0.696, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.912345966054406, |
| "grad_norm": 2.03708716140148, |
| "learning_rate": 8.785753946201484e-06, |
| "loss": 0.7908, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.9137409904673331, |
| "grad_norm": 1.9723359587123608, |
| "learning_rate": 8.780440582327005e-06, |
| "loss": 0.6875, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.9151360148802604, |
| "grad_norm": 1.9079177689320517, |
| "learning_rate": 8.775117232756116e-06, |
| "loss": 0.6515, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.9165310392931876, |
| "grad_norm": 2.0101883192315175, |
| "learning_rate": 8.769783911549968e-06, |
| "loss": 0.7285, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.9179260637061148, |
| "grad_norm": 1.9520878357376525, |
| "learning_rate": 8.764440632796055e-06, |
| "loss": 0.7019, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.9193210881190421, |
| "grad_norm": 1.8645433147053474, |
| "learning_rate": 8.75908741060817e-06, |
| "loss": 0.723, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.9207161125319693, |
| "grad_norm": 1.9232448575331198, |
| "learning_rate": 8.75372425912637e-06, |
| "loss": 0.7015, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.9221111369448965, |
| "grad_norm": 2.2313621348056074, |
| "learning_rate": 8.748351192516943e-06, |
| "loss": 0.779, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.9235061613578237, |
| "grad_norm": 2.0761748347303737, |
| "learning_rate": 8.742968224972366e-06, |
| "loss": 0.695, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.924901185770751, |
| "grad_norm": 1.9030018961305923, |
| "learning_rate": 8.737575370711265e-06, |
| "loss": 0.682, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.9262962101836782, |
| "grad_norm": 2.039629984371781, |
| "learning_rate": 8.732172643978383e-06, |
| "loss": 0.7116, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.9276912345966054, |
| "grad_norm": 2.0754184443315475, |
| "learning_rate": 8.726760059044542e-06, |
| "loss": 0.6939, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.9290862590095327, |
| "grad_norm": 2.138276597253035, |
| "learning_rate": 8.721337630206603e-06, |
| "loss": 0.7504, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.93048128342246, |
| "grad_norm": 2.088090556558224, |
| "learning_rate": 8.715905371787426e-06, |
| "loss": 0.7307, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.9318763078353871, |
| "grad_norm": 1.9914456952917818, |
| "learning_rate": 8.710463298135836e-06, |
| "loss": 0.6436, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.9332713322483144, |
| "grad_norm": 1.930717236841167, |
| "learning_rate": 8.705011423626589e-06, |
| "loss": 0.7399, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.9346663566612415, |
| "grad_norm": 2.1376988846220746, |
| "learning_rate": 8.699549762660318e-06, |
| "loss": 0.6954, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.9360613810741688, |
| "grad_norm": 1.9710970809397323, |
| "learning_rate": 8.69407832966352e-06, |
| "loss": 0.7039, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.9374564054870961, |
| "grad_norm": 2.0867427607781757, |
| "learning_rate": 8.688597139088494e-06, |
| "loss": 0.7234, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.9388514299000232, |
| "grad_norm": 1.8578210034776752, |
| "learning_rate": 8.683106205413316e-06, |
| "loss": 0.6676, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.9402464543129505, |
| "grad_norm": 1.7992418975827844, |
| "learning_rate": 8.677605543141797e-06, |
| "loss": 0.6473, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.9416414787258777, |
| "grad_norm": 2.208549526088649, |
| "learning_rate": 8.672095166803445e-06, |
| "loss": 0.734, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.9430365031388049, |
| "grad_norm": 1.9309424859366655, |
| "learning_rate": 8.666575090953426e-06, |
| "loss": 0.7099, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.9444315275517322, |
| "grad_norm": 2.0689005805396006, |
| "learning_rate": 8.661045330172533e-06, |
| "loss": 0.7762, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.9458265519646594, |
| "grad_norm": 1.9327832133972194, |
| "learning_rate": 8.65550589906713e-06, |
| "loss": 0.6634, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.9472215763775866, |
| "grad_norm": 1.8654184391224133, |
| "learning_rate": 8.649956812269134e-06, |
| "loss": 0.6768, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.9486166007905138, |
| "grad_norm": 1.8537317310543904, |
| "learning_rate": 8.644398084435959e-06, |
| "loss": 0.6687, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.9500116252034411, |
| "grad_norm": 1.9620198811804095, |
| "learning_rate": 8.63882973025049e-06, |
| "loss": 0.6837, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.9514066496163683, |
| "grad_norm": 1.8902874912708485, |
| "learning_rate": 8.63325176442104e-06, |
| "loss": 0.7166, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.9528016740292955, |
| "grad_norm": 1.9012648753801518, |
| "learning_rate": 8.627664201681305e-06, |
| "loss": 0.6771, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.9541966984422228, |
| "grad_norm": 2.0772673760923657, |
| "learning_rate": 8.622067056790333e-06, |
| "loss": 0.7442, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.9555917228551499, |
| "grad_norm": 1.881931761326016, |
| "learning_rate": 8.616460344532483e-06, |
| "loss": 0.7081, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.9569867472680772, |
| "grad_norm": 1.886919445097637, |
| "learning_rate": 8.610844079717387e-06, |
| "loss": 0.6433, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.9583817716810045, |
| "grad_norm": 2.002169759368971, |
| "learning_rate": 8.605218277179907e-06, |
| "loss": 0.7323, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.9597767960939316, |
| "grad_norm": 1.7540706653539795, |
| "learning_rate": 8.599582951780095e-06, |
| "loss": 0.6229, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.9611718205068589, |
| "grad_norm": 1.9764352278914321, |
| "learning_rate": 8.593938118403164e-06, |
| "loss": 0.6744, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.9625668449197861, |
| "grad_norm": 2.043953151411979, |
| "learning_rate": 8.588283791959437e-06, |
| "loss": 0.7051, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.9639618693327133, |
| "grad_norm": 2.0060893469750303, |
| "learning_rate": 8.582619987384311e-06, |
| "loss": 0.6858, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.9653568937456406, |
| "grad_norm": 1.885119316458161, |
| "learning_rate": 8.57694671963822e-06, |
| "loss": 0.6503, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.9667519181585678, |
| "grad_norm": 2.027815652298139, |
| "learning_rate": 8.571264003706596e-06, |
| "loss": 0.7235, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.968146942571495, |
| "grad_norm": 1.905998996142595, |
| "learning_rate": 8.565571854599825e-06, |
| "loss": 0.6732, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.9695419669844222, |
| "grad_norm": 2.066485530703545, |
| "learning_rate": 8.559870287353214e-06, |
| "loss": 0.737, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.9709369913973495, |
| "grad_norm": 1.9622091919063105, |
| "learning_rate": 8.554159317026939e-06, |
| "loss": 0.6509, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.9723320158102767, |
| "grad_norm": 1.988578644158127, |
| "learning_rate": 8.548438958706022e-06, |
| "loss": 0.7456, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.9737270402232039, |
| "grad_norm": 2.116243375506424, |
| "learning_rate": 8.542709227500276e-06, |
| "loss": 0.765, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.9751220646361312, |
| "grad_norm": 1.9231890289170668, |
| "learning_rate": 8.536970138544278e-06, |
| "loss": 0.6652, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.9765170890490583, |
| "grad_norm": 1.9552199841603932, |
| "learning_rate": 8.531221706997316e-06, |
| "loss": 0.6576, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9779121134619856, |
| "grad_norm": 2.031756235992153, |
| "learning_rate": 8.525463948043365e-06, |
| "loss": 0.7072, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.9793071378749129, |
| "grad_norm": 1.9389046550623854, |
| "learning_rate": 8.519696876891024e-06, |
| "loss": 0.6442, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.98070216228784, |
| "grad_norm": 1.919889704452434, |
| "learning_rate": 8.513920508773499e-06, |
| "loss": 0.6964, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.9820971867007673, |
| "grad_norm": 1.9236646080238848, |
| "learning_rate": 8.508134858948553e-06, |
| "loss": 0.6919, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.9834922111136944, |
| "grad_norm": 1.9493128891249456, |
| "learning_rate": 8.502339942698463e-06, |
| "loss": 0.6646, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.9848872355266217, |
| "grad_norm": 1.9673672208067479, |
| "learning_rate": 8.496535775329982e-06, |
| "loss": 0.7233, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.986282259939549, |
| "grad_norm": 1.9621954045331194, |
| "learning_rate": 8.4907223721743e-06, |
| "loss": 0.6778, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.9876772843524761, |
| "grad_norm": 1.8046753114923484, |
| "learning_rate": 8.484899748587003e-06, |
| "loss": 0.6646, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.9890723087654034, |
| "grad_norm": 2.2295174686287274, |
| "learning_rate": 8.479067919948032e-06, |
| "loss": 0.6989, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.9904673331783306, |
| "grad_norm": 2.1070329009446156, |
| "learning_rate": 8.473226901661643e-06, |
| "loss": 0.7844, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.9918623575912578, |
| "grad_norm": 2.1002202467765927, |
| "learning_rate": 8.46737670915636e-06, |
| "loss": 0.6869, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.9932573820041851, |
| "grad_norm": 2.0464784128561084, |
| "learning_rate": 8.46151735788495e-06, |
| "loss": 0.6978, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.9946524064171123, |
| "grad_norm": 1.8705732417938106, |
| "learning_rate": 8.455648863324364e-06, |
| "loss": 0.6423, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.9960474308300395, |
| "grad_norm": 1.8660210155077552, |
| "learning_rate": 8.449771240975707e-06, |
| "loss": 0.6669, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.9974424552429667, |
| "grad_norm": 1.9315548591495726, |
| "learning_rate": 8.443884506364192e-06, |
| "loss": 0.6853, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.998837479655894, |
| "grad_norm": 1.9920543550527006, |
| "learning_rate": 8.437988675039108e-06, |
| "loss": 0.6772, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.0013950244129273, |
| "grad_norm": 2.763874765719216, |
| "learning_rate": 8.432083762573761e-06, |
| "loss": 1.1699, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.0027900488258545, |
| "grad_norm": 1.754525337447559, |
| "learning_rate": 8.426169784565452e-06, |
| "loss": 0.4932, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.0041850732387816, |
| "grad_norm": 1.846326658857791, |
| "learning_rate": 8.420246756635431e-06, |
| "loss": 0.448, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.0055800976517089, |
| "grad_norm": 1.8768804454261978, |
| "learning_rate": 8.414314694428842e-06, |
| "loss": 0.4791, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.0069751220646361, |
| "grad_norm": 1.6152669499441352, |
| "learning_rate": 8.408373613614699e-06, |
| "loss": 0.491, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.0083701464775634, |
| "grad_norm": 1.8596160204714411, |
| "learning_rate": 8.40242352988584e-06, |
| "loss": 0.4418, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.0097651708904907, |
| "grad_norm": 1.8162139230993801, |
| "learning_rate": 8.396464458958876e-06, |
| "loss": 0.5305, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.0111601953034177, |
| "grad_norm": 2.0254017362191536, |
| "learning_rate": 8.390496416574166e-06, |
| "loss": 0.481, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.012555219716345, |
| "grad_norm": 2.1506267595528867, |
| "learning_rate": 8.384519418495755e-06, |
| "loss": 0.5195, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.0139502441292723, |
| "grad_norm": 2.101119705098204, |
| "learning_rate": 8.378533480511355e-06, |
| "loss": 0.4906, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.0153452685421995, |
| "grad_norm": 2.684686947501843, |
| "learning_rate": 8.372538618432282e-06, |
| "loss": 0.513, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.0167402929551268, |
| "grad_norm": 2.2485542720043092, |
| "learning_rate": 8.366534848093434e-06, |
| "loss": 0.5166, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.0181353173680538, |
| "grad_norm": 2.28734638846399, |
| "learning_rate": 8.360522185353234e-06, |
| "loss": 0.4826, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.0195303417809811, |
| "grad_norm": 1.9701409436889183, |
| "learning_rate": 8.354500646093592e-06, |
| "loss": 0.4653, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.0209253661939084, |
| "grad_norm": 2.2247454732523426, |
| "learning_rate": 8.348470246219872e-06, |
| "loss": 0.5245, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.0223203906068357, |
| "grad_norm": 2.007192211701691, |
| "learning_rate": 8.342431001660826e-06, |
| "loss": 0.4565, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.023715415019763, |
| "grad_norm": 1.9247320693390784, |
| "learning_rate": 8.33638292836859e-06, |
| "loss": 0.4874, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.02511043943269, |
| "grad_norm": 1.9650014172175294, |
| "learning_rate": 8.330326042318605e-06, |
| "loss": 0.5046, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.0265054638456172, |
| "grad_norm": 1.8423438549714501, |
| "learning_rate": 8.324260359509594e-06, |
| "loss": 0.4836, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.0279004882585445, |
| "grad_norm": 1.9039412894123366, |
| "learning_rate": 8.31818589596352e-06, |
| "loss": 0.4582, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.0292955126714718, |
| "grad_norm": 2.1936250318321453, |
| "learning_rate": 8.312102667725534e-06, |
| "loss": 0.5209, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.030690537084399, |
| "grad_norm": 1.84976943631084, |
| "learning_rate": 8.306010690863943e-06, |
| "loss": 0.4619, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.032085561497326, |
| "grad_norm": 2.0139559424062443, |
| "learning_rate": 8.299909981470159e-06, |
| "loss": 0.487, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.0334805859102534, |
| "grad_norm": 2.0270755400609515, |
| "learning_rate": 8.29380055565866e-06, |
| "loss": 0.5021, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.0348756103231807, |
| "grad_norm": 1.9790811313609495, |
| "learning_rate": 8.28768242956695e-06, |
| "loss": 0.4792, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.036270634736108, |
| "grad_norm": 1.931174120623029, |
| "learning_rate": 8.281555619355515e-06, |
| "loss": 0.5543, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.0376656591490352, |
| "grad_norm": 2.094412424706352, |
| "learning_rate": 8.275420141207775e-06, |
| "loss": 0.5025, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.0390606835619622, |
| "grad_norm": 2.086934890928623, |
| "learning_rate": 8.269276011330048e-06, |
| "loss": 0.4627, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.0404557079748895, |
| "grad_norm": 1.9002029258471083, |
| "learning_rate": 8.263123245951504e-06, |
| "loss": 0.5457, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.0418507323878168, |
| "grad_norm": 2.114961818573959, |
| "learning_rate": 8.256961861324127e-06, |
| "loss": 0.479, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.043245756800744, |
| "grad_norm": 2.153993003889234, |
| "learning_rate": 8.250791873722662e-06, |
| "loss": 0.4701, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.0446407812136713, |
| "grad_norm": 2.0652134135665343, |
| "learning_rate": 8.244613299444581e-06, |
| "loss": 0.5379, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.0460358056265984, |
| "grad_norm": 1.9967021838933243, |
| "learning_rate": 8.238426154810035e-06, |
| "loss": 0.4514, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.0474308300395256, |
| "grad_norm": 2.0039576329601037, |
| "learning_rate": 8.232230456161819e-06, |
| "loss": 0.4457, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.048825854452453, |
| "grad_norm": 2.12340999999131, |
| "learning_rate": 8.226026219865313e-06, |
| "loss": 0.509, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.0502208788653802, |
| "grad_norm": 2.082417711494918, |
| "learning_rate": 8.219813462308458e-06, |
| "loss": 0.4752, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.0516159032783075, |
| "grad_norm": 2.1077205247643156, |
| "learning_rate": 8.213592199901692e-06, |
| "loss": 0.4321, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.0530109276912345, |
| "grad_norm": 1.919259142002624, |
| "learning_rate": 8.207362449077932e-06, |
| "loss": 0.4702, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.0544059521041618, |
| "grad_norm": 2.0585541043039646, |
| "learning_rate": 8.201124226292505e-06, |
| "loss": 0.4796, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.055800976517089, |
| "grad_norm": 1.981427974951629, |
| "learning_rate": 8.19487754802312e-06, |
| "loss": 0.4664, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.0571960009300163, |
| "grad_norm": 2.2319241325523786, |
| "learning_rate": 8.18862243076982e-06, |
| "loss": 0.475, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.0585910253429436, |
| "grad_norm": 2.039055467549461, |
| "learning_rate": 8.18235889105494e-06, |
| "loss": 0.4312, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.0599860497558706, |
| "grad_norm": 2.0535980015454856, |
| "learning_rate": 8.17608694542306e-06, |
| "loss": 0.5021, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.061381074168798, |
| "grad_norm": 2.0117098685514128, |
| "learning_rate": 8.169806610440966e-06, |
| "loss": 0.4974, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.0627760985817252, |
| "grad_norm": 2.123763035186518, |
| "learning_rate": 8.163517902697602e-06, |
| "loss": 0.5005, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.0641711229946524, |
| "grad_norm": 2.0459984276686622, |
| "learning_rate": 8.157220838804026e-06, |
| "loss": 0.5113, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.0655661474075797, |
| "grad_norm": 2.086483588594984, |
| "learning_rate": 8.150915435393371e-06, |
| "loss": 0.475, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.0669611718205068, |
| "grad_norm": 2.0259222979461917, |
| "learning_rate": 8.1446017091208e-06, |
| "loss": 0.4294, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.068356196233434, |
| "grad_norm": 1.939016863108113, |
| "learning_rate": 8.138279676663458e-06, |
| "loss": 0.5281, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.0697512206463613, |
| "grad_norm": 2.188159597757035, |
| "learning_rate": 8.131949354720425e-06, |
| "loss": 0.4351, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.0711462450592886, |
| "grad_norm": 1.9805506282461658, |
| "learning_rate": 8.125610760012685e-06, |
| "loss": 0.4761, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.0725412694722158, |
| "grad_norm": 1.993266574219508, |
| "learning_rate": 8.11926390928307e-06, |
| "loss": 0.5198, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.073936293885143, |
| "grad_norm": 2.051047444269424, |
| "learning_rate": 8.112908819296217e-06, |
| "loss": 0.4673, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.0753313182980702, |
| "grad_norm": 2.017405300453376, |
| "learning_rate": 8.106545506838533e-06, |
| "loss": 0.4968, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.0767263427109974, |
| "grad_norm": 1.926246923060768, |
| "learning_rate": 8.100173988718136e-06, |
| "loss": 0.4685, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.0781213671239247, |
| "grad_norm": 2.0051822197441838, |
| "learning_rate": 8.093794281764824e-06, |
| "loss": 0.4998, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.079516391536852, |
| "grad_norm": 2.192605416056662, |
| "learning_rate": 8.087406402830026e-06, |
| "loss": 0.4681, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.080911415949779, |
| "grad_norm": 2.03492190725647, |
| "learning_rate": 8.081010368786751e-06, |
| "loss": 0.4774, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.0823064403627063, |
| "grad_norm": 1.8916433025293278, |
| "learning_rate": 8.074606196529554e-06, |
| "loss": 0.4679, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.0837014647756336, |
| "grad_norm": 2.029167018400251, |
| "learning_rate": 8.068193902974482e-06, |
| "loss": 0.471, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.0850964891885608, |
| "grad_norm": 2.0278286303865816, |
| "learning_rate": 8.06177350505904e-06, |
| "loss": 0.437, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.086491513601488, |
| "grad_norm": 2.1953587700733186, |
| "learning_rate": 8.055345019742133e-06, |
| "loss": 0.4578, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.0878865380144151, |
| "grad_norm": 2.0927543689762023, |
| "learning_rate": 8.048908464004032e-06, |
| "loss": 0.5168, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.0892815624273424, |
| "grad_norm": 2.250143542210799, |
| "learning_rate": 8.042463854846325e-06, |
| "loss": 0.4474, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.0906765868402697, |
| "grad_norm": 2.0315426491546056, |
| "learning_rate": 8.036011209291872e-06, |
| "loss": 0.4701, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.092071611253197, |
| "grad_norm": 2.189955370940604, |
| "learning_rate": 8.029550544384758e-06, |
| "loss": 0.4567, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.0934666356661242, |
| "grad_norm": 2.098770628449276, |
| "learning_rate": 8.023081877190257e-06, |
| "loss": 0.5319, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.0948616600790513, |
| "grad_norm": 2.2036959427181393, |
| "learning_rate": 8.016605224794773e-06, |
| "loss": 0.4803, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.0962566844919786, |
| "grad_norm": 2.061217417191479, |
| "learning_rate": 8.010120604305806e-06, |
| "loss": 0.464, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.0976517089049058, |
| "grad_norm": 2.0770104851414857, |
| "learning_rate": 8.003628032851904e-06, |
| "loss": 0.477, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.099046733317833, |
| "grad_norm": 2.0452867521427107, |
| "learning_rate": 7.997127527582613e-06, |
| "loss": 0.4644, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.1004417577307604, |
| "grad_norm": 2.5829534069096747, |
| "learning_rate": 7.990619105668437e-06, |
| "loss": 0.4444, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.1018367821436876, |
| "grad_norm": 1.7402633467783815, |
| "learning_rate": 7.984102784300794e-06, |
| "loss": 0.4822, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.1032318065566147, |
| "grad_norm": 2.015841114887429, |
| "learning_rate": 7.977578580691963e-06, |
| "loss": 0.4916, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.104626830969542, |
| "grad_norm": 1.9106009742536385, |
| "learning_rate": 7.971046512075047e-06, |
| "loss": 0.4409, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.1060218553824692, |
| "grad_norm": 2.05674016861173, |
| "learning_rate": 7.964506595703923e-06, |
| "loss": 0.4565, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.1074168797953965, |
| "grad_norm": 1.9348516245501983, |
| "learning_rate": 7.957958848853193e-06, |
| "loss": 0.5205, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.1088119042083235, |
| "grad_norm": 2.199190699063569, |
| "learning_rate": 7.95140328881815e-06, |
| "loss": 0.5013, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.1102069286212508, |
| "grad_norm": 2.0083712618435277, |
| "learning_rate": 7.944839932914718e-06, |
| "loss": 0.4876, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.111601953034178, |
| "grad_norm": 2.1922894266968673, |
| "learning_rate": 7.938268798479419e-06, |
| "loss": 0.4816, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.1129969774471054, |
| "grad_norm": 1.901000847965792, |
| "learning_rate": 7.931689902869314e-06, |
| "loss": 0.5018, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.1143920018600326, |
| "grad_norm": 2.222074221286817, |
| "learning_rate": 7.925103263461971e-06, |
| "loss": 0.4994, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.11578702627296, |
| "grad_norm": 1.940812874546972, |
| "learning_rate": 7.91850889765541e-06, |
| "loss": 0.4576, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.117182050685887, |
| "grad_norm": 2.0817228977455953, |
| "learning_rate": 7.91190682286806e-06, |
| "loss": 0.4388, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.1185770750988142, |
| "grad_norm": 2.1179633199248316, |
| "learning_rate": 7.905297056538713e-06, |
| "loss": 0.4652, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.1199720995117415, |
| "grad_norm": 1.9607161065971666, |
| "learning_rate": 7.898679616126474e-06, |
| "loss": 0.4803, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.1213671239246688, |
| "grad_norm": 1.9838680808121743, |
| "learning_rate": 7.892054519110726e-06, |
| "loss": 0.4878, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.1227621483375958, |
| "grad_norm": 2.107089413069441, |
| "learning_rate": 7.885421782991064e-06, |
| "loss": 0.5175, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.124157172750523, |
| "grad_norm": 2.0093683502992152, |
| "learning_rate": 7.878781425287277e-06, |
| "loss": 0.446, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.1255521971634503, |
| "grad_norm": 1.9616515420321192, |
| "learning_rate": 7.872133463539274e-06, |
| "loss": 0.4333, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.1269472215763776, |
| "grad_norm": 2.001100054062502, |
| "learning_rate": 7.86547791530705e-06, |
| "loss": 0.5393, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.1283422459893049, |
| "grad_norm": 1.9580216968373765, |
| "learning_rate": 7.858814798170644e-06, |
| "loss": 0.492, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.1297372704022322, |
| "grad_norm": 2.025423359557969, |
| "learning_rate": 7.852144129730087e-06, |
| "loss": 0.4743, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.1311322948151592, |
| "grad_norm": 2.218171379188175, |
| "learning_rate": 7.84546592760535e-06, |
| "loss": 0.489, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.1325273192280865, |
| "grad_norm": 1.9026069806809947, |
| "learning_rate": 7.83878020943631e-06, |
| "loss": 0.5161, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.1339223436410137, |
| "grad_norm": 2.1794929058113146, |
| "learning_rate": 7.832086992882697e-06, |
| "loss": 0.4831, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.135317368053941, |
| "grad_norm": 1.7960116856711474, |
| "learning_rate": 7.825386295624043e-06, |
| "loss": 0.4499, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.136712392466868, |
| "grad_norm": 2.0247282315582376, |
| "learning_rate": 7.818678135359641e-06, |
| "loss": 0.4982, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.1381074168797953, |
| "grad_norm": 2.0547296042290792, |
| "learning_rate": 7.811962529808499e-06, |
| "loss": 0.477, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.1395024412927226, |
| "grad_norm": 2.0574834628921446, |
| "learning_rate": 7.805239496709291e-06, |
| "loss": 0.4546, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.1408974657056499, |
| "grad_norm": 1.9394156746898406, |
| "learning_rate": 7.798509053820305e-06, |
| "loss": 0.4439, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.1422924901185771, |
| "grad_norm": 2.0254153845417733, |
| "learning_rate": 7.79177121891941e-06, |
| "loss": 0.4619, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.1436875145315044, |
| "grad_norm": 2.096601881606947, |
| "learning_rate": 7.785026009803993e-06, |
| "loss": 0.5061, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.1450825389444315, |
| "grad_norm": 2.262505975665749, |
| "learning_rate": 7.778273444290921e-06, |
| "loss": 0.4799, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.1464775633573587, |
| "grad_norm": 2.119118003205617, |
| "learning_rate": 7.771513540216496e-06, |
| "loss": 0.4928, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.147872587770286, |
| "grad_norm": 2.133703419481106, |
| "learning_rate": 7.764746315436399e-06, |
| "loss": 0.4542, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.1492676121832133, |
| "grad_norm": 2.070568845440914, |
| "learning_rate": 7.75797178782565e-06, |
| "loss": 0.4523, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.1506626365961403, |
| "grad_norm": 1.7991652992094438, |
| "learning_rate": 7.751189975278561e-06, |
| "loss": 0.4077, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.1520576610090676, |
| "grad_norm": 2.038280747930084, |
| "learning_rate": 7.744400895708683e-06, |
| "loss": 0.4647, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.1534526854219949, |
| "grad_norm": 2.0602555390170316, |
| "learning_rate": 7.737604567048766e-06, |
| "loss": 0.5507, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.1548477098349221, |
| "grad_norm": 2.180790828248436, |
| "learning_rate": 7.730801007250704e-06, |
| "loss": 0.4655, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.1562427342478494, |
| "grad_norm": 2.0458938855615605, |
| "learning_rate": 7.72399023428549e-06, |
| "loss": 0.4643, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.1576377586607767, |
| "grad_norm": 1.9127629534222181, |
| "learning_rate": 7.717172266143178e-06, |
| "loss": 0.5365, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.1590327830737037, |
| "grad_norm": 2.209826641763816, |
| "learning_rate": 7.710347120832821e-06, |
| "loss": 0.4545, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.160427807486631, |
| "grad_norm": 2.010738480822612, |
| "learning_rate": 7.703514816382432e-06, |
| "loss": 0.4423, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.1618228318995583, |
| "grad_norm": 2.0297966468628457, |
| "learning_rate": 7.696675370838929e-06, |
| "loss": 0.4518, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.1632178563124855, |
| "grad_norm": 2.0826482950087226, |
| "learning_rate": 7.689828802268102e-06, |
| "loss": 0.4533, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.1646128807254126, |
| "grad_norm": 1.8008744406646637, |
| "learning_rate": 7.682975128754548e-06, |
| "loss": 0.451, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.1660079051383399, |
| "grad_norm": 2.0565446537269194, |
| "learning_rate": 7.676114368401635e-06, |
| "loss": 0.5319, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.1674029295512671, |
| "grad_norm": 2.1287025934347086, |
| "learning_rate": 7.66924653933145e-06, |
| "loss": 0.4858, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.1687979539641944, |
| "grad_norm": 2.0809030253570944, |
| "learning_rate": 7.662371659684749e-06, |
| "loss": 0.508, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.1701929783771217, |
| "grad_norm": 2.091119136946387, |
| "learning_rate": 7.655489747620913e-06, |
| "loss": 0.4702, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.171588002790049, |
| "grad_norm": 2.0095956535575987, |
| "learning_rate": 7.648600821317901e-06, |
| "loss": 0.5005, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.172983027202976, |
| "grad_norm": 2.108702760839066, |
| "learning_rate": 7.641704898972194e-06, |
| "loss": 0.4931, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.1743780516159033, |
| "grad_norm": 2.1002645954174577, |
| "learning_rate": 7.634801998798755e-06, |
| "loss": 0.5118, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.1757730760288305, |
| "grad_norm": 2.060993512560868, |
| "learning_rate": 7.6278921390309834e-06, |
| "loss": 0.4357, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.1771681004417578, |
| "grad_norm": 1.8482022743666582, |
| "learning_rate": 7.620975337920653e-06, |
| "loss": 0.4732, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.1785631248546848, |
| "grad_norm": 2.0436494232781435, |
| "learning_rate": 7.6140516137378786e-06, |
| "loss": 0.4204, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.179958149267612, |
| "grad_norm": 1.916148888628701, |
| "learning_rate": 7.607120984771058e-06, |
| "loss": 0.4595, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.1813531736805394, |
| "grad_norm": 1.971942182009117, |
| "learning_rate": 7.600183469326829e-06, |
| "loss": 0.464, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.1827481980934667, |
| "grad_norm": 2.0099670391093576, |
| "learning_rate": 7.593239085730022e-06, |
| "loss": 0.4997, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.184143222506394, |
| "grad_norm": 2.364610588530083, |
| "learning_rate": 7.586287852323605e-06, |
| "loss": 0.511, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.1855382469193212, |
| "grad_norm": 2.0980427797891323, |
| "learning_rate": 7.579329787468639e-06, |
| "loss": 0.4545, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.1869332713322482, |
| "grad_norm": 2.0688493550588403, |
| "learning_rate": 7.572364909544235e-06, |
| "loss": 0.5001, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.1883282957451755, |
| "grad_norm": 1.8729866135500641, |
| "learning_rate": 7.565393236947494e-06, |
| "loss": 0.4338, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.1897233201581028, |
| "grad_norm": 2.168283835304086, |
| "learning_rate": 7.558414788093467e-06, |
| "loss": 0.4762, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.19111834457103, |
| "grad_norm": 2.066651188721128, |
| "learning_rate": 7.551429581415104e-06, |
| "loss": 0.5172, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.192513368983957, |
| "grad_norm": 2.1920591445911364, |
| "learning_rate": 7.5444376353632064e-06, |
| "loss": 0.4567, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.1939083933968844, |
| "grad_norm": 1.831321422970195, |
| "learning_rate": 7.537438968406372e-06, |
| "loss": 0.491, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.1953034178098116, |
| "grad_norm": 2.0968609844910056, |
| "learning_rate": 7.530433599030962e-06, |
| "loss": 0.4656, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.196698442222739, |
| "grad_norm": 2.1422512387969994, |
| "learning_rate": 7.5234215457410255e-06, |
| "loss": 0.5081, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.1980934666356662, |
| "grad_norm": 2.2362083848047596, |
| "learning_rate": 7.516402827058283e-06, |
| "loss": 0.5361, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.1994884910485935, |
| "grad_norm": 1.9286458341926276, |
| "learning_rate": 7.509377461522049e-06, |
| "loss": 0.4805, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.2008835154615205, |
| "grad_norm": 2.3123734766029527, |
| "learning_rate": 7.502345467689202e-06, |
| "loss": 0.5048, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.2022785398744478, |
| "grad_norm": 2.116964583062056, |
| "learning_rate": 7.4953068641341255e-06, |
| "loss": 0.4499, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.203673564287375, |
| "grad_norm": 2.1029489962024144, |
| "learning_rate": 7.488261669448662e-06, |
| "loss": 0.5238, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.2050685887003023, |
| "grad_norm": 2.0626515202597777, |
| "learning_rate": 7.4812099022420636e-06, |
| "loss": 0.4624, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.2064636131132294, |
| "grad_norm": 2.004582769305792, |
| "learning_rate": 7.474151581140947e-06, |
| "loss": 0.4733, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.2078586375261566, |
| "grad_norm": 2.1127969533998843, |
| "learning_rate": 7.4670867247892346e-06, |
| "loss": 0.4704, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.209253661939084, |
| "grad_norm": 1.8514576184123828, |
| "learning_rate": 7.460015351848115e-06, |
| "loss": 0.4825, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.2106486863520112, |
| "grad_norm": 2.078998051567728, |
| "learning_rate": 7.4529374809959895e-06, |
| "loss": 0.4166, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.2120437107649384, |
| "grad_norm": 1.9641129542739804, |
| "learning_rate": 7.445853130928422e-06, |
| "loss": 0.5042, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.2134387351778657, |
| "grad_norm": 2.2654120011022414, |
| "learning_rate": 7.438762320358089e-06, |
| "loss": 0.5175, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.2148337595907928, |
| "grad_norm": 2.0429675987113134, |
| "learning_rate": 7.431665068014737e-06, |
| "loss": 0.4706, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.21622878400372, |
| "grad_norm": 1.982257979787058, |
| "learning_rate": 7.424561392645122e-06, |
| "loss": 0.4706, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.2176238084166473, |
| "grad_norm": 2.076586625001963, |
| "learning_rate": 7.417451313012971e-06, |
| "loss": 0.4895, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.2190188328295746, |
| "grad_norm": 2.173072442479642, |
| "learning_rate": 7.410334847898921e-06, |
| "loss": 0.5118, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.2204138572425016, |
| "grad_norm": 2.047041786534623, |
| "learning_rate": 7.403212016100484e-06, |
| "loss": 0.463, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.221808881655429, |
| "grad_norm": 2.011805245547945, |
| "learning_rate": 7.396082836431981e-06, |
| "loss": 0.4624, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.2232039060683562, |
| "grad_norm": 2.046453837182149, |
| "learning_rate": 7.388947327724506e-06, |
| "loss": 0.4503, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.2245989304812834, |
| "grad_norm": 2.0074523324744233, |
| "learning_rate": 7.3818055088258676e-06, |
| "loss": 0.465, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.2259939548942107, |
| "grad_norm": 2.15744694552415, |
| "learning_rate": 7.374657398600542e-06, |
| "loss": 0.4489, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.227388979307138, |
| "grad_norm": 2.010685773385559, |
| "learning_rate": 7.367503015929627e-06, |
| "loss": 0.4692, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.228784003720065, |
| "grad_norm": 2.0352429043175055, |
| "learning_rate": 7.3603423797107845e-06, |
| "loss": 0.4546, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.2301790281329923, |
| "grad_norm": 2.1476461439581245, |
| "learning_rate": 7.353175508858195e-06, |
| "loss": 0.4962, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.2315740525459196, |
| "grad_norm": 2.1997937220709405, |
| "learning_rate": 7.3460024223025095e-06, |
| "loss": 0.5091, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.2329690769588468, |
| "grad_norm": 1.666080626807778, |
| "learning_rate": 7.338823138990796e-06, |
| "loss": 0.4352, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.234364101371774, |
| "grad_norm": 2.06513419711823, |
| "learning_rate": 7.33163767788649e-06, |
| "loss": 0.4584, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.2357591257847012, |
| "grad_norm": 2.0063814585695123, |
| "learning_rate": 7.324446057969346e-06, |
| "loss": 0.5118, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.2371541501976284, |
| "grad_norm": 2.136466810820014, |
| "learning_rate": 7.317248298235387e-06, |
| "loss": 0.4905, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.2385491746105557, |
| "grad_norm": 2.005738467998993, |
| "learning_rate": 7.3100444176968514e-06, |
| "loss": 0.4471, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.239944199023483, |
| "grad_norm": 2.1451364094879155, |
| "learning_rate": 7.302834435382147e-06, |
| "loss": 0.5487, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.2413392234364102, |
| "grad_norm": 2.121313221799734, |
| "learning_rate": 7.2956183703358e-06, |
| "loss": 0.431, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.2427342478493373, |
| "grad_norm": 1.9010978340983133, |
| "learning_rate": 7.288396241618401e-06, |
| "loss": 0.4701, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.2441292722622646, |
| "grad_norm": 2.1243540704544133, |
| "learning_rate": 7.281168068306559e-06, |
| "loss": 0.5113, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.2455242966751918, |
| "grad_norm": 1.9858608436318579, |
| "learning_rate": 7.2739338694928485e-06, |
| "loss": 0.4278, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.246919321088119, |
| "grad_norm": 2.0319961298094404, |
| "learning_rate": 7.266693664285761e-06, |
| "loss": 0.4874, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.2483143455010464, |
| "grad_norm": 2.1329652725825294, |
| "learning_rate": 7.259447471809651e-06, |
| "loss": 0.4732, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.2497093699139734, |
| "grad_norm": 1.8887015726470242, |
| "learning_rate": 7.252195311204689e-06, |
| "loss": 0.4973, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.2511043943269007, |
| "grad_norm": 2.2465835446589484, |
| "learning_rate": 7.244937201626812e-06, |
| "loss": 0.5229, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.252499418739828, |
| "grad_norm": 2.137057584376221, |
| "learning_rate": 7.237673162247667e-06, |
| "loss": 0.5224, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.2538944431527552, |
| "grad_norm": 2.165616318347168, |
| "learning_rate": 7.230403212254566e-06, |
| "loss": 0.4415, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.2552894675656825, |
| "grad_norm": 1.9723542482886345, |
| "learning_rate": 7.223127370850433e-06, |
| "loss": 0.4883, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.2566844919786098, |
| "grad_norm": 2.0860634186365123, |
| "learning_rate": 7.215845657253755e-06, |
| "loss": 0.4966, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.2580795163915368, |
| "grad_norm": 2.0929823854721645, |
| "learning_rate": 7.208558090698528e-06, |
| "loss": 0.4694, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.259474540804464, |
| "grad_norm": 2.0668298861598893, |
| "learning_rate": 7.2012646904342065e-06, |
| "loss": 0.514, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.2608695652173914, |
| "grad_norm": 2.15111282561836, |
| "learning_rate": 7.193965475725659e-06, |
| "loss": 0.4687, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.2622645896303184, |
| "grad_norm": 1.9150017426541963, |
| "learning_rate": 7.186660465853111e-06, |
| "loss": 0.4807, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.2636596140432457, |
| "grad_norm": 2.047097651427044, |
| "learning_rate": 7.1793496801120885e-06, |
| "loss": 0.4753, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.265054638456173, |
| "grad_norm": 2.0657264775450495, |
| "learning_rate": 7.172033137813387e-06, |
| "loss": 0.4814, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.2664496628691002, |
| "grad_norm": 2.191648239680036, |
| "learning_rate": 7.1647108582829924e-06, |
| "loss": 0.512, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.2678446872820275, |
| "grad_norm": 2.1481439685301, |
| "learning_rate": 7.157382860862059e-06, |
| "loss": 0.4949, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.2692397116949548, |
| "grad_norm": 2.0193130576657436, |
| "learning_rate": 7.1500491649068345e-06, |
| "loss": 0.4745, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.270634736107882, |
| "grad_norm": 2.152032552208028, |
| "learning_rate": 7.1427097897886225e-06, |
| "loss": 0.4649, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.272029760520809, |
| "grad_norm": 1.79491248450368, |
| "learning_rate": 7.135364754893729e-06, |
| "loss": 0.4824, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.2734247849337363, |
| "grad_norm": 2.006219248252164, |
| "learning_rate": 7.128014079623408e-06, |
| "loss": 0.4683, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.2748198093466636, |
| "grad_norm": 2.1167631241450833, |
| "learning_rate": 7.120657783393809e-06, |
| "loss": 0.4544, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.2762148337595907, |
| "grad_norm": 1.9858989311000603, |
| "learning_rate": 7.113295885635936e-06, |
| "loss": 0.463, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.277609858172518, |
| "grad_norm": 2.033452279845351, |
| "learning_rate": 7.105928405795584e-06, |
| "loss": 0.5281, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.2790048825854452, |
| "grad_norm": 2.0872449768803123, |
| "learning_rate": 7.098555363333289e-06, |
| "loss": 0.4642, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.2803999069983725, |
| "grad_norm": 2.04826654738253, |
| "learning_rate": 7.091176777724291e-06, |
| "loss": 0.4912, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.2817949314112997, |
| "grad_norm": 2.067550326057046, |
| "learning_rate": 7.083792668458463e-06, |
| "loss": 0.4561, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.283189955824227, |
| "grad_norm": 2.012007521941956, |
| "learning_rate": 7.076403055040271e-06, |
| "loss": 0.4612, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.2845849802371543, |
| "grad_norm": 2.0098154320853534, |
| "learning_rate": 7.069007956988718e-06, |
| "loss": 0.4952, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.2859800046500813, |
| "grad_norm": 2.0434296920508945, |
| "learning_rate": 7.061607393837295e-06, |
| "loss": 0.5048, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.2873750290630086, |
| "grad_norm": 2.0366764578127543, |
| "learning_rate": 7.0542013851339316e-06, |
| "loss": 0.4098, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.2887700534759359, |
| "grad_norm": 1.9173857646730377, |
| "learning_rate": 7.04678995044094e-06, |
| "loss": 0.4423, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.290165077888863, |
| "grad_norm": 1.9666733803958376, |
| "learning_rate": 7.039373109334957e-06, |
| "loss": 0.4782, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.2915601023017902, |
| "grad_norm": 2.0178632072627756, |
| "learning_rate": 7.031950881406913e-06, |
| "loss": 0.4829, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.2929551267147175, |
| "grad_norm": 2.099833553417301, |
| "learning_rate": 7.024523286261959e-06, |
| "loss": 0.447, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.2943501511276447, |
| "grad_norm": 1.953265508839639, |
| "learning_rate": 7.017090343519421e-06, |
| "loss": 0.4818, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.295745175540572, |
| "grad_norm": 2.1071393237014484, |
| "learning_rate": 7.009652072812758e-06, |
| "loss": 0.4478, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.2971401999534993, |
| "grad_norm": 1.9409046297212447, |
| "learning_rate": 7.0022084937895e-06, |
| "loss": 0.4926, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.2985352243664265, |
| "grad_norm": 2.157406168167787, |
| "learning_rate": 6.994759626111189e-06, |
| "loss": 0.4971, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.2999302487793536, |
| "grad_norm": 2.1435260313512723, |
| "learning_rate": 6.987305489453352e-06, |
| "loss": 0.5317, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.3013252731922809, |
| "grad_norm": 2.178710662283696, |
| "learning_rate": 6.979846103505423e-06, |
| "loss": 0.4975, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.3027202976052081, |
| "grad_norm": 1.9560199347527047, |
| "learning_rate": 6.972381487970702e-06, |
| "loss": 0.4461, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.3041153220181352, |
| "grad_norm": 2.0653446781054283, |
| "learning_rate": 6.964911662566309e-06, |
| "loss": 0.5171, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.3055103464310625, |
| "grad_norm": 2.186381545142342, |
| "learning_rate": 6.957436647023117e-06, |
| "loss": 0.494, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.3069053708439897, |
| "grad_norm": 1.9412815445850002, |
| "learning_rate": 6.949956461085714e-06, |
| "loss": 0.4895, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.308300395256917, |
| "grad_norm": 2.0736709472728188, |
| "learning_rate": 6.942471124512346e-06, |
| "loss": 0.4724, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.3096954196698443, |
| "grad_norm": 2.0017211386286897, |
| "learning_rate": 6.934980657074859e-06, |
| "loss": 0.4184, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.3110904440827715, |
| "grad_norm": 1.9545935061243371, |
| "learning_rate": 6.9274850785586526e-06, |
| "loss": 0.4803, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.3124854684956988, |
| "grad_norm": 1.9307069252931495, |
| "learning_rate": 6.919984408762632e-06, |
| "loss": 0.4624, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.3138804929086259, |
| "grad_norm": 1.994431506317055, |
| "learning_rate": 6.9124786674991465e-06, |
| "loss": 0.466, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.3152755173215531, |
| "grad_norm": 1.9292964581080267, |
| "learning_rate": 6.90496787459394e-06, |
| "loss": 0.4776, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.3166705417344804, |
| "grad_norm": 2.0814712850032198, |
| "learning_rate": 6.897452049886103e-06, |
| "loss": 0.4756, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.3180655661474074, |
| "grad_norm": 1.8033829750621577, |
| "learning_rate": 6.889931213228015e-06, |
| "loss": 0.5377, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.3194605905603347, |
| "grad_norm": 2.168509526523026, |
| "learning_rate": 6.882405384485294e-06, |
| "loss": 0.4954, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.320855614973262, |
| "grad_norm": 2.0688957036575033, |
| "learning_rate": 6.874874583536748e-06, |
| "loss": 0.493, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.3222506393861893, |
| "grad_norm": 2.087243202440419, |
| "learning_rate": 6.867338830274312e-06, |
| "loss": 0.5271, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.3236456637991165, |
| "grad_norm": 2.213328060307952, |
| "learning_rate": 6.8597981446030095e-06, |
| "loss": 0.525, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.3250406882120438, |
| "grad_norm": 2.0595937823990624, |
| "learning_rate": 6.852252546440885e-06, |
| "loss": 0.4905, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.326435712624971, |
| "grad_norm": 2.0526208858028876, |
| "learning_rate": 6.844702055718964e-06, |
| "loss": 0.4614, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.3278307370378981, |
| "grad_norm": 1.9783705959415367, |
| "learning_rate": 6.837146692381197e-06, |
| "loss": 0.4686, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.3292257614508254, |
| "grad_norm": 1.9500237608372892, |
| "learning_rate": 6.8295864763843965e-06, |
| "loss": 0.5116, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.3306207858637527, |
| "grad_norm": 2.072824515482108, |
| "learning_rate": 6.822021427698201e-06, |
| "loss": 0.4521, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.33201581027668, |
| "grad_norm": 2.097209437746427, |
| "learning_rate": 6.814451566305014e-06, |
| "loss": 0.5228, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.333410834689607, |
| "grad_norm": 2.2226534279386705, |
| "learning_rate": 6.806876912199945e-06, |
| "loss": 0.4926, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.3348058591025342, |
| "grad_norm": 2.130421968111814, |
| "learning_rate": 6.7992974853907655e-06, |
| "loss": 0.4859, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.3362008835154615, |
| "grad_norm": 2.0794274350627013, |
| "learning_rate": 6.791713305897861e-06, |
| "loss": 0.476, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.3375959079283888, |
| "grad_norm": 1.892246451887343, |
| "learning_rate": 6.78412439375416e-06, |
| "loss": 0.4791, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.338990932341316, |
| "grad_norm": 2.0209725081913894, |
| "learning_rate": 6.776530769005099e-06, |
| "loss": 0.4569, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.3403859567542433, |
| "grad_norm": 1.9927839085181078, |
| "learning_rate": 6.768932451708557e-06, |
| "loss": 0.4678, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.3417809811671704, |
| "grad_norm": 2.0939421385477477, |
| "learning_rate": 6.761329461934814e-06, |
| "loss": 0.4711, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.3431760055800976, |
| "grad_norm": 2.081168989179342, |
| "learning_rate": 6.753721819766489e-06, |
| "loss": 0.4945, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.344571029993025, |
| "grad_norm": 2.1984714091362876, |
| "learning_rate": 6.746109545298488e-06, |
| "loss": 0.486, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.3459660544059522, |
| "grad_norm": 2.112630508666608, |
| "learning_rate": 6.738492658637957e-06, |
| "loss": 0.4402, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.3473610788188792, |
| "grad_norm": 1.7973422226559084, |
| "learning_rate": 6.730871179904218e-06, |
| "loss": 0.5329, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.3487561032318065, |
| "grad_norm": 2.0722305312694935, |
| "learning_rate": 6.723245129228732e-06, |
| "loss": 0.4406, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.3501511276447338, |
| "grad_norm": 2.148255296747169, |
| "learning_rate": 6.7156145267550275e-06, |
| "loss": 0.4711, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.351546152057661, |
| "grad_norm": 2.062135239126828, |
| "learning_rate": 6.707979392638663e-06, |
| "loss": 0.4591, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.3529411764705883, |
| "grad_norm": 2.0077274982122835, |
| "learning_rate": 6.700339747047162e-06, |
| "loss": 0.4914, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.3543362008835156, |
| "grad_norm": 2.0688184370721747, |
| "learning_rate": 6.692695610159966e-06, |
| "loss": 0.4816, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.3557312252964426, |
| "grad_norm": 1.9158674884573788, |
| "learning_rate": 6.685047002168382e-06, |
| "loss": 0.4511, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.35712624970937, |
| "grad_norm": 2.041299786373518, |
| "learning_rate": 6.677393943275525e-06, |
| "loss": 0.4621, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.3585212741222972, |
| "grad_norm": 1.9859822832346596, |
| "learning_rate": 6.669736453696266e-06, |
| "loss": 0.4668, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.3599162985352244, |
| "grad_norm": 2.085368871543757, |
| "learning_rate": 6.66207455365718e-06, |
| "loss": 0.4548, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.3613113229481515, |
| "grad_norm": 2.016128984506122, |
| "learning_rate": 6.6544082633964955e-06, |
| "loss": 0.4429, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.3627063473610788, |
| "grad_norm": 2.145371160224853, |
| "learning_rate": 6.646737603164031e-06, |
| "loss": 0.5083, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.364101371774006, |
| "grad_norm": 2.1386222042642524, |
| "learning_rate": 6.639062593221152e-06, |
| "loss": 0.5335, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.3654963961869333, |
| "grad_norm": 2.270440526076038, |
| "learning_rate": 6.6313832538407106e-06, |
| "loss": 0.496, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.3668914205998606, |
| "grad_norm": 1.9889941520807715, |
| "learning_rate": 6.623699605306999e-06, |
| "loss": 0.4915, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.3682864450127878, |
| "grad_norm": 2.1440931891884727, |
| "learning_rate": 6.6160116679156874e-06, |
| "loss": 0.4679, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.369681469425715, |
| "grad_norm": 2.227537687856243, |
| "learning_rate": 6.608319461973778e-06, |
| "loss": 0.4598, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.3710764938386422, |
| "grad_norm": 2.0287236808325884, |
| "learning_rate": 6.6006230077995424e-06, |
| "loss": 0.4601, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.3724715182515694, |
| "grad_norm": 1.7888137177639865, |
| "learning_rate": 6.592922325722483e-06, |
| "loss": 0.4609, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.3738665426644967, |
| "grad_norm": 2.3520604904332534, |
| "learning_rate": 6.58521743608326e-06, |
| "loss": 0.5031, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.3752615670774238, |
| "grad_norm": 2.110792804041468, |
| "learning_rate": 6.577508359233653e-06, |
| "loss": 0.4888, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.376656591490351, |
| "grad_norm": 2.3149781630133393, |
| "learning_rate": 6.569795115536502e-06, |
| "loss": 0.476, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.3780516159032783, |
| "grad_norm": 2.0549954385732434, |
| "learning_rate": 6.562077725365648e-06, |
| "loss": 0.5159, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.3794466403162056, |
| "grad_norm": 2.0125811654928283, |
| "learning_rate": 6.554356209105892e-06, |
| "loss": 0.4225, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.3808416647291328, |
| "grad_norm": 2.0683754856323873, |
| "learning_rate": 6.54663058715293e-06, |
| "loss": 0.5251, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.38223668914206, |
| "grad_norm": 2.084529371644239, |
| "learning_rate": 6.538900879913301e-06, |
| "loss": 0.4857, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.3836317135549872, |
| "grad_norm": 2.1618474971322827, |
| "learning_rate": 6.531167107804337e-06, |
| "loss": 0.4976, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.3850267379679144, |
| "grad_norm": 2.062302710914967, |
| "learning_rate": 6.523429291254109e-06, |
| "loss": 0.5046, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.3864217623808417, |
| "grad_norm": 2.162972647685988, |
| "learning_rate": 6.515687450701367e-06, |
| "loss": 0.4762, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.387816786793769, |
| "grad_norm": 2.018219733272469, |
| "learning_rate": 6.507941606595492e-06, |
| "loss": 0.4364, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.389211811206696, |
| "grad_norm": 1.8942784577915137, |
| "learning_rate": 6.500191779396439e-06, |
| "loss": 0.4854, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.3906068356196233, |
| "grad_norm": 2.1180507917189195, |
| "learning_rate": 6.492437989574689e-06, |
| "loss": 0.4829, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.3920018600325506, |
| "grad_norm": 2.1192897908451758, |
| "learning_rate": 6.48468025761118e-06, |
| "loss": 0.4852, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.3933968844454778, |
| "grad_norm": 2.0232470469302775, |
| "learning_rate": 6.476918603997273e-06, |
| "loss": 0.4559, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.394791908858405, |
| "grad_norm": 2.2141742307018655, |
| "learning_rate": 6.469153049234683e-06, |
| "loss": 0.5079, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.3961869332713324, |
| "grad_norm": 2.225056470302364, |
| "learning_rate": 6.461383613835427e-06, |
| "loss": 0.494, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.3975819576842594, |
| "grad_norm": 1.9182609150895602, |
| "learning_rate": 6.453610318321777e-06, |
| "loss": 0.4497, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.3989769820971867, |
| "grad_norm": 1.964134487756821, |
| "learning_rate": 6.445833183226201e-06, |
| "loss": 0.5051, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.400372006510114, |
| "grad_norm": 2.2625631905462886, |
| "learning_rate": 6.438052229091303e-06, |
| "loss": 0.4843, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.4017670309230412, |
| "grad_norm": 2.095268057911824, |
| "learning_rate": 6.430267476469783e-06, |
| "loss": 0.4879, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.4031620553359683, |
| "grad_norm": 2.0824573756180316, |
| "learning_rate": 6.4224789459243705e-06, |
| "loss": 0.4845, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.4045570797488955, |
| "grad_norm": 2.051949843595703, |
| "learning_rate": 6.4146866580277686e-06, |
| "loss": 0.454, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.4059521041618228, |
| "grad_norm": 2.01418184621082, |
| "learning_rate": 6.406890633362618e-06, |
| "loss": 0.4382, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.40734712857475, |
| "grad_norm": 1.9009970558050422, |
| "learning_rate": 6.3990908925214155e-06, |
| "loss": 0.4246, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.4087421529876774, |
| "grad_norm": 1.9077315815732818, |
| "learning_rate": 6.391287456106483e-06, |
| "loss": 0.477, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.4101371774006046, |
| "grad_norm": 2.1376510322845816, |
| "learning_rate": 6.383480344729903e-06, |
| "loss": 0.5019, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.4115322018135317, |
| "grad_norm": 2.1232082872185845, |
| "learning_rate": 6.375669579013461e-06, |
| "loss": 0.5143, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.412927226226459, |
| "grad_norm": 2.0264092739997226, |
| "learning_rate": 6.367855179588597e-06, |
| "loss": 0.5137, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.4143222506393862, |
| "grad_norm": 2.0673469407609453, |
| "learning_rate": 6.3600371670963525e-06, |
| "loss": 0.469, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.4157172750523135, |
| "grad_norm": 2.061303960217057, |
| "learning_rate": 6.352215562187307e-06, |
| "loss": 0.4851, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.4171122994652405, |
| "grad_norm": 2.0208655316631496, |
| "learning_rate": 6.344390385521534e-06, |
| "loss": 0.4796, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.4185073238781678, |
| "grad_norm": 2.1338536003958506, |
| "learning_rate": 6.33656165776854e-06, |
| "loss": 0.4878, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.419902348291095, |
| "grad_norm": 2.2451798484385908, |
| "learning_rate": 6.328729399607206e-06, |
| "loss": 0.4825, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.4212973727040223, |
| "grad_norm": 2.0158460176508672, |
| "learning_rate": 6.320893631725748e-06, |
| "loss": 0.4634, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.4226923971169496, |
| "grad_norm": 2.0286301190724734, |
| "learning_rate": 6.313054374821647e-06, |
| "loss": 0.4824, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.4240874215298769, |
| "grad_norm": 1.996772494073169, |
| "learning_rate": 6.305211649601595e-06, |
| "loss": 0.4183, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.425482445942804, |
| "grad_norm": 1.9064412054811033, |
| "learning_rate": 6.29736547678146e-06, |
| "loss": 0.4563, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.4268774703557312, |
| "grad_norm": 2.0971755867348434, |
| "learning_rate": 6.289515877086199e-06, |
| "loss": 0.5462, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.4282724947686585, |
| "grad_norm": 2.0723675284704175, |
| "learning_rate": 6.2816628712498315e-06, |
| "loss": 0.4796, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.4296675191815857, |
| "grad_norm": 1.9705008439394425, |
| "learning_rate": 6.273806480015374e-06, |
| "loss": 0.4649, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.4310625435945128, |
| "grad_norm": 2.0561169300756053, |
| "learning_rate": 6.265946724134782e-06, |
| "loss": 0.4159, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.43245756800744, |
| "grad_norm": 1.9343638344307528, |
| "learning_rate": 6.258083624368895e-06, |
| "loss": 0.4818, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.4338525924203673, |
| "grad_norm": 2.004281252907952, |
| "learning_rate": 6.250217201487395e-06, |
| "loss": 0.491, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.4352476168332946, |
| "grad_norm": 1.9996076845166801, |
| "learning_rate": 6.242347476268733e-06, |
| "loss": 0.4851, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.4366426412462219, |
| "grad_norm": 2.1397928370434998, |
| "learning_rate": 6.2344744695000855e-06, |
| "loss": 0.4941, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.4380376656591491, |
| "grad_norm": 2.1653211753488666, |
| "learning_rate": 6.226598201977299e-06, |
| "loss": 0.491, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.4394326900720762, |
| "grad_norm": 2.133936024923636, |
| "learning_rate": 6.218718694504831e-06, |
| "loss": 0.5052, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.4408277144850035, |
| "grad_norm": 2.1209125416279635, |
| "learning_rate": 6.2108359678956954e-06, |
| "loss": 0.4457, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.4422227388979307, |
| "grad_norm": 2.2685773861447633, |
| "learning_rate": 6.202950042971414e-06, |
| "loss": 0.5026, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.443617763310858, |
| "grad_norm": 2.0835264796801627, |
| "learning_rate": 6.19506094056195e-06, |
| "loss": 0.5125, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.445012787723785, |
| "grad_norm": 2.335657398503236, |
| "learning_rate": 6.187168681505666e-06, |
| "loss": 0.487, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.4464078121367123, |
| "grad_norm": 2.0727595506282865, |
| "learning_rate": 6.17927328664926e-06, |
| "loss": 0.4688, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.4478028365496396, |
| "grad_norm": 1.841772404318466, |
| "learning_rate": 6.171374776847711e-06, |
| "loss": 0.4967, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.4491978609625669, |
| "grad_norm": 2.131993766169631, |
| "learning_rate": 6.163473172964229e-06, |
| "loss": 0.4573, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.4505928853754941, |
| "grad_norm": 2.0927552772654643, |
| "learning_rate": 6.1555684958701965e-06, |
| "loss": 0.4897, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.4519879097884214, |
| "grad_norm": 2.0569744283426923, |
| "learning_rate": 6.1476607664451105e-06, |
| "loss": 0.4963, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.4533829342013485, |
| "grad_norm": 2.079023172581386, |
| "learning_rate": 6.1397500055765345e-06, |
| "loss": 0.5044, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.4547779586142757, |
| "grad_norm": 2.154680029422567, |
| "learning_rate": 6.131836234160036e-06, |
| "loss": 0.4789, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.456172983027203, |
| "grad_norm": 2.0278215354462175, |
| "learning_rate": 6.123919473099134e-06, |
| "loss": 0.4712, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.4575680074401303, |
| "grad_norm": 2.0902931301788765, |
| "learning_rate": 6.115999743305252e-06, |
| "loss": 0.4583, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.4589630318530573, |
| "grad_norm": 2.051969856622332, |
| "learning_rate": 6.1080770656976444e-06, |
| "loss": 0.4222, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.4603580562659846, |
| "grad_norm": 2.019221977340812, |
| "learning_rate": 6.100151461203359e-06, |
| "loss": 0.474, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.4617530806789119, |
| "grad_norm": 1.953257537676229, |
| "learning_rate": 6.0922229507571716e-06, |
| "loss": 0.4927, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.4631481050918391, |
| "grad_norm": 2.035957959177665, |
| "learning_rate": 6.084291555301537e-06, |
| "loss": 0.506, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.4645431295047664, |
| "grad_norm": 2.171235089740684, |
| "learning_rate": 6.076357295786526e-06, |
| "loss": 0.4568, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.4659381539176937, |
| "grad_norm": 2.0290741267430636, |
| "learning_rate": 6.068420193169779e-06, |
| "loss": 0.519, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.4673331783306207, |
| "grad_norm": 2.119657460018465, |
| "learning_rate": 6.0604802684164436e-06, |
| "loss": 0.5092, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.468728202743548, |
| "grad_norm": 2.1165818757895956, |
| "learning_rate": 6.052537542499122e-06, |
| "loss": 0.4536, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.4701232271564753, |
| "grad_norm": 2.0023105481496457, |
| "learning_rate": 6.044592036397816e-06, |
| "loss": 0.4459, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.4715182515694025, |
| "grad_norm": 1.9714486621554073, |
| "learning_rate": 6.0366437710998715e-06, |
| "loss": 0.4845, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.4729132759823296, |
| "grad_norm": 2.237061577630844, |
| "learning_rate": 6.0286927675999205e-06, |
| "loss": 0.4729, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.4743083003952568, |
| "grad_norm": 1.92340136259166, |
| "learning_rate": 6.02073904689983e-06, |
| "loss": 0.5227, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.4757033248081841, |
| "grad_norm": 2.350503819784743, |
| "learning_rate": 6.012782630008646e-06, |
| "loss": 0.503, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.4770983492211114, |
| "grad_norm": 2.1697136056489934, |
| "learning_rate": 6.004823537942528e-06, |
| "loss": 0.466, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.4784933736340387, |
| "grad_norm": 1.9939215886063746, |
| "learning_rate": 5.996861791724713e-06, |
| "loss": 0.5017, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.479888398046966, |
| "grad_norm": 2.1106593906557896, |
| "learning_rate": 5.98889741238544e-06, |
| "loss": 0.4771, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.481283422459893, |
| "grad_norm": 2.1574303269272153, |
| "learning_rate": 5.9809304209619054e-06, |
| "loss": 0.4632, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.4826784468728202, |
| "grad_norm": 1.8051167313806595, |
| "learning_rate": 5.9729608384982085e-06, |
| "loss": 0.4388, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.4840734712857475, |
| "grad_norm": 2.0543453042457656, |
| "learning_rate": 5.964988686045289e-06, |
| "loss": 0.5055, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.4854684956986748, |
| "grad_norm": 2.1478980267908314, |
| "learning_rate": 5.957013984660875e-06, |
| "loss": 0.4961, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.4868635201116018, |
| "grad_norm": 2.14056081044156, |
| "learning_rate": 5.949036755409432e-06, |
| "loss": 0.4581, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.488258544524529, |
| "grad_norm": 2.1026984133609745, |
| "learning_rate": 5.941057019362095e-06, |
| "loss": 0.4443, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.4896535689374564, |
| "grad_norm": 1.9689741478177671, |
| "learning_rate": 5.933074797596627e-06, |
| "loss": 0.4511, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.4910485933503836, |
| "grad_norm": 2.1362113215367238, |
| "learning_rate": 5.925090111197355e-06, |
| "loss": 0.4693, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.492443617763311, |
| "grad_norm": 2.0910124749432537, |
| "learning_rate": 5.917102981255114e-06, |
| "loss": 0.4889, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.4938386421762382, |
| "grad_norm": 2.097915816754819, |
| "learning_rate": 5.909113428867195e-06, |
| "loss": 0.5733, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.4952336665891652, |
| "grad_norm": 2.329805560189044, |
| "learning_rate": 5.901121475137287e-06, |
| "loss": 0.483, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.4966286910020925, |
| "grad_norm": 2.1245862949546814, |
| "learning_rate": 5.893127141175425e-06, |
| "loss": 0.4991, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.4980237154150198, |
| "grad_norm": 2.090320969759168, |
| "learning_rate": 5.885130448097926e-06, |
| "loss": 0.5058, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.499418739827947, |
| "grad_norm": 2.191307478211539, |
| "learning_rate": 5.877131417027343e-06, |
| "loss": 0.5275, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.500813764240874, |
| "grad_norm": 2.1275043838983034, |
| "learning_rate": 5.869130069092401e-06, |
| "loss": 0.5048, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.5022087886538014, |
| "grad_norm": 2.0220782985141055, |
| "learning_rate": 5.861126425427949e-06, |
| "loss": 0.4596, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.5036038130667286, |
| "grad_norm": 2.1080717931508643, |
| "learning_rate": 5.853120507174894e-06, |
| "loss": 0.5189, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.504998837479656, |
| "grad_norm": 2.091932259125407, |
| "learning_rate": 5.845112335480159e-06, |
| "loss": 0.4921, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.5063938618925832, |
| "grad_norm": 2.058742155033756, |
| "learning_rate": 5.83710193149661e-06, |
| "loss": 0.4337, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.5077888863055104, |
| "grad_norm": 1.9879796252815132, |
| "learning_rate": 5.829089316383018e-06, |
| "loss": 0.4928, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.5091839107184377, |
| "grad_norm": 2.160851363843134, |
| "learning_rate": 5.821074511303988e-06, |
| "loss": 0.4643, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.5105789351313648, |
| "grad_norm": 1.913208554086219, |
| "learning_rate": 5.813057537429915e-06, |
| "loss": 0.4714, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.511973959544292, |
| "grad_norm": 2.077158539782065, |
| "learning_rate": 5.805038415936919e-06, |
| "loss": 0.4272, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.5133689839572193, |
| "grad_norm": 1.9639093713317004, |
| "learning_rate": 5.797017168006791e-06, |
| "loss": 0.4794, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.5147640083701464, |
| "grad_norm": 2.1781993886924744, |
| "learning_rate": 5.7889938148269445e-06, |
| "loss": 0.5108, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.5161590327830736, |
| "grad_norm": 2.021353634653322, |
| "learning_rate": 5.7809683775903525e-06, |
| "loss": 0.4582, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.517554057196001, |
| "grad_norm": 2.2318747817283078, |
| "learning_rate": 5.7729408774954865e-06, |
| "loss": 0.4792, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.5189490816089282, |
| "grad_norm": 2.1777046381098795, |
| "learning_rate": 5.764911335746275e-06, |
| "loss": 0.4792, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.5203441060218554, |
| "grad_norm": 2.1281373438537208, |
| "learning_rate": 5.756879773552037e-06, |
| "loss": 0.4847, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.5217391304347827, |
| "grad_norm": 2.0558258117624724, |
| "learning_rate": 5.748846212127421e-06, |
| "loss": 0.5479, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.52313415484771, |
| "grad_norm": 2.207562158111901, |
| "learning_rate": 5.74081067269237e-06, |
| "loss": 0.4505, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.524529179260637, |
| "grad_norm": 1.9924679823320044, |
| "learning_rate": 5.732773176472042e-06, |
| "loss": 0.4275, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.5259242036735643, |
| "grad_norm": 1.8602848014371722, |
| "learning_rate": 5.7247337446967625e-06, |
| "loss": 0.4798, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.5273192280864916, |
| "grad_norm": 1.9906027660956394, |
| "learning_rate": 5.716692398601975e-06, |
| "loss": 0.4743, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.5287142524994186, |
| "grad_norm": 1.9544357110617374, |
| "learning_rate": 5.708649159428181e-06, |
| "loss": 0.5145, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.5301092769123459, |
| "grad_norm": 2.3355021057730796, |
| "learning_rate": 5.700604048420875e-06, |
| "loss": 0.5103, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.5315043013252732, |
| "grad_norm": 1.8568276485535198, |
| "learning_rate": 5.692557086830501e-06, |
| "loss": 0.4975, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.5328993257382004, |
| "grad_norm": 2.125493413956754, |
| "learning_rate": 5.68450829591239e-06, |
| "loss": 0.4663, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.5342943501511277, |
| "grad_norm": 2.071457449537922, |
| "learning_rate": 5.676457696926703e-06, |
| "loss": 0.4903, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.535689374564055, |
| "grad_norm": 2.1674492150404987, |
| "learning_rate": 5.668405311138382e-06, |
| "loss": 0.4943, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.5370843989769822, |
| "grad_norm": 2.023194559841172, |
| "learning_rate": 5.660351159817083e-06, |
| "loss": 0.4715, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.5384794233899093, |
| "grad_norm": 2.196574189719957, |
| "learning_rate": 5.652295264237128e-06, |
| "loss": 0.5002, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.5398744478028366, |
| "grad_norm": 2.0729450743671425, |
| "learning_rate": 5.6442376456774495e-06, |
| "loss": 0.4333, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.5412694722157638, |
| "grad_norm": 1.8939760093629232, |
| "learning_rate": 5.636178325421524e-06, |
| "loss": 0.4704, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.5426644966286909, |
| "grad_norm": 2.0676972299709955, |
| "learning_rate": 5.628117324757326e-06, |
| "loss": 0.4879, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.5440595210416181, |
| "grad_norm": 2.0776134536197457, |
| "learning_rate": 5.620054664977275e-06, |
| "loss": 0.3994, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.5454545454545454, |
| "grad_norm": 1.9149487033980843, |
| "learning_rate": 5.61199036737816e-06, |
| "loss": 0.4564, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.5468495698674727, |
| "grad_norm": 1.8789388381354126, |
| "learning_rate": 5.603924453261109e-06, |
| "loss": 0.4306, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.5482445942804, |
| "grad_norm": 2.1151583976969444, |
| "learning_rate": 5.595856943931512e-06, |
| "loss": 0.4965, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.5496396186933272, |
| "grad_norm": 2.168510370730916, |
| "learning_rate": 5.587787860698975e-06, |
| "loss": 0.4645, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.5510346431062545, |
| "grad_norm": 1.9652877078434625, |
| "learning_rate": 5.579717224877261e-06, |
| "loss": 0.5291, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.5524296675191815, |
| "grad_norm": 2.2649679803534637, |
| "learning_rate": 5.571645057784236e-06, |
| "loss": 0.5016, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.5538246919321088, |
| "grad_norm": 2.1957127413656328, |
| "learning_rate": 5.5635713807418055e-06, |
| "loss": 0.4695, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.555219716345036, |
| "grad_norm": 2.0362707966220723, |
| "learning_rate": 5.55549621507587e-06, |
| "loss": 0.4885, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.5566147407579631, |
| "grad_norm": 1.9927839746067004, |
| "learning_rate": 5.547419582116259e-06, |
| "loss": 0.5161, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.5580097651708904, |
| "grad_norm": 2.196498341950231, |
| "learning_rate": 5.539341503196674e-06, |
| "loss": 0.4412, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.5594047895838177, |
| "grad_norm": 2.1458152403575768, |
| "learning_rate": 5.531261999654646e-06, |
| "loss": 0.442, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.560799813996745, |
| "grad_norm": 2.006850128740523, |
| "learning_rate": 5.5231810928314555e-06, |
| "loss": 0.4598, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.5621948384096722, |
| "grad_norm": 1.9493183184314022, |
| "learning_rate": 5.5150988040721e-06, |
| "loss": 0.457, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.5635898628225995, |
| "grad_norm": 2.0840902735769684, |
| "learning_rate": 5.507015154725226e-06, |
| "loss": 0.4632, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.5649848872355268, |
| "grad_norm": 1.8428305966849245, |
| "learning_rate": 5.4989301661430685e-06, |
| "loss": 0.4506, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.5663799116484538, |
| "grad_norm": 1.9920482502185506, |
| "learning_rate": 5.490843859681404e-06, |
| "loss": 0.46, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.567774936061381, |
| "grad_norm": 2.1161191567578324, |
| "learning_rate": 5.48275625669949e-06, |
| "loss": 0.4538, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.5691699604743083, |
| "grad_norm": 1.9746726770175769, |
| "learning_rate": 5.474667378560007e-06, |
| "loss": 0.4625, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.5705649848872354, |
| "grad_norm": 1.9991374998558773, |
| "learning_rate": 5.466577246629006e-06, |
| "loss": 0.4789, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.5719600093001627, |
| "grad_norm": 1.9636955625225274, |
| "learning_rate": 5.458485882275848e-06, |
| "loss": 0.433, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.57335503371309, |
| "grad_norm": 1.8591665976898522, |
| "learning_rate": 5.45039330687315e-06, |
| "loss": 0.4911, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.5747500581260172, |
| "grad_norm": 2.016208821382892, |
| "learning_rate": 5.442299541796727e-06, |
| "loss": 0.4491, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.5761450825389445, |
| "grad_norm": 2.043786587370076, |
| "learning_rate": 5.4342046084255385e-06, |
| "loss": 0.4343, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.5775401069518717, |
| "grad_norm": 1.9497412453804057, |
| "learning_rate": 5.426108528141627e-06, |
| "loss": 0.47, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.578935131364799, |
| "grad_norm": 1.9244444803664198, |
| "learning_rate": 5.4180113223300665e-06, |
| "loss": 0.4416, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.580330155777726, |
| "grad_norm": 2.0298017508341006, |
| "learning_rate": 5.409913012378903e-06, |
| "loss": 0.4235, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.5817251801906533, |
| "grad_norm": 1.9384109846754765, |
| "learning_rate": 5.401813619679102e-06, |
| "loss": 0.4938, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.5831202046035806, |
| "grad_norm": 2.1038176771719512, |
| "learning_rate": 5.3937131656244834e-06, |
| "loss": 0.4393, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.5845152290165077, |
| "grad_norm": 2.1838408097559086, |
| "learning_rate": 5.385611671611676e-06, |
| "loss": 0.4802, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.585910253429435, |
| "grad_norm": 2.055193473610198, |
| "learning_rate": 5.377509159040051e-06, |
| "loss": 0.4912, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.5873052778423622, |
| "grad_norm": 2.1117830596189258, |
| "learning_rate": 5.3694056493116745e-06, |
| "loss": 0.4495, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.5887003022552895, |
| "grad_norm": 2.126637279332641, |
| "learning_rate": 5.361301163831242e-06, |
| "loss": 0.4921, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.5900953266682167, |
| "grad_norm": 2.072596734971921, |
| "learning_rate": 5.353195724006031e-06, |
| "loss": 0.4346, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.591490351081144, |
| "grad_norm": 1.9778671288497442, |
| "learning_rate": 5.345089351245834e-06, |
| "loss": 0.4889, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.5928853754940713, |
| "grad_norm": 2.0619924037997315, |
| "learning_rate": 5.336982066962915e-06, |
| "loss": 0.4723, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.5942803999069983, |
| "grad_norm": 2.095968190026346, |
| "learning_rate": 5.328873892571941e-06, |
| "loss": 0.4294, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.5956754243199256, |
| "grad_norm": 2.1452385884936405, |
| "learning_rate": 5.320764849489929e-06, |
| "loss": 0.4964, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.5970704487328529, |
| "grad_norm": 2.1195901942810624, |
| "learning_rate": 5.312654959136194e-06, |
| "loss": 0.4024, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.59846547314578, |
| "grad_norm": 1.9898460201178139, |
| "learning_rate": 5.304544242932288e-06, |
| "loss": 0.502, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.5998604975587072, |
| "grad_norm": 2.1486403537619614, |
| "learning_rate": 5.296432722301944e-06, |
| "loss": 0.5122, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.6012555219716345, |
| "grad_norm": 2.048752350661396, |
| "learning_rate": 5.288320418671018e-06, |
| "loss": 0.4522, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.6026505463845617, |
| "grad_norm": 2.242124204991126, |
| "learning_rate": 5.280207353467438e-06, |
| "loss": 0.4824, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.604045570797489, |
| "grad_norm": 2.1087909007924206, |
| "learning_rate": 5.272093548121141e-06, |
| "loss": 0.4882, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.6054405952104163, |
| "grad_norm": 2.0689612978514185, |
| "learning_rate": 5.26397902406402e-06, |
| "loss": 0.4895, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.6068356196233435, |
| "grad_norm": 2.2404499749663596, |
| "learning_rate": 5.255863802729866e-06, |
| "loss": 0.4703, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.6082306440362706, |
| "grad_norm": 2.1796296336015004, |
| "learning_rate": 5.247747905554311e-06, |
| "loss": 0.4371, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.6096256684491979, |
| "grad_norm": 1.83480870370753, |
| "learning_rate": 5.239631353974774e-06, |
| "loss": 0.5222, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.6110206928621251, |
| "grad_norm": 2.1557579183524207, |
| "learning_rate": 5.231514169430403e-06, |
| "loss": 0.4727, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.6124157172750522, |
| "grad_norm": 2.1001021273931504, |
| "learning_rate": 5.223396373362013e-06, |
| "loss": 0.4419, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.6138107416879794, |
| "grad_norm": 1.8542588865560108, |
| "learning_rate": 5.215277987212041e-06, |
| "loss": 0.5285, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.6152057661009067, |
| "grad_norm": 2.128214277180948, |
| "learning_rate": 5.207159032424478e-06, |
| "loss": 0.4982, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.616600790513834, |
| "grad_norm": 2.05081239685872, |
| "learning_rate": 5.199039530444819e-06, |
| "loss": 0.4488, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.6179958149267613, |
| "grad_norm": 2.0930206418500936, |
| "learning_rate": 5.1909195027200055e-06, |
| "loss": 0.4665, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.6193908393396885, |
| "grad_norm": 1.9511896188233324, |
| "learning_rate": 5.182798970698361e-06, |
| "loss": 0.4662, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.6207858637526158, |
| "grad_norm": 2.1151104009916377, |
| "learning_rate": 5.174677955829551e-06, |
| "loss": 0.479, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.6221808881655428, |
| "grad_norm": 2.0233329936544675, |
| "learning_rate": 5.166556479564511e-06, |
| "loss": 0.5033, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.6235759125784701, |
| "grad_norm": 2.005641777027395, |
| "learning_rate": 5.158434563355392e-06, |
| "loss": 0.5077, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.6249709369913974, |
| "grad_norm": 2.3289383835606796, |
| "learning_rate": 5.150312228655515e-06, |
| "loss": 0.4157, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.6263659614043244, |
| "grad_norm": 1.9091089129449355, |
| "learning_rate": 5.142189496919302e-06, |
| "loss": 0.4645, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.6277609858172517, |
| "grad_norm": 1.9509007916244274, |
| "learning_rate": 5.1340663896022206e-06, |
| "loss": 0.449, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.629156010230179, |
| "grad_norm": 1.9978926625299753, |
| "learning_rate": 5.125942928160736e-06, |
| "loss": 0.4693, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.6305510346431062, |
| "grad_norm": 2.1593985262641393, |
| "learning_rate": 5.117819134052246e-06, |
| "loss": 0.4853, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.6319460590560335, |
| "grad_norm": 2.0010212399413074, |
| "learning_rate": 5.10969502873503e-06, |
| "loss": 0.4646, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.6333410834689608, |
| "grad_norm": 2.179277406220846, |
| "learning_rate": 5.101570633668185e-06, |
| "loss": 0.4685, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.634736107881888, |
| "grad_norm": 2.19745037995779, |
| "learning_rate": 5.093445970311576e-06, |
| "loss": 0.4716, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.636131132294815, |
| "grad_norm": 2.0643787126674162, |
| "learning_rate": 5.085321060125775e-06, |
| "loss": 0.4534, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.6375261567077424, |
| "grad_norm": 2.0764902906142697, |
| "learning_rate": 5.07719592457201e-06, |
| "loss": 0.4648, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.6389211811206696, |
| "grad_norm": 2.1659245368856554, |
| "learning_rate": 5.069070585112097e-06, |
| "loss": 0.5295, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.6403162055335967, |
| "grad_norm": 2.3959883670923587, |
| "learning_rate": 5.060945063208399e-06, |
| "loss": 0.4686, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.641711229946524, |
| "grad_norm": 2.1564182143544675, |
| "learning_rate": 5.052819380323757e-06, |
| "loss": 0.4761, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.6431062543594512, |
| "grad_norm": 2.053086822481449, |
| "learning_rate": 5.044693557921434e-06, |
| "loss": 0.4694, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.6445012787723785, |
| "grad_norm": 2.067471373158249, |
| "learning_rate": 5.036567617465067e-06, |
| "loss": 0.4787, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.6458963031853058, |
| "grad_norm": 2.2385488484589433, |
| "learning_rate": 5.0284415804186025e-06, |
| "loss": 0.4661, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.647291327598233, |
| "grad_norm": 2.0037594738988678, |
| "learning_rate": 5.02031546824624e-06, |
| "loss": 0.4727, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.6486863520111603, |
| "grad_norm": 2.104281706724569, |
| "learning_rate": 5.012189302412383e-06, |
| "loss": 0.4894, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.6500813764240876, |
| "grad_norm": 2.1163907295817563, |
| "learning_rate": 5.0040631043815715e-06, |
| "loss": 0.5042, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.6514764008370146, |
| "grad_norm": 2.084686423119246, |
| "learning_rate": 4.99593689561843e-06, |
| "loss": 0.4659, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.652871425249942, |
| "grad_norm": 1.972423605655503, |
| "learning_rate": 4.987810697587618e-06, |
| "loss": 0.4969, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.654266449662869, |
| "grad_norm": 2.1644206111383255, |
| "learning_rate": 4.979684531753761e-06, |
| "loss": 0.4543, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.6556614740757962, |
| "grad_norm": 2.1503763455287808, |
| "learning_rate": 4.971558419581398e-06, |
| "loss": 0.4504, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.6570564984887235, |
| "grad_norm": 1.971907358337702, |
| "learning_rate": 4.963432382534933e-06, |
| "loss": 0.5174, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.6584515229016508, |
| "grad_norm": 2.311562963469942, |
| "learning_rate": 4.955306442078568e-06, |
| "loss": 0.5096, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.659846547314578, |
| "grad_norm": 2.0406738494866117, |
| "learning_rate": 4.947180619676244e-06, |
| "loss": 0.4221, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.6612415717275053, |
| "grad_norm": 1.8981730758330377, |
| "learning_rate": 4.9390549367916004e-06, |
| "loss": 0.4456, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.6626365961404326, |
| "grad_norm": 1.882840958072787, |
| "learning_rate": 4.930929414887904e-06, |
| "loss": 0.4484, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.6640316205533598, |
| "grad_norm": 2.1125846152280054, |
| "learning_rate": 4.9228040754279915e-06, |
| "loss": 0.4778, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.665426644966287, |
| "grad_norm": 2.0262565344195536, |
| "learning_rate": 4.914678939874225e-06, |
| "loss": 0.5206, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.6668216693792142, |
| "grad_norm": 2.2200965218244972, |
| "learning_rate": 4.906554029688427e-06, |
| "loss": 0.4383, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.6682166937921412, |
| "grad_norm": 1.9240667691854707, |
| "learning_rate": 4.898429366331815e-06, |
| "loss": 0.4802, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.6696117182050685, |
| "grad_norm": 2.1125221215706236, |
| "learning_rate": 4.8903049712649705e-06, |
| "loss": 0.5099, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.6710067426179958, |
| "grad_norm": 2.114831308671667, |
| "learning_rate": 4.8821808659477544e-06, |
| "loss": 0.4483, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.672401767030923, |
| "grad_norm": 2.0277403831475853, |
| "learning_rate": 4.874057071839265e-06, |
| "loss": 0.4633, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.6737967914438503, |
| "grad_norm": 1.9861660198892896, |
| "learning_rate": 4.86593361039778e-06, |
| "loss": 0.48, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.6751918158567776, |
| "grad_norm": 2.134110515376525, |
| "learning_rate": 4.857810503080701e-06, |
| "loss": 0.4877, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.6765868402697048, |
| "grad_norm": 2.1201931398630824, |
| "learning_rate": 4.849687771344487e-06, |
| "loss": 0.5193, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.677981864682632, |
| "grad_norm": 2.3401336739315686, |
| "learning_rate": 4.841565436644609e-06, |
| "loss": 0.5108, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.6793768890955592, |
| "grad_norm": 2.173303767477864, |
| "learning_rate": 4.8334435204354915e-06, |
| "loss": 0.4609, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.6807719135084864, |
| "grad_norm": 2.1088182001036957, |
| "learning_rate": 4.825322044170451e-06, |
| "loss": 0.5097, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.6821669379214135, |
| "grad_norm": 1.9686905359024993, |
| "learning_rate": 4.81720102930164e-06, |
| "loss": 0.4725, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.6835619623343407, |
| "grad_norm": 2.1236567609317265, |
| "learning_rate": 4.809080497279998e-06, |
| "loss": 0.4915, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.684956986747268, |
| "grad_norm": 2.2136938683111986, |
| "learning_rate": 4.800960469555183e-06, |
| "loss": 0.4904, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.6863520111601953, |
| "grad_norm": 1.944578414169582, |
| "learning_rate": 4.792840967575523e-06, |
| "loss": 0.4837, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.6877470355731226, |
| "grad_norm": 2.068496086879585, |
| "learning_rate": 4.784722012787961e-06, |
| "loss": 0.4957, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.6891420599860498, |
| "grad_norm": 2.259138484792416, |
| "learning_rate": 4.776603626637988e-06, |
| "loss": 0.4853, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.690537084398977, |
| "grad_norm": 2.0361945607382017, |
| "learning_rate": 4.768485830569598e-06, |
| "loss": 0.4494, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.6919321088119044, |
| "grad_norm": 1.9985570020748773, |
| "learning_rate": 4.7603686460252265e-06, |
| "loss": 0.4912, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.6933271332248314, |
| "grad_norm": 2.1741124461981034, |
| "learning_rate": 4.75225209444569e-06, |
| "loss": 0.464, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.6947221576377587, |
| "grad_norm": 2.101109519431568, |
| "learning_rate": 4.744136197270135e-06, |
| "loss": 0.5325, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.6961171820506857, |
| "grad_norm": 2.2648499488417504, |
| "learning_rate": 4.736020975935981e-06, |
| "loss": 0.4799, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.697512206463613, |
| "grad_norm": 2.050720067395807, |
| "learning_rate": 4.72790645187886e-06, |
| "loss": 0.4379, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.6989072308765403, |
| "grad_norm": 1.9934244980053937, |
| "learning_rate": 4.7197926465325626e-06, |
| "loss": 0.4568, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.7003022552894675, |
| "grad_norm": 2.0680803010766353, |
| "learning_rate": 4.711679581328983e-06, |
| "loss": 0.4844, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.7016972797023948, |
| "grad_norm": 2.1697148699026427, |
| "learning_rate": 4.703567277698058e-06, |
| "loss": 0.5517, |
| "step": 1219 |
| }, |
| { |
| "epoch": 1.703092304115322, |
| "grad_norm": 2.3093501303979056, |
| "learning_rate": 4.695455757067712e-06, |
| "loss": 0.4494, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.7044873285282494, |
| "grad_norm": 1.9215030222404832, |
| "learning_rate": 4.687345040863808e-06, |
| "loss": 0.4841, |
| "step": 1221 |
| }, |
| { |
| "epoch": 1.7058823529411766, |
| "grad_norm": 2.194442976010545, |
| "learning_rate": 4.679235150510072e-06, |
| "loss": 0.4329, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.7072773773541037, |
| "grad_norm": 1.9682684824994168, |
| "learning_rate": 4.671126107428061e-06, |
| "loss": 0.4774, |
| "step": 1223 |
| }, |
| { |
| "epoch": 1.708672401767031, |
| "grad_norm": 1.9357493109786206, |
| "learning_rate": 4.663017933037087e-06, |
| "loss": 0.4623, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.710067426179958, |
| "grad_norm": 2.07959597800617, |
| "learning_rate": 4.6549106487541666e-06, |
| "loss": 0.4922, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.7114624505928853, |
| "grad_norm": 2.12106114838888, |
| "learning_rate": 4.646804275993971e-06, |
| "loss": 0.4598, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.7128574750058125, |
| "grad_norm": 2.193483910207871, |
| "learning_rate": 4.63869883616876e-06, |
| "loss": 0.4517, |
| "step": 1227 |
| }, |
| { |
| "epoch": 1.7142524994187398, |
| "grad_norm": 1.9267269677926486, |
| "learning_rate": 4.630594350688327e-06, |
| "loss": 0.4974, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.715647523831667, |
| "grad_norm": 2.159284260285703, |
| "learning_rate": 4.62249084095995e-06, |
| "loss": 0.464, |
| "step": 1229 |
| }, |
| { |
| "epoch": 1.7170425482445943, |
| "grad_norm": 2.04247259167603, |
| "learning_rate": 4.614388328388327e-06, |
| "loss": 0.5124, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.7184375726575216, |
| "grad_norm": 2.172072715605954, |
| "learning_rate": 4.606286834375517e-06, |
| "loss": 0.498, |
| "step": 1231 |
| }, |
| { |
| "epoch": 1.7198325970704489, |
| "grad_norm": 2.06551781626508, |
| "learning_rate": 4.598186380320899e-06, |
| "loss": 0.4357, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.721227621483376, |
| "grad_norm": 2.2163136697977404, |
| "learning_rate": 4.5900869876210986e-06, |
| "loss": 0.4807, |
| "step": 1233 |
| }, |
| { |
| "epoch": 1.7226226458963032, |
| "grad_norm": 2.0544727475631164, |
| "learning_rate": 4.581988677669935e-06, |
| "loss": 0.5156, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.7240176703092303, |
| "grad_norm": 2.1102785785058025, |
| "learning_rate": 4.573891471858375e-06, |
| "loss": 0.4841, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.7254126947221575, |
| "grad_norm": 2.091638327396252, |
| "learning_rate": 4.565795391574465e-06, |
| "loss": 0.4742, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.7268077191350848, |
| "grad_norm": 2.073708631501961, |
| "learning_rate": 4.5577004582032745e-06, |
| "loss": 0.4483, |
| "step": 1237 |
| }, |
| { |
| "epoch": 1.728202743548012, |
| "grad_norm": 2.0663949647182505, |
| "learning_rate": 4.549606693126851e-06, |
| "loss": 0.464, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.7295977679609393, |
| "grad_norm": 2.0950685671584885, |
| "learning_rate": 4.541514117724155e-06, |
| "loss": 0.5271, |
| "step": 1239 |
| }, |
| { |
| "epoch": 1.7309927923738666, |
| "grad_norm": 2.069764512552555, |
| "learning_rate": 4.533422753370995e-06, |
| "loss": 0.4246, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.7323878167867939, |
| "grad_norm": 1.9471175111782795, |
| "learning_rate": 4.525332621439995e-06, |
| "loss": 0.4876, |
| "step": 1241 |
| }, |
| { |
| "epoch": 1.7337828411997211, |
| "grad_norm": 2.106836696946677, |
| "learning_rate": 4.517243743300513e-06, |
| "loss": 0.4195, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.7351778656126482, |
| "grad_norm": 2.1428624250471917, |
| "learning_rate": 4.5091561403185976e-06, |
| "loss": 0.5121, |
| "step": 1243 |
| }, |
| { |
| "epoch": 1.7365728900255755, |
| "grad_norm": 2.1001384678126596, |
| "learning_rate": 4.501069833856934e-06, |
| "loss": 0.4635, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.7379679144385025, |
| "grad_norm": 2.124440573719448, |
| "learning_rate": 4.492984845274774e-06, |
| "loss": 0.4337, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.7393629388514298, |
| "grad_norm": 2.0913781241638056, |
| "learning_rate": 4.484901195927901e-06, |
| "loss": 0.4648, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.740757963264357, |
| "grad_norm": 1.988763311594427, |
| "learning_rate": 4.476818907168545e-06, |
| "loss": 0.4183, |
| "step": 1247 |
| }, |
| { |
| "epoch": 1.7421529876772843, |
| "grad_norm": 2.026709075336435, |
| "learning_rate": 4.4687380003453555e-06, |
| "loss": 0.4416, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.7435480120902116, |
| "grad_norm": 2.1037185629898834, |
| "learning_rate": 4.460658496803327e-06, |
| "loss": 0.4772, |
| "step": 1249 |
| }, |
| { |
| "epoch": 1.7449430365031389, |
| "grad_norm": 2.1544403103897563, |
| "learning_rate": 4.4525804178837425e-06, |
| "loss": 0.461, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.7463380609160661, |
| "grad_norm": 2.2928421209767595, |
| "learning_rate": 4.4445037849241305e-06, |
| "loss": 0.5024, |
| "step": 1251 |
| }, |
| { |
| "epoch": 1.7477330853289934, |
| "grad_norm": 2.135997919508182, |
| "learning_rate": 4.436428619258196e-06, |
| "loss": 0.4752, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.7491281097419205, |
| "grad_norm": 2.085493617102473, |
| "learning_rate": 4.428354942215766e-06, |
| "loss": 0.4422, |
| "step": 1253 |
| }, |
| { |
| "epoch": 1.7505231341548477, |
| "grad_norm": 2.120851175775019, |
| "learning_rate": 4.42028277512274e-06, |
| "loss": 0.4488, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.7519181585677748, |
| "grad_norm": 1.9217827847817792, |
| "learning_rate": 4.412212139301027e-06, |
| "loss": 0.4928, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.753313182980702, |
| "grad_norm": 2.128839111883042, |
| "learning_rate": 4.404143056068489e-06, |
| "loss": 0.4084, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.7547082073936293, |
| "grad_norm": 1.9027826190815191, |
| "learning_rate": 4.3960755467388916e-06, |
| "loss": 0.475, |
| "step": 1257 |
| }, |
| { |
| "epoch": 1.7561032318065566, |
| "grad_norm": 2.0787775348878195, |
| "learning_rate": 4.388009632621841e-06, |
| "loss": 0.5112, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.7574982562194839, |
| "grad_norm": 2.0669238505154985, |
| "learning_rate": 4.379945335022727e-06, |
| "loss": 0.4698, |
| "step": 1259 |
| }, |
| { |
| "epoch": 1.7588932806324111, |
| "grad_norm": 2.251534310250234, |
| "learning_rate": 4.371882675242674e-06, |
| "loss": 0.5002, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.7602883050453384, |
| "grad_norm": 2.2205227372571517, |
| "learning_rate": 4.363821674578479e-06, |
| "loss": 0.5113, |
| "step": 1261 |
| }, |
| { |
| "epoch": 1.7616833294582657, |
| "grad_norm": 2.0821257827137356, |
| "learning_rate": 4.355762354322552e-06, |
| "loss": 0.4511, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.7630783538711927, |
| "grad_norm": 2.0560505922750965, |
| "learning_rate": 4.347704735762872e-06, |
| "loss": 0.496, |
| "step": 1263 |
| }, |
| { |
| "epoch": 1.76447337828412, |
| "grad_norm": 2.0968400735874506, |
| "learning_rate": 4.339648840182919e-06, |
| "loss": 0.4827, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.765868402697047, |
| "grad_norm": 2.0915848890730655, |
| "learning_rate": 4.331594688861619e-06, |
| "loss": 0.4674, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.7672634271099743, |
| "grad_norm": 2.1096575286314523, |
| "learning_rate": 4.323542303073297e-06, |
| "loss": 0.4453, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.7686584515229016, |
| "grad_norm": 2.1182357112080714, |
| "learning_rate": 4.315491704087613e-06, |
| "loss": 0.4546, |
| "step": 1267 |
| }, |
| { |
| "epoch": 1.7700534759358288, |
| "grad_norm": 1.978572548445151, |
| "learning_rate": 4.3074429131695e-06, |
| "loss": 0.4644, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.7714485003487561, |
| "grad_norm": 2.1705214494874387, |
| "learning_rate": 4.299395951579126e-06, |
| "loss": 0.468, |
| "step": 1269 |
| }, |
| { |
| "epoch": 1.7728435247616834, |
| "grad_norm": 2.0255085627659146, |
| "learning_rate": 4.291350840571821e-06, |
| "loss": 0.4304, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.7742385491746107, |
| "grad_norm": 2.072433117751704, |
| "learning_rate": 4.283307601398026e-06, |
| "loss": 0.4824, |
| "step": 1271 |
| }, |
| { |
| "epoch": 1.775633573587538, |
| "grad_norm": 2.2629627765772677, |
| "learning_rate": 4.275266255303238e-06, |
| "loss": 0.4915, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.777028598000465, |
| "grad_norm": 2.234922030785417, |
| "learning_rate": 4.2672268235279616e-06, |
| "loss": 0.4629, |
| "step": 1273 |
| }, |
| { |
| "epoch": 1.7784236224133922, |
| "grad_norm": 1.9270261774066482, |
| "learning_rate": 4.259189327307632e-06, |
| "loss": 0.4784, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.7798186468263193, |
| "grad_norm": 2.2081059039595683, |
| "learning_rate": 4.251153787872579e-06, |
| "loss": 0.4803, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.7812136712392466, |
| "grad_norm": 1.939134834396615, |
| "learning_rate": 4.2431202264479665e-06, |
| "loss": 0.4586, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.7826086956521738, |
| "grad_norm": 2.095040560965543, |
| "learning_rate": 4.235088664253726e-06, |
| "loss": 0.5089, |
| "step": 1277 |
| }, |
| { |
| "epoch": 1.784003720065101, |
| "grad_norm": 2.1919774266759515, |
| "learning_rate": 4.227059122504514e-06, |
| "loss": 0.485, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.7853987444780284, |
| "grad_norm": 1.8782861952292524, |
| "learning_rate": 4.21903162240965e-06, |
| "loss": 0.4446, |
| "step": 1279 |
| }, |
| { |
| "epoch": 1.7867937688909556, |
| "grad_norm": 2.0890195629123385, |
| "learning_rate": 4.211006185173056e-06, |
| "loss": 0.5088, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.788188793303883, |
| "grad_norm": 2.026437008974195, |
| "learning_rate": 4.20298283199321e-06, |
| "loss": 0.425, |
| "step": 1281 |
| }, |
| { |
| "epoch": 1.7895838177168102, |
| "grad_norm": 2.2467401498211252, |
| "learning_rate": 4.1949615840630845e-06, |
| "loss": 0.4653, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.7909788421297372, |
| "grad_norm": 2.093166602036887, |
| "learning_rate": 4.186942462570087e-06, |
| "loss": 0.5048, |
| "step": 1283 |
| }, |
| { |
| "epoch": 1.7923738665426645, |
| "grad_norm": 2.156850489159853, |
| "learning_rate": 4.178925488696012e-06, |
| "loss": 0.4903, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.7937688909555918, |
| "grad_norm": 2.085389759207312, |
| "learning_rate": 4.170910683616985e-06, |
| "loss": 0.4688, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.7951639153685188, |
| "grad_norm": 2.213200329005782, |
| "learning_rate": 4.1628980685033914e-06, |
| "loss": 0.5185, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.796558939781446, |
| "grad_norm": 2.050195188781033, |
| "learning_rate": 4.154887664519842e-06, |
| "loss": 0.4881, |
| "step": 1287 |
| }, |
| { |
| "epoch": 1.7979539641943734, |
| "grad_norm": 2.1196860562270294, |
| "learning_rate": 4.1468794928251064e-06, |
| "loss": 0.4673, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.7993489886073006, |
| "grad_norm": 2.177343517348136, |
| "learning_rate": 4.138873574572053e-06, |
| "loss": 0.4747, |
| "step": 1289 |
| }, |
| { |
| "epoch": 1.800744013020228, |
| "grad_norm": 2.003472990127112, |
| "learning_rate": 4.130869930907599e-06, |
| "loss": 0.4159, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.8021390374331552, |
| "grad_norm": 1.8839471124092069, |
| "learning_rate": 4.122868582972659e-06, |
| "loss": 0.469, |
| "step": 1291 |
| }, |
| { |
| "epoch": 1.8035340618460824, |
| "grad_norm": 2.035907134161031, |
| "learning_rate": 4.114869551902075e-06, |
| "loss": 0.4282, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.8049290862590095, |
| "grad_norm": 2.001424449827008, |
| "learning_rate": 4.106872858824576e-06, |
| "loss": 0.4554, |
| "step": 1293 |
| }, |
| { |
| "epoch": 1.8063241106719368, |
| "grad_norm": 2.008750426771798, |
| "learning_rate": 4.098878524862715e-06, |
| "loss": 0.4981, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.807719135084864, |
| "grad_norm": 2.2520451839673012, |
| "learning_rate": 4.090886571132807e-06, |
| "loss": 0.5648, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.809114159497791, |
| "grad_norm": 2.216674336178155, |
| "learning_rate": 4.082897018744887e-06, |
| "loss": 0.4966, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.8105091839107184, |
| "grad_norm": 2.3579614946903833, |
| "learning_rate": 4.074909888802648e-06, |
| "loss": 0.5033, |
| "step": 1297 |
| }, |
| { |
| "epoch": 1.8119042083236456, |
| "grad_norm": 1.8522361974582706, |
| "learning_rate": 4.066925202403374e-06, |
| "loss": 0.4183, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.813299232736573, |
| "grad_norm": 2.0409109799975242, |
| "learning_rate": 4.058942980637906e-06, |
| "loss": 0.4835, |
| "step": 1299 |
| }, |
| { |
| "epoch": 1.8146942571495002, |
| "grad_norm": 1.9846733203898725, |
| "learning_rate": 4.050963244590571e-06, |
| "loss": 0.4369, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.8160892815624274, |
| "grad_norm": 1.9908168675190487, |
| "learning_rate": 4.042986015339126e-06, |
| "loss": 0.4705, |
| "step": 1301 |
| }, |
| { |
| "epoch": 1.8174843059753547, |
| "grad_norm": 1.97724651246631, |
| "learning_rate": 4.035011313954713e-06, |
| "loss": 0.4826, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.8188793303882818, |
| "grad_norm": 2.1982531234476284, |
| "learning_rate": 4.027039161501795e-06, |
| "loss": 0.4077, |
| "step": 1303 |
| }, |
| { |
| "epoch": 1.820274354801209, |
| "grad_norm": 1.9503370551181338, |
| "learning_rate": 4.019069579038096e-06, |
| "loss": 0.4615, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.8216693792141363, |
| "grad_norm": 1.9113061201404709, |
| "learning_rate": 4.011102587614563e-06, |
| "loss": 0.427, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.8230644036270633, |
| "grad_norm": 2.088228005988047, |
| "learning_rate": 4.00313820827529e-06, |
| "loss": 0.4925, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.8244594280399906, |
| "grad_norm": 2.265032952966295, |
| "learning_rate": 3.995176462057473e-06, |
| "loss": 0.5266, |
| "step": 1307 |
| }, |
| { |
| "epoch": 1.8258544524529179, |
| "grad_norm": 2.2633304605254065, |
| "learning_rate": 3.987217369991357e-06, |
| "loss": 0.4703, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.8272494768658452, |
| "grad_norm": 2.034863642322609, |
| "learning_rate": 3.979260953100169e-06, |
| "loss": 0.4721, |
| "step": 1309 |
| }, |
| { |
| "epoch": 1.8286445012787724, |
| "grad_norm": 2.0974244608448736, |
| "learning_rate": 3.97130723240008e-06, |
| "loss": 0.4539, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.8300395256916997, |
| "grad_norm": 2.414871430447381, |
| "learning_rate": 3.96335622890013e-06, |
| "loss": 0.4988, |
| "step": 1311 |
| }, |
| { |
| "epoch": 1.831434550104627, |
| "grad_norm": 1.6347770659827563, |
| "learning_rate": 3.955407963602184e-06, |
| "loss": 0.4765, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.832829574517554, |
| "grad_norm": 1.9715809826573552, |
| "learning_rate": 3.94746245750088e-06, |
| "loss": 0.4583, |
| "step": 1313 |
| }, |
| { |
| "epoch": 1.8342245989304813, |
| "grad_norm": 2.1527465543653252, |
| "learning_rate": 3.939519731583557e-06, |
| "loss": 0.4851, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.8356196233434086, |
| "grad_norm": 1.8555310432515144, |
| "learning_rate": 3.9315798068302214e-06, |
| "loss": 0.4367, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.8370146477563356, |
| "grad_norm": 1.871881038073264, |
| "learning_rate": 3.923642704213475e-06, |
| "loss": 0.4948, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.8384096721692629, |
| "grad_norm": 2.068493987616388, |
| "learning_rate": 3.915708444698465e-06, |
| "loss": 0.4911, |
| "step": 1317 |
| }, |
| { |
| "epoch": 1.8398046965821901, |
| "grad_norm": 2.108597697463182, |
| "learning_rate": 3.907777049242828e-06, |
| "loss": 0.4578, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.8411997209951174, |
| "grad_norm": 2.034896589340762, |
| "learning_rate": 3.899848538796643e-06, |
| "loss": 0.4772, |
| "step": 1319 |
| }, |
| { |
| "epoch": 1.8425947454080447, |
| "grad_norm": 2.0072300235614255, |
| "learning_rate": 3.891922934302356e-06, |
| "loss": 0.4897, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.843989769820972, |
| "grad_norm": 2.188051096125535, |
| "learning_rate": 3.884000256694749e-06, |
| "loss": 0.4872, |
| "step": 1321 |
| }, |
| { |
| "epoch": 1.8453847942338992, |
| "grad_norm": 2.118044774667855, |
| "learning_rate": 3.876080526900867e-06, |
| "loss": 0.5054, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.8467798186468263, |
| "grad_norm": 2.1344072938179344, |
| "learning_rate": 3.868163765839966e-06, |
| "loss": 0.4731, |
| "step": 1323 |
| }, |
| { |
| "epoch": 1.8481748430597535, |
| "grad_norm": 2.053672801159552, |
| "learning_rate": 3.860249994423467e-06, |
| "loss": 0.4723, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.8495698674726808, |
| "grad_norm": 2.230370059524332, |
| "learning_rate": 3.852339233554891e-06, |
| "loss": 0.4587, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.8509648918856079, |
| "grad_norm": 2.1250159637012827, |
| "learning_rate": 3.844431504129804e-06, |
| "loss": 0.5112, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.8523599162985351, |
| "grad_norm": 2.0955476438334806, |
| "learning_rate": 3.8365268270357715e-06, |
| "loss": 0.4793, |
| "step": 1327 |
| }, |
| { |
| "epoch": 1.8537549407114624, |
| "grad_norm": 1.9947397547326173, |
| "learning_rate": 3.828625223152291e-06, |
| "loss": 0.4653, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.8551499651243897, |
| "grad_norm": 2.0597481264501214, |
| "learning_rate": 3.820726713350742e-06, |
| "loss": 0.4625, |
| "step": 1329 |
| }, |
| { |
| "epoch": 1.856544989537317, |
| "grad_norm": 2.2135489734378755, |
| "learning_rate": 3.812831318494335e-06, |
| "loss": 0.4897, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.8579400139502442, |
| "grad_norm": 2.010088784483442, |
| "learning_rate": 3.804939059438052e-06, |
| "loss": 0.4686, |
| "step": 1331 |
| }, |
| { |
| "epoch": 1.8593350383631715, |
| "grad_norm": 2.0704467732349485, |
| "learning_rate": 3.797049957028588e-06, |
| "loss": 0.4763, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.8607300627760985, |
| "grad_norm": 2.0974552233676036, |
| "learning_rate": 3.7891640321043054e-06, |
| "loss": 0.421, |
| "step": 1333 |
| }, |
| { |
| "epoch": 1.8621250871890258, |
| "grad_norm": 2.1435925715160145, |
| "learning_rate": 3.781281305495171e-06, |
| "loss": 0.419, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.863520111601953, |
| "grad_norm": 2.0082993030068437, |
| "learning_rate": 3.773401798022701e-06, |
| "loss": 0.4321, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.8649151360148801, |
| "grad_norm": 2.1583115545533347, |
| "learning_rate": 3.765525530499915e-06, |
| "loss": 0.4628, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.8663101604278074, |
| "grad_norm": 2.11597183573991, |
| "learning_rate": 3.757652523731269e-06, |
| "loss": 0.4195, |
| "step": 1337 |
| }, |
| { |
| "epoch": 1.8677051848407347, |
| "grad_norm": 1.949534543566651, |
| "learning_rate": 3.7497827985126054e-06, |
| "loss": 0.4481, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.869100209253662, |
| "grad_norm": 2.0253447244970335, |
| "learning_rate": 3.741916375631105e-06, |
| "loss": 0.4319, |
| "step": 1339 |
| }, |
| { |
| "epoch": 1.8704952336665892, |
| "grad_norm": 2.3285146478186247, |
| "learning_rate": 3.7340532758652217e-06, |
| "loss": 0.5306, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.8718902580795165, |
| "grad_norm": 2.152694903826016, |
| "learning_rate": 3.7261935199846266e-06, |
| "loss": 0.4423, |
| "step": 1341 |
| }, |
| { |
| "epoch": 1.8732852824924437, |
| "grad_norm": 2.2329276959444257, |
| "learning_rate": 3.7183371287501684e-06, |
| "loss": 0.4592, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.8746803069053708, |
| "grad_norm": 1.9615943761756212, |
| "learning_rate": 3.7104841229138034e-06, |
| "loss": 0.4831, |
| "step": 1343 |
| }, |
| { |
| "epoch": 1.876075331318298, |
| "grad_norm": 2.0983585004345557, |
| "learning_rate": 3.7026345232185416e-06, |
| "loss": 0.4368, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.8774703557312253, |
| "grad_norm": 1.9460400781991807, |
| "learning_rate": 3.6947883503984037e-06, |
| "loss": 0.4429, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.8788653801441524, |
| "grad_norm": 1.9426677248205293, |
| "learning_rate": 3.686945625178356e-06, |
| "loss": 0.4477, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.8802604045570797, |
| "grad_norm": 2.010880459472735, |
| "learning_rate": 3.6791063682742535e-06, |
| "loss": 0.4897, |
| "step": 1347 |
| }, |
| { |
| "epoch": 1.881655428970007, |
| "grad_norm": 2.032395468552332, |
| "learning_rate": 3.6712706003927937e-06, |
| "loss": 0.464, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.8830504533829342, |
| "grad_norm": 2.033909833330093, |
| "learning_rate": 3.6634383422314622e-06, |
| "loss": 0.4415, |
| "step": 1349 |
| }, |
| { |
| "epoch": 1.8844454777958615, |
| "grad_norm": 2.1329623060747607, |
| "learning_rate": 3.655609614478467e-06, |
| "loss": 0.5069, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.8858405022087887, |
| "grad_norm": 2.225055368929594, |
| "learning_rate": 3.647784437812693e-06, |
| "loss": 0.4889, |
| "step": 1351 |
| }, |
| { |
| "epoch": 1.887235526621716, |
| "grad_norm": 2.141063499651939, |
| "learning_rate": 3.6399628329036496e-06, |
| "loss": 0.4978, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.888630551034643, |
| "grad_norm": 2.2248929304810097, |
| "learning_rate": 3.632144820411405e-06, |
| "loss": 0.4004, |
| "step": 1353 |
| }, |
| { |
| "epoch": 1.8900255754475703, |
| "grad_norm": 1.8643692161399157, |
| "learning_rate": 3.624330420986541e-06, |
| "loss": 0.4244, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.8914205998604976, |
| "grad_norm": 1.9733735956698852, |
| "learning_rate": 3.6165196552701e-06, |
| "loss": 0.4697, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.8928156242734246, |
| "grad_norm": 1.9848401068488462, |
| "learning_rate": 3.6087125438935187e-06, |
| "loss": 0.4624, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.894210648686352, |
| "grad_norm": 2.073164703831999, |
| "learning_rate": 3.6009091074785853e-06, |
| "loss": 0.4629, |
| "step": 1357 |
| }, |
| { |
| "epoch": 1.8956056730992792, |
| "grad_norm": 2.0659109705444556, |
| "learning_rate": 3.5931093666373845e-06, |
| "loss": 0.513, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.8970006975122065, |
| "grad_norm": 2.1125582756734103, |
| "learning_rate": 3.585313341972232e-06, |
| "loss": 0.4522, |
| "step": 1359 |
| }, |
| { |
| "epoch": 1.8983957219251337, |
| "grad_norm": 2.0584832403532904, |
| "learning_rate": 3.577521054075631e-06, |
| "loss": 0.4274, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.899790746338061, |
| "grad_norm": 2.0843140708165815, |
| "learning_rate": 3.5697325235302183e-06, |
| "loss": 0.4201, |
| "step": 1361 |
| }, |
| { |
| "epoch": 1.9011857707509883, |
| "grad_norm": 2.0579271534464585, |
| "learning_rate": 3.5619477709086982e-06, |
| "loss": 0.4297, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.9025807951639153, |
| "grad_norm": 2.1115377455750535, |
| "learning_rate": 3.5541668167738003e-06, |
| "loss": 0.5433, |
| "step": 1363 |
| }, |
| { |
| "epoch": 1.9039758195768426, |
| "grad_norm": 2.2747225928648978, |
| "learning_rate": 3.546389681678224e-06, |
| "loss": 0.4562, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.9053708439897699, |
| "grad_norm": 1.9665911130251685, |
| "learning_rate": 3.538616386164575e-06, |
| "loss": 0.4618, |
| "step": 1365 |
| }, |
| { |
| "epoch": 1.906765868402697, |
| "grad_norm": 2.04359025515372, |
| "learning_rate": 3.530846950765318e-06, |
| "loss": 0.4771, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.9081608928156242, |
| "grad_norm": 2.105984355882839, |
| "learning_rate": 3.5230813960027275e-06, |
| "loss": 0.4773, |
| "step": 1367 |
| }, |
| { |
| "epoch": 1.9095559172285514, |
| "grad_norm": 2.256498118072638, |
| "learning_rate": 3.5153197423888206e-06, |
| "loss": 0.4975, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.9109509416414787, |
| "grad_norm": 1.9536343765489645, |
| "learning_rate": 3.5075620104253123e-06, |
| "loss": 0.4375, |
| "step": 1369 |
| }, |
| { |
| "epoch": 1.912345966054406, |
| "grad_norm": 2.0611008824205754, |
| "learning_rate": 3.4998082206035606e-06, |
| "loss": 0.4481, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.9137409904673333, |
| "grad_norm": 1.8294594643156625, |
| "learning_rate": 3.492058393404509e-06, |
| "loss": 0.5112, |
| "step": 1371 |
| }, |
| { |
| "epoch": 1.9151360148802605, |
| "grad_norm": 2.0956104344978823, |
| "learning_rate": 3.4843125492986345e-06, |
| "loss": 0.4213, |
| "step": 1372 |
| }, |
| { |
| "epoch": 1.9165310392931876, |
| "grad_norm": 1.911894442742041, |
| "learning_rate": 3.4765707087458912e-06, |
| "loss": 0.493, |
| "step": 1373 |
| }, |
| { |
| "epoch": 1.9179260637061148, |
| "grad_norm": 2.102284559161196, |
| "learning_rate": 3.468832892195664e-06, |
| "loss": 0.4985, |
| "step": 1374 |
| }, |
| { |
| "epoch": 1.9193210881190421, |
| "grad_norm": 2.5416534065559935, |
| "learning_rate": 3.4610991200867006e-06, |
| "loss": 0.4735, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.9207161125319692, |
| "grad_norm": 2.0019610291575805, |
| "learning_rate": 3.453369412847071e-06, |
| "loss": 0.4061, |
| "step": 1376 |
| }, |
| { |
| "epoch": 1.9221111369448964, |
| "grad_norm": 2.0730001274558947, |
| "learning_rate": 3.445643790894109e-06, |
| "loss": 0.4451, |
| "step": 1377 |
| }, |
| { |
| "epoch": 1.9235061613578237, |
| "grad_norm": 2.0984153028815538, |
| "learning_rate": 3.4379222746343534e-06, |
| "loss": 0.4679, |
| "step": 1378 |
| }, |
| { |
| "epoch": 1.924901185770751, |
| "grad_norm": 2.074651799672325, |
| "learning_rate": 3.4302048844634995e-06, |
| "loss": 0.4776, |
| "step": 1379 |
| }, |
| { |
| "epoch": 1.9262962101836782, |
| "grad_norm": 2.027386258813131, |
| "learning_rate": 3.4224916407663484e-06, |
| "loss": 0.427, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.9276912345966055, |
| "grad_norm": 2.1138275693841986, |
| "learning_rate": 3.414782563916742e-06, |
| "loss": 0.5011, |
| "step": 1381 |
| }, |
| { |
| "epoch": 1.9290862590095328, |
| "grad_norm": 2.192773328005699, |
| "learning_rate": 3.407077674277518e-06, |
| "loss": 0.4716, |
| "step": 1382 |
| }, |
| { |
| "epoch": 1.93048128342246, |
| "grad_norm": 2.213292858679933, |
| "learning_rate": 3.3993769922004584e-06, |
| "loss": 0.5317, |
| "step": 1383 |
| }, |
| { |
| "epoch": 1.931876307835387, |
| "grad_norm": 2.1487844965796308, |
| "learning_rate": 3.391680538026224e-06, |
| "loss": 0.4242, |
| "step": 1384 |
| }, |
| { |
| "epoch": 1.9332713322483144, |
| "grad_norm": 2.090023360978835, |
| "learning_rate": 3.3839883320843125e-06, |
| "loss": 0.4513, |
| "step": 1385 |
| }, |
| { |
| "epoch": 1.9346663566612414, |
| "grad_norm": 1.9959369737145263, |
| "learning_rate": 3.3763003946930023e-06, |
| "loss": 0.4656, |
| "step": 1386 |
| }, |
| { |
| "epoch": 1.9360613810741687, |
| "grad_norm": 2.1931110824630418, |
| "learning_rate": 3.36861674615929e-06, |
| "loss": 0.4548, |
| "step": 1387 |
| }, |
| { |
| "epoch": 1.937456405487096, |
| "grad_norm": 2.190386146798527, |
| "learning_rate": 3.360937406778849e-06, |
| "loss": 0.5095, |
| "step": 1388 |
| }, |
| { |
| "epoch": 1.9388514299000232, |
| "grad_norm": 2.135412483356473, |
| "learning_rate": 3.35326239683597e-06, |
| "loss": 0.4221, |
| "step": 1389 |
| }, |
| { |
| "epoch": 1.9402464543129505, |
| "grad_norm": 2.0515132595158616, |
| "learning_rate": 3.3455917366035058e-06, |
| "loss": 0.4702, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.9416414787258778, |
| "grad_norm": 2.073867022732308, |
| "learning_rate": 3.337925446342819e-06, |
| "loss": 0.4391, |
| "step": 1391 |
| }, |
| { |
| "epoch": 1.943036503138805, |
| "grad_norm": 1.8548990606837181, |
| "learning_rate": 3.3302635463037352e-06, |
| "loss": 0.3957, |
| "step": 1392 |
| }, |
| { |
| "epoch": 1.9444315275517323, |
| "grad_norm": 2.0591050886917666, |
| "learning_rate": 3.3226060567244767e-06, |
| "loss": 0.479, |
| "step": 1393 |
| }, |
| { |
| "epoch": 1.9458265519646594, |
| "grad_norm": 2.1031599275109993, |
| "learning_rate": 3.314952997831618e-06, |
| "loss": 0.4221, |
| "step": 1394 |
| }, |
| { |
| "epoch": 1.9472215763775866, |
| "grad_norm": 1.8174504828503302, |
| "learning_rate": 3.307304389840036e-06, |
| "loss": 0.4339, |
| "step": 1395 |
| }, |
| { |
| "epoch": 1.9486166007905137, |
| "grad_norm": 2.0361355052531147, |
| "learning_rate": 3.29966025295284e-06, |
| "loss": 0.447, |
| "step": 1396 |
| }, |
| { |
| "epoch": 1.950011625203441, |
| "grad_norm": 2.116288896112645, |
| "learning_rate": 3.292020607361337e-06, |
| "loss": 0.5655, |
| "step": 1397 |
| }, |
| { |
| "epoch": 1.9514066496163682, |
| "grad_norm": 2.1226615577582777, |
| "learning_rate": 3.284385473244974e-06, |
| "loss": 0.4208, |
| "step": 1398 |
| }, |
| { |
| "epoch": 1.9528016740292955, |
| "grad_norm": 2.0903634352822804, |
| "learning_rate": 3.2767548707712693e-06, |
| "loss": 0.4854, |
| "step": 1399 |
| }, |
| { |
| "epoch": 1.9541966984422228, |
| "grad_norm": 2.1629936996115458, |
| "learning_rate": 3.2691288200957826e-06, |
| "loss": 0.4792, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.95559172285515, |
| "grad_norm": 2.186118380372086, |
| "learning_rate": 3.2615073413620467e-06, |
| "loss": 0.4976, |
| "step": 1401 |
| }, |
| { |
| "epoch": 1.9569867472680773, |
| "grad_norm": 2.0815648698913485, |
| "learning_rate": 3.2538904547015137e-06, |
| "loss": 0.4719, |
| "step": 1402 |
| }, |
| { |
| "epoch": 1.9583817716810046, |
| "grad_norm": 2.0373008076972, |
| "learning_rate": 3.2462781802335124e-06, |
| "loss": 0.4655, |
| "step": 1403 |
| }, |
| { |
| "epoch": 1.9597767960939316, |
| "grad_norm": 2.187393729560026, |
| "learning_rate": 3.2386705380651877e-06, |
| "loss": 0.4686, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.961171820506859, |
| "grad_norm": 1.857328655696565, |
| "learning_rate": 3.2310675482914444e-06, |
| "loss": 0.4681, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.962566844919786, |
| "grad_norm": 2.1023104143665847, |
| "learning_rate": 3.2234692309949034e-06, |
| "loss": 0.4678, |
| "step": 1406 |
| }, |
| { |
| "epoch": 1.9639618693327132, |
| "grad_norm": 2.036361172428041, |
| "learning_rate": 3.2158756062458422e-06, |
| "loss": 0.4722, |
| "step": 1407 |
| }, |
| { |
| "epoch": 1.9653568937456405, |
| "grad_norm": 2.0089004500677756, |
| "learning_rate": 3.208286694102141e-06, |
| "loss": 0.4994, |
| "step": 1408 |
| }, |
| { |
| "epoch": 1.9667519181585678, |
| "grad_norm": 2.1615010459513075, |
| "learning_rate": 3.2007025146092345e-06, |
| "loss": 0.4398, |
| "step": 1409 |
| }, |
| { |
| "epoch": 1.968146942571495, |
| "grad_norm": 1.8494820793896445, |
| "learning_rate": 3.1931230878000586e-06, |
| "loss": 0.45, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.9695419669844223, |
| "grad_norm": 2.0839549919773552, |
| "learning_rate": 3.1855484336949876e-06, |
| "loss": 0.4568, |
| "step": 1411 |
| }, |
| { |
| "epoch": 1.9709369913973496, |
| "grad_norm": 1.9340841864438192, |
| "learning_rate": 3.1779785723017988e-06, |
| "loss": 0.5065, |
| "step": 1412 |
| }, |
| { |
| "epoch": 1.9723320158102768, |
| "grad_norm": 2.0965032360845637, |
| "learning_rate": 3.170413523615605e-06, |
| "loss": 0.441, |
| "step": 1413 |
| }, |
| { |
| "epoch": 1.9737270402232039, |
| "grad_norm": 2.1238493162876466, |
| "learning_rate": 3.162853307618805e-06, |
| "loss": 0.402, |
| "step": 1414 |
| }, |
| { |
| "epoch": 1.9751220646361312, |
| "grad_norm": 1.8144332716416323, |
| "learning_rate": 3.155297944281036e-06, |
| "loss": 0.4518, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.9765170890490582, |
| "grad_norm": 2.0794891682927177, |
| "learning_rate": 3.1477474535591167e-06, |
| "loss": 0.4618, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.9779121134619855, |
| "grad_norm": 2.092341448966828, |
| "learning_rate": 3.1402018553969917e-06, |
| "loss": 0.5035, |
| "step": 1417 |
| }, |
| { |
| "epoch": 1.9793071378749127, |
| "grad_norm": 2.033890245909392, |
| "learning_rate": 3.132661169725688e-06, |
| "loss": 0.4757, |
| "step": 1418 |
| }, |
| { |
| "epoch": 1.98070216228784, |
| "grad_norm": 1.9352972803319826, |
| "learning_rate": 3.125125416463254e-06, |
| "loss": 0.4332, |
| "step": 1419 |
| }, |
| { |
| "epoch": 1.9820971867007673, |
| "grad_norm": 2.105289598294007, |
| "learning_rate": 3.1175946155147064e-06, |
| "loss": 0.4633, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.9834922111136946, |
| "grad_norm": 2.323601540036809, |
| "learning_rate": 3.110068786771987e-06, |
| "loss": 0.4709, |
| "step": 1421 |
| }, |
| { |
| "epoch": 1.9848872355266218, |
| "grad_norm": 1.7129060190000591, |
| "learning_rate": 3.1025479501139e-06, |
| "loss": 0.4934, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.986282259939549, |
| "grad_norm": 2.2024747583092115, |
| "learning_rate": 3.095032125406062e-06, |
| "loss": 0.4469, |
| "step": 1423 |
| }, |
| { |
| "epoch": 1.9876772843524761, |
| "grad_norm": 2.2101104273083885, |
| "learning_rate": 3.0875213325008548e-06, |
| "loss": 0.5061, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.9890723087654034, |
| "grad_norm": 2.1708027049479957, |
| "learning_rate": 3.0800155912373696e-06, |
| "loss": 0.4703, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.9904673331783305, |
| "grad_norm": 2.155953890818061, |
| "learning_rate": 3.0725149214413487e-06, |
| "loss": 0.4196, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.9918623575912577, |
| "grad_norm": 2.005245900668136, |
| "learning_rate": 3.065019342925143e-06, |
| "loss": 0.4886, |
| "step": 1427 |
| }, |
| { |
| "epoch": 1.993257382004185, |
| "grad_norm": 2.2555355821584158, |
| "learning_rate": 3.0575288754876565e-06, |
| "loss": 0.4786, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.9946524064171123, |
| "grad_norm": 1.868282129579876, |
| "learning_rate": 3.0500435389142867e-06, |
| "loss": 0.4108, |
| "step": 1429 |
| }, |
| { |
| "epoch": 1.9960474308300395, |
| "grad_norm": 2.1879958454321655, |
| "learning_rate": 3.042563352976884e-06, |
| "loss": 0.4564, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.9974424552429668, |
| "grad_norm": 2.036612583842871, |
| "learning_rate": 3.035088337433694e-06, |
| "loss": 0.4773, |
| "step": 1431 |
| }, |
| { |
| "epoch": 1.998837479655894, |
| "grad_norm": 2.0987197531994193, |
| "learning_rate": 3.0276185120292996e-06, |
| "loss": 0.4866, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.0013950244129273, |
| "grad_norm": 2.1060512775299687, |
| "learning_rate": 3.0201538964945787e-06, |
| "loss": 0.7126, |
| "step": 1433 |
| }, |
| { |
| "epoch": 2.0027900488258545, |
| "grad_norm": 1.932749210999598, |
| "learning_rate": 3.0126945105466486e-06, |
| "loss": 0.2665, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.004185073238782, |
| "grad_norm": 1.803959519110432, |
| "learning_rate": 3.005240373888812e-06, |
| "loss": 0.2909, |
| "step": 1435 |
| }, |
| { |
| "epoch": 2.005580097651709, |
| "grad_norm": 1.882695279541564, |
| "learning_rate": 2.9977915062105023e-06, |
| "loss": 0.2668, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.0069751220646364, |
| "grad_norm": 1.7001162613390293, |
| "learning_rate": 2.9903479271872416e-06, |
| "loss": 0.2382, |
| "step": 1437 |
| }, |
| { |
| "epoch": 2.008370146477563, |
| "grad_norm": 1.9030283807992499, |
| "learning_rate": 2.9829096564805804e-06, |
| "loss": 0.2321, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.0097651708904904, |
| "grad_norm": 1.7597971492802806, |
| "learning_rate": 2.975476713738043e-06, |
| "loss": 0.2496, |
| "step": 1439 |
| }, |
| { |
| "epoch": 2.0111601953034177, |
| "grad_norm": 1.7225059934269706, |
| "learning_rate": 2.9680491185930877e-06, |
| "loss": 0.2556, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.012555219716345, |
| "grad_norm": 1.8760479786303343, |
| "learning_rate": 2.960626890665044e-06, |
| "loss": 0.2429, |
| "step": 1441 |
| }, |
| { |
| "epoch": 2.0139502441292723, |
| "grad_norm": 1.8962652973945915, |
| "learning_rate": 2.953210049559062e-06, |
| "loss": 0.2149, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.0153452685421995, |
| "grad_norm": 2.384087090505903, |
| "learning_rate": 2.945798614866068e-06, |
| "loss": 0.2684, |
| "step": 1443 |
| }, |
| { |
| "epoch": 2.016740292955127, |
| "grad_norm": 2.373148278542001, |
| "learning_rate": 2.9383926061627055e-06, |
| "loss": 0.2407, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.018135317368054, |
| "grad_norm": 2.994047668095242, |
| "learning_rate": 2.9309920430112825e-06, |
| "loss": 0.2713, |
| "step": 1445 |
| }, |
| { |
| "epoch": 2.0195303417809813, |
| "grad_norm": 2.41812388777858, |
| "learning_rate": 2.92359694495973e-06, |
| "loss": 0.2561, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.0209253661939086, |
| "grad_norm": 2.064667983995233, |
| "learning_rate": 2.9162073315415384e-06, |
| "loss": 0.2361, |
| "step": 1447 |
| }, |
| { |
| "epoch": 2.0223203906068354, |
| "grad_norm": 2.4543601055709003, |
| "learning_rate": 2.9088232222757085e-06, |
| "loss": 0.2539, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.0237154150197627, |
| "grad_norm": 2.098982722115911, |
| "learning_rate": 2.9014446366667115e-06, |
| "loss": 0.2677, |
| "step": 1449 |
| }, |
| { |
| "epoch": 2.02511043943269, |
| "grad_norm": 2.098306519762135, |
| "learning_rate": 2.8940715942044204e-06, |
| "loss": 0.2583, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.0265054638456172, |
| "grad_norm": 2.000843801466561, |
| "learning_rate": 2.8867041143640663e-06, |
| "loss": 0.2395, |
| "step": 1451 |
| }, |
| { |
| "epoch": 2.0279004882585445, |
| "grad_norm": 2.145854321461008, |
| "learning_rate": 2.8793422166061918e-06, |
| "loss": 0.2821, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.029295512671472, |
| "grad_norm": 1.9525784509980673, |
| "learning_rate": 2.8719859203765955e-06, |
| "loss": 0.2263, |
| "step": 1453 |
| }, |
| { |
| "epoch": 2.030690537084399, |
| "grad_norm": 1.970474839624812, |
| "learning_rate": 2.864635245106272e-06, |
| "loss": 0.2427, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.0320855614973263, |
| "grad_norm": 1.9323635610264698, |
| "learning_rate": 2.8572902102113788e-06, |
| "loss": 0.2339, |
| "step": 1455 |
| }, |
| { |
| "epoch": 2.0334805859102536, |
| "grad_norm": 2.0460140070756276, |
| "learning_rate": 2.849950835093168e-06, |
| "loss": 0.2417, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.034875610323181, |
| "grad_norm": 1.8600960559305393, |
| "learning_rate": 2.8426171391379433e-06, |
| "loss": 0.2492, |
| "step": 1457 |
| }, |
| { |
| "epoch": 2.0362706347361077, |
| "grad_norm": 1.6992216414634733, |
| "learning_rate": 2.835289141717008e-06, |
| "loss": 0.2158, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.037665659149035, |
| "grad_norm": 1.9085343998278854, |
| "learning_rate": 2.827966862186616e-06, |
| "loss": 0.2362, |
| "step": 1459 |
| }, |
| { |
| "epoch": 2.0390606835619622, |
| "grad_norm": 1.9838552421241669, |
| "learning_rate": 2.820650319887911e-06, |
| "loss": 0.2375, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.0404557079748895, |
| "grad_norm": 2.0510000137002513, |
| "learning_rate": 2.8133395341468915e-06, |
| "loss": 0.2487, |
| "step": 1461 |
| }, |
| { |
| "epoch": 2.041850732387817, |
| "grad_norm": 2.0254719491785065, |
| "learning_rate": 2.8060345242743427e-06, |
| "loss": 0.2321, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.043245756800744, |
| "grad_norm": 1.7698260233356893, |
| "learning_rate": 2.7987353095657944e-06, |
| "loss": 0.2666, |
| "step": 1463 |
| }, |
| { |
| "epoch": 2.0446407812136713, |
| "grad_norm": 2.1844227717957545, |
| "learning_rate": 2.7914419093014734e-06, |
| "loss": 0.2418, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.0460358056265986, |
| "grad_norm": 1.8986321251662384, |
| "learning_rate": 2.784154342746246e-06, |
| "loss": 0.246, |
| "step": 1465 |
| }, |
| { |
| "epoch": 2.047430830039526, |
| "grad_norm": 2.1557796026506453, |
| "learning_rate": 2.7768726291495667e-06, |
| "loss": 0.2616, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.048825854452453, |
| "grad_norm": 2.1460228298343242, |
| "learning_rate": 2.7695967877454356e-06, |
| "loss": 0.2644, |
| "step": 1467 |
| }, |
| { |
| "epoch": 2.05022087886538, |
| "grad_norm": 2.183952950578596, |
| "learning_rate": 2.7623268377523356e-06, |
| "loss": 0.2136, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.0516159032783072, |
| "grad_norm": 1.8822992647896113, |
| "learning_rate": 2.755062798373189e-06, |
| "loss": 0.2454, |
| "step": 1469 |
| }, |
| { |
| "epoch": 2.0530109276912345, |
| "grad_norm": 2.0587391825233907, |
| "learning_rate": 2.747804688795311e-06, |
| "loss": 0.2572, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.0544059521041618, |
| "grad_norm": 2.0337932576382194, |
| "learning_rate": 2.7405525281903506e-06, |
| "loss": 0.2095, |
| "step": 1471 |
| }, |
| { |
| "epoch": 2.055800976517089, |
| "grad_norm": 2.180806793212479, |
| "learning_rate": 2.7333063357142414e-06, |
| "loss": 0.2398, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.0571960009300163, |
| "grad_norm": 2.1427495369403773, |
| "learning_rate": 2.7260661305071523e-06, |
| "loss": 0.2217, |
| "step": 1473 |
| }, |
| { |
| "epoch": 2.0585910253429436, |
| "grad_norm": 2.038226675373655, |
| "learning_rate": 2.718831931693443e-06, |
| "loss": 0.2492, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.059986049755871, |
| "grad_norm": 1.9218578361086367, |
| "learning_rate": 2.7116037583816e-06, |
| "loss": 0.2498, |
| "step": 1475 |
| }, |
| { |
| "epoch": 2.061381074168798, |
| "grad_norm": 2.0508632152717494, |
| "learning_rate": 2.7043816296642005e-06, |
| "loss": 0.2331, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.0627760985817254, |
| "grad_norm": 2.1232429074513224, |
| "learning_rate": 2.6971655646178544e-06, |
| "loss": 0.2612, |
| "step": 1477 |
| }, |
| { |
| "epoch": 2.064171122994652, |
| "grad_norm": 1.8524908410533427, |
| "learning_rate": 2.689955582303152e-06, |
| "loss": 0.2165, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.0655661474075795, |
| "grad_norm": 1.8152481510782883, |
| "learning_rate": 2.6827517017646154e-06, |
| "loss": 0.2245, |
| "step": 1479 |
| }, |
| { |
| "epoch": 2.0669611718205068, |
| "grad_norm": 2.0764635863149516, |
| "learning_rate": 2.6755539420306565e-06, |
| "loss": 0.2258, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.068356196233434, |
| "grad_norm": 1.720048095036099, |
| "learning_rate": 2.668362322113512e-06, |
| "loss": 0.2451, |
| "step": 1481 |
| }, |
| { |
| "epoch": 2.0697512206463613, |
| "grad_norm": 1.8692404797467597, |
| "learning_rate": 2.661176861009205e-06, |
| "loss": 0.2406, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.0711462450592886, |
| "grad_norm": 2.0026676185323313, |
| "learning_rate": 2.6539975776974926e-06, |
| "loss": 0.2306, |
| "step": 1483 |
| }, |
| { |
| "epoch": 2.072541269472216, |
| "grad_norm": 1.9167887054562096, |
| "learning_rate": 2.646824491141807e-06, |
| "loss": 0.2211, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.073936293885143, |
| "grad_norm": 1.94450012415561, |
| "learning_rate": 2.6396576202892176e-06, |
| "loss": 0.2207, |
| "step": 1485 |
| }, |
| { |
| "epoch": 2.0753313182980704, |
| "grad_norm": 1.8160393968583468, |
| "learning_rate": 2.632496984070375e-06, |
| "loss": 0.2144, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.0767263427109977, |
| "grad_norm": 2.041628010035314, |
| "learning_rate": 2.6253426013994586e-06, |
| "loss": 0.2523, |
| "step": 1487 |
| }, |
| { |
| "epoch": 2.0781213671239245, |
| "grad_norm": 2.134712711238633, |
| "learning_rate": 2.6181944911741333e-06, |
| "loss": 0.2525, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.0795163915368517, |
| "grad_norm": 2.113789784111181, |
| "learning_rate": 2.6110526722754955e-06, |
| "loss": 0.2679, |
| "step": 1489 |
| }, |
| { |
| "epoch": 2.080911415949779, |
| "grad_norm": 2.169975003010366, |
| "learning_rate": 2.603917163568021e-06, |
| "loss": 0.2487, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.0823064403627063, |
| "grad_norm": 1.8802285332237876, |
| "learning_rate": 2.5967879838995176e-06, |
| "loss": 0.2268, |
| "step": 1491 |
| }, |
| { |
| "epoch": 2.0837014647756336, |
| "grad_norm": 2.1346446458657375, |
| "learning_rate": 2.589665152101081e-06, |
| "loss": 0.2444, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.085096489188561, |
| "grad_norm": 1.995965282834599, |
| "learning_rate": 2.582548686987031e-06, |
| "loss": 0.233, |
| "step": 1493 |
| }, |
| { |
| "epoch": 2.086491513601488, |
| "grad_norm": 1.9479032032819366, |
| "learning_rate": 2.5754386073548775e-06, |
| "loss": 0.2245, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.0878865380144154, |
| "grad_norm": 1.981745320689665, |
| "learning_rate": 2.5683349319852647e-06, |
| "loss": 0.2411, |
| "step": 1495 |
| }, |
| { |
| "epoch": 2.0892815624273426, |
| "grad_norm": 1.9301685232993138, |
| "learning_rate": 2.5612376796419126e-06, |
| "loss": 0.2382, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.09067658684027, |
| "grad_norm": 2.1167901194901884, |
| "learning_rate": 2.5541468690715797e-06, |
| "loss": 0.2461, |
| "step": 1497 |
| }, |
| { |
| "epoch": 2.0920716112531967, |
| "grad_norm": 1.9947152398221217, |
| "learning_rate": 2.5470625190040105e-06, |
| "loss": 0.249, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.093466635666124, |
| "grad_norm": 2.0494633364154646, |
| "learning_rate": 2.5399846481518857e-06, |
| "loss": 0.2499, |
| "step": 1499 |
| }, |
| { |
| "epoch": 2.0948616600790513, |
| "grad_norm": 1.950090286363791, |
| "learning_rate": 2.5329132752107675e-06, |
| "loss": 0.2621, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.0962566844919786, |
| "grad_norm": 2.1301861314715924, |
| "learning_rate": 2.525848418859055e-06, |
| "loss": 0.2521, |
| "step": 1501 |
| }, |
| { |
| "epoch": 2.097651708904906, |
| "grad_norm": 1.771798927570182, |
| "learning_rate": 2.518790097757938e-06, |
| "loss": 0.2204, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.099046733317833, |
| "grad_norm": 1.8590300077093187, |
| "learning_rate": 2.51173833055134e-06, |
| "loss": 0.2212, |
| "step": 1503 |
| }, |
| { |
| "epoch": 2.1004417577307604, |
| "grad_norm": 2.063306202589427, |
| "learning_rate": 2.504693135865875e-06, |
| "loss": 0.2621, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.1018367821436876, |
| "grad_norm": 2.02342129140672, |
| "learning_rate": 2.497654532310799e-06, |
| "loss": 0.2454, |
| "step": 1505 |
| }, |
| { |
| "epoch": 2.103231806556615, |
| "grad_norm": 1.8990466972330668, |
| "learning_rate": 2.490622538477952e-06, |
| "loss": 0.2406, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.104626830969542, |
| "grad_norm": 2.1590362411607464, |
| "learning_rate": 2.483597172941718e-06, |
| "loss": 0.2375, |
| "step": 1507 |
| }, |
| { |
| "epoch": 2.106021855382469, |
| "grad_norm": 2.0545993680320596, |
| "learning_rate": 2.4765784542589754e-06, |
| "loss": 0.2492, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.1074168797953963, |
| "grad_norm": 2.038598849419707, |
| "learning_rate": 2.46956640096904e-06, |
| "loss": 0.2073, |
| "step": 1509 |
| }, |
| { |
| "epoch": 2.1088119042083235, |
| "grad_norm": 1.8605747435871027, |
| "learning_rate": 2.4625610315936267e-06, |
| "loss": 0.2454, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.110206928621251, |
| "grad_norm": 2.1562619973708057, |
| "learning_rate": 2.4555623646367952e-06, |
| "loss": 0.2756, |
| "step": 1511 |
| }, |
| { |
| "epoch": 2.111601953034178, |
| "grad_norm": 2.0329623515500246, |
| "learning_rate": 2.448570418584898e-06, |
| "loss": 0.2461, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.1129969774471054, |
| "grad_norm": 1.953214188504134, |
| "learning_rate": 2.4415852119065343e-06, |
| "loss": 0.247, |
| "step": 1513 |
| }, |
| { |
| "epoch": 2.1143920018600326, |
| "grad_norm": 2.0529609432367235, |
| "learning_rate": 2.4346067630525084e-06, |
| "loss": 0.24, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.11578702627296, |
| "grad_norm": 2.0799839572137695, |
| "learning_rate": 2.427635090455766e-06, |
| "loss": 0.229, |
| "step": 1515 |
| }, |
| { |
| "epoch": 2.117182050685887, |
| "grad_norm": 2.0126307201591014, |
| "learning_rate": 2.42067021253136e-06, |
| "loss": 0.2339, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.1185770750988144, |
| "grad_norm": 1.8308783448148316, |
| "learning_rate": 2.4137121476763965e-06, |
| "loss": 0.2071, |
| "step": 1517 |
| }, |
| { |
| "epoch": 2.1199720995117413, |
| "grad_norm": 1.9553265656138026, |
| "learning_rate": 2.4067609142699798e-06, |
| "loss": 0.2215, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.1213671239246685, |
| "grad_norm": 2.258180433222967, |
| "learning_rate": 2.3998165306731713e-06, |
| "loss": 0.2444, |
| "step": 1519 |
| }, |
| { |
| "epoch": 2.122762148337596, |
| "grad_norm": 2.028159380935348, |
| "learning_rate": 2.3928790152289443e-06, |
| "loss": 0.2116, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.124157172750523, |
| "grad_norm": 1.9801498809540945, |
| "learning_rate": 2.385948386262123e-06, |
| "loss": 0.2602, |
| "step": 1521 |
| }, |
| { |
| "epoch": 2.1255521971634503, |
| "grad_norm": 2.2663540920146863, |
| "learning_rate": 2.3790246620793466e-06, |
| "loss": 0.2457, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.1269472215763776, |
| "grad_norm": 2.101327168537436, |
| "learning_rate": 2.372107860969019e-06, |
| "loss": 0.2306, |
| "step": 1523 |
| }, |
| { |
| "epoch": 2.128342245989305, |
| "grad_norm": 1.9895205668500822, |
| "learning_rate": 2.3651980012012454e-06, |
| "loss": 0.2166, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.129737270402232, |
| "grad_norm": 1.8300310484062896, |
| "learning_rate": 2.358295101027807e-06, |
| "loss": 0.2249, |
| "step": 1525 |
| }, |
| { |
| "epoch": 2.1311322948151594, |
| "grad_norm": 1.765470113307758, |
| "learning_rate": 2.351399178682101e-06, |
| "loss": 0.2284, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.1325273192280867, |
| "grad_norm": 2.13174824831439, |
| "learning_rate": 2.3445102523790876e-06, |
| "loss": 0.2198, |
| "step": 1527 |
| }, |
| { |
| "epoch": 2.1339223436410135, |
| "grad_norm": 1.9155367612459846, |
| "learning_rate": 2.3376283403152527e-06, |
| "loss": 0.2256, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.135317368053941, |
| "grad_norm": 1.9875172700280743, |
| "learning_rate": 2.330753460668553e-06, |
| "loss": 0.2393, |
| "step": 1529 |
| }, |
| { |
| "epoch": 2.136712392466868, |
| "grad_norm": 1.8406312091494403, |
| "learning_rate": 2.323885631598366e-06, |
| "loss": 0.2396, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.1381074168797953, |
| "grad_norm": 1.9374184950557762, |
| "learning_rate": 2.3170248712454525e-06, |
| "loss": 0.2461, |
| "step": 1531 |
| }, |
| { |
| "epoch": 2.1395024412927226, |
| "grad_norm": 1.963431102509043, |
| "learning_rate": 2.3101711977318995e-06, |
| "loss": 0.2642, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.14089746570565, |
| "grad_norm": 2.0376501014600716, |
| "learning_rate": 2.3033246291610717e-06, |
| "loss": 0.2291, |
| "step": 1533 |
| }, |
| { |
| "epoch": 2.142292490118577, |
| "grad_norm": 2.074177144588648, |
| "learning_rate": 2.2964851836175705e-06, |
| "loss": 0.2508, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.1436875145315044, |
| "grad_norm": 2.033089929760175, |
| "learning_rate": 2.2896528791671807e-06, |
| "loss": 0.2301, |
| "step": 1535 |
| }, |
| { |
| "epoch": 2.1450825389444317, |
| "grad_norm": 1.9262718887772488, |
| "learning_rate": 2.2828277338568226e-06, |
| "loss": 0.2316, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.146477563357359, |
| "grad_norm": 2.0757309335688165, |
| "learning_rate": 2.2760097657145096e-06, |
| "loss": 0.2404, |
| "step": 1537 |
| }, |
| { |
| "epoch": 2.147872587770286, |
| "grad_norm": 1.9444214641725062, |
| "learning_rate": 2.2691989927492984e-06, |
| "loss": 0.2594, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.149267612183213, |
| "grad_norm": 2.198150007883586, |
| "learning_rate": 2.262395432951235e-06, |
| "loss": 0.264, |
| "step": 1539 |
| }, |
| { |
| "epoch": 2.1506626365961403, |
| "grad_norm": 1.9616462283529785, |
| "learning_rate": 2.2555991042913177e-06, |
| "loss": 0.2277, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.1520576610090676, |
| "grad_norm": 2.090474806693938, |
| "learning_rate": 2.248810024721441e-06, |
| "loss": 0.2245, |
| "step": 1541 |
| }, |
| { |
| "epoch": 2.153452685421995, |
| "grad_norm": 1.9854907398753259, |
| "learning_rate": 2.2420282121743513e-06, |
| "loss": 0.2384, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.154847709834922, |
| "grad_norm": 1.978976806270666, |
| "learning_rate": 2.235253684563602e-06, |
| "loss": 0.225, |
| "step": 1543 |
| }, |
| { |
| "epoch": 2.1562427342478494, |
| "grad_norm": 2.0143674773479088, |
| "learning_rate": 2.228486459783506e-06, |
| "loss": 0.2204, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.1576377586607767, |
| "grad_norm": 1.8497292458468502, |
| "learning_rate": 2.221726555709079e-06, |
| "loss": 0.2332, |
| "step": 1545 |
| }, |
| { |
| "epoch": 2.159032783073704, |
| "grad_norm": 2.041422679794062, |
| "learning_rate": 2.2149739901960088e-06, |
| "loss": 0.2502, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.160427807486631, |
| "grad_norm": 1.9189040453437212, |
| "learning_rate": 2.208228781080592e-06, |
| "loss": 0.2278, |
| "step": 1547 |
| }, |
| { |
| "epoch": 2.161822831899558, |
| "grad_norm": 1.9937984946523888, |
| "learning_rate": 2.201490946179696e-06, |
| "loss": 0.2222, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.1632178563124853, |
| "grad_norm": 1.901075385011242, |
| "learning_rate": 2.19476050329071e-06, |
| "loss": 0.2152, |
| "step": 1549 |
| }, |
| { |
| "epoch": 2.1646128807254126, |
| "grad_norm": 2.0137231979004286, |
| "learning_rate": 2.188037470191502e-06, |
| "loss": 0.2288, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.16600790513834, |
| "grad_norm": 1.7888962568094866, |
| "learning_rate": 2.181321864640362e-06, |
| "loss": 0.229, |
| "step": 1551 |
| }, |
| { |
| "epoch": 2.167402929551267, |
| "grad_norm": 2.0707458853980563, |
| "learning_rate": 2.1746137043759594e-06, |
| "loss": 0.2098, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.1687979539641944, |
| "grad_norm": 1.8413823655100272, |
| "learning_rate": 2.167913007117306e-06, |
| "loss": 0.2245, |
| "step": 1553 |
| }, |
| { |
| "epoch": 2.1701929783771217, |
| "grad_norm": 2.0529948691138524, |
| "learning_rate": 2.1612197905636913e-06, |
| "loss": 0.2544, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.171588002790049, |
| "grad_norm": 2.0489852227371586, |
| "learning_rate": 2.154534072394651e-06, |
| "loss": 0.235, |
| "step": 1555 |
| }, |
| { |
| "epoch": 2.172983027202976, |
| "grad_norm": 2.1022639361713216, |
| "learning_rate": 2.147855870269916e-06, |
| "loss": 0.2493, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.1743780516159035, |
| "grad_norm": 1.9016592054329882, |
| "learning_rate": 2.1411852018293583e-06, |
| "loss": 0.2737, |
| "step": 1557 |
| }, |
| { |
| "epoch": 2.1757730760288303, |
| "grad_norm": 2.017634751042281, |
| "learning_rate": 2.1345220846929514e-06, |
| "loss": 0.2296, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.1771681004417576, |
| "grad_norm": 2.0588142168838957, |
| "learning_rate": 2.127866536460727e-06, |
| "loss": 0.2306, |
| "step": 1559 |
| }, |
| { |
| "epoch": 2.178563124854685, |
| "grad_norm": 1.844060249249721, |
| "learning_rate": 2.1212185747127235e-06, |
| "loss": 0.2397, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.179958149267612, |
| "grad_norm": 1.8836100804054643, |
| "learning_rate": 2.1145782170089346e-06, |
| "loss": 0.226, |
| "step": 1561 |
| }, |
| { |
| "epoch": 2.1813531736805394, |
| "grad_norm": 2.027342005832641, |
| "learning_rate": 2.107945480889276e-06, |
| "loss": 0.2211, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.1827481980934667, |
| "grad_norm": 2.0716969692058336, |
| "learning_rate": 2.1013203838735273e-06, |
| "loss": 0.2313, |
| "step": 1563 |
| }, |
| { |
| "epoch": 2.184143222506394, |
| "grad_norm": 1.87998419305253, |
| "learning_rate": 2.094702943461289e-06, |
| "loss": 0.2265, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.185538246919321, |
| "grad_norm": 2.083535070952125, |
| "learning_rate": 2.0880931771319395e-06, |
| "loss": 0.2606, |
| "step": 1565 |
| }, |
| { |
| "epoch": 2.1869332713322485, |
| "grad_norm": 2.0437831993419833, |
| "learning_rate": 2.0814911023445904e-06, |
| "loss": 0.2067, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.1883282957451757, |
| "grad_norm": 1.927662318132159, |
| "learning_rate": 2.0748967365380292e-06, |
| "loss": 0.229, |
| "step": 1567 |
| }, |
| { |
| "epoch": 2.1897233201581026, |
| "grad_norm": 1.9753615761556702, |
| "learning_rate": 2.0683100971306873e-06, |
| "loss": 0.2482, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.19111834457103, |
| "grad_norm": 2.1292063771141496, |
| "learning_rate": 2.0617312015205844e-06, |
| "loss": 0.2629, |
| "step": 1569 |
| }, |
| { |
| "epoch": 2.192513368983957, |
| "grad_norm": 2.03833931308385, |
| "learning_rate": 2.055160067085283e-06, |
| "loss": 0.2224, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.1939083933968844, |
| "grad_norm": 1.99345008089848, |
| "learning_rate": 2.0485967111818506e-06, |
| "loss": 0.2465, |
| "step": 1571 |
| }, |
| { |
| "epoch": 2.1953034178098116, |
| "grad_norm": 1.8416073370699406, |
| "learning_rate": 2.0420411511468086e-06, |
| "loss": 0.2231, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.196698442222739, |
| "grad_norm": 2.022634849681668, |
| "learning_rate": 2.0354934042960804e-06, |
| "loss": 0.266, |
| "step": 1573 |
| }, |
| { |
| "epoch": 2.198093466635666, |
| "grad_norm": 1.9543607699785175, |
| "learning_rate": 2.0289534879249544e-06, |
| "loss": 0.2215, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.1994884910485935, |
| "grad_norm": 2.120691875324055, |
| "learning_rate": 2.0224214193080394e-06, |
| "loss": 0.2411, |
| "step": 1575 |
| }, |
| { |
| "epoch": 2.2008835154615207, |
| "grad_norm": 1.9968502256620426, |
| "learning_rate": 2.015897215699208e-06, |
| "loss": 0.2407, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.202278539874448, |
| "grad_norm": 1.972527239657243, |
| "learning_rate": 2.0093808943315636e-06, |
| "loss": 0.213, |
| "step": 1577 |
| }, |
| { |
| "epoch": 2.2036735642873753, |
| "grad_norm": 1.894788749194984, |
| "learning_rate": 2.0028724724173886e-06, |
| "loss": 0.2199, |
| "step": 1578 |
| }, |
| { |
| "epoch": 2.205068588700302, |
| "grad_norm": 1.9941924804567333, |
| "learning_rate": 1.996371967148098e-06, |
| "loss": 0.2255, |
| "step": 1579 |
| }, |
| { |
| "epoch": 2.2064636131132294, |
| "grad_norm": 2.1213230860539154, |
| "learning_rate": 1.989879395694194e-06, |
| "loss": 0.2711, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.2078586375261566, |
| "grad_norm": 2.005074782180638, |
| "learning_rate": 1.9833947752052286e-06, |
| "loss": 0.2191, |
| "step": 1581 |
| }, |
| { |
| "epoch": 2.209253661939084, |
| "grad_norm": 2.0916112373693663, |
| "learning_rate": 1.976918122809744e-06, |
| "loss": 0.2423, |
| "step": 1582 |
| }, |
| { |
| "epoch": 2.210648686352011, |
| "grad_norm": 1.943662048921364, |
| "learning_rate": 1.9704494556152413e-06, |
| "loss": 0.2364, |
| "step": 1583 |
| }, |
| { |
| "epoch": 2.2120437107649384, |
| "grad_norm": 1.8072918947965364, |
| "learning_rate": 1.9639887907081297e-06, |
| "loss": 0.2101, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.2134387351778657, |
| "grad_norm": 2.074058894664055, |
| "learning_rate": 1.9575361451536772e-06, |
| "loss": 0.2528, |
| "step": 1585 |
| }, |
| { |
| "epoch": 2.214833759590793, |
| "grad_norm": 1.948267902890602, |
| "learning_rate": 1.9510915359959694e-06, |
| "loss": 0.2052, |
| "step": 1586 |
| }, |
| { |
| "epoch": 2.2162287840037203, |
| "grad_norm": 1.6977361690821569, |
| "learning_rate": 1.944654980257869e-06, |
| "loss": 0.2248, |
| "step": 1587 |
| }, |
| { |
| "epoch": 2.217623808416647, |
| "grad_norm": 1.9851481647242695, |
| "learning_rate": 1.9382264949409614e-06, |
| "loss": 0.2334, |
| "step": 1588 |
| }, |
| { |
| "epoch": 2.2190188328295744, |
| "grad_norm": 2.0409126097908628, |
| "learning_rate": 1.931806097025517e-06, |
| "loss": 0.2512, |
| "step": 1589 |
| }, |
| { |
| "epoch": 2.2204138572425016, |
| "grad_norm": 2.193027559366708, |
| "learning_rate": 1.925393803470447e-06, |
| "loss": 0.2132, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.221808881655429, |
| "grad_norm": 1.962723403198113, |
| "learning_rate": 1.9189896312132506e-06, |
| "loss": 0.2341, |
| "step": 1591 |
| }, |
| { |
| "epoch": 2.223203906068356, |
| "grad_norm": 2.025113111819309, |
| "learning_rate": 1.912593597169975e-06, |
| "loss": 0.2081, |
| "step": 1592 |
| }, |
| { |
| "epoch": 2.2245989304812834, |
| "grad_norm": 2.0420629608721126, |
| "learning_rate": 1.9062057182351768e-06, |
| "loss": 0.2374, |
| "step": 1593 |
| }, |
| { |
| "epoch": 2.2259939548942107, |
| "grad_norm": 1.8896161037166734, |
| "learning_rate": 1.899826011281865e-06, |
| "loss": 0.2551, |
| "step": 1594 |
| }, |
| { |
| "epoch": 2.227388979307138, |
| "grad_norm": 1.906560235493038, |
| "learning_rate": 1.893454493161468e-06, |
| "loss": 0.2208, |
| "step": 1595 |
| }, |
| { |
| "epoch": 2.2287840037200652, |
| "grad_norm": 1.8248556594089396, |
| "learning_rate": 1.8870911807037856e-06, |
| "loss": 0.2283, |
| "step": 1596 |
| }, |
| { |
| "epoch": 2.2301790281329925, |
| "grad_norm": 2.1102132026488745, |
| "learning_rate": 1.8807360907169326e-06, |
| "loss": 0.2577, |
| "step": 1597 |
| }, |
| { |
| "epoch": 2.23157405254592, |
| "grad_norm": 1.9791563851389666, |
| "learning_rate": 1.8743892399873154e-06, |
| "loss": 0.2252, |
| "step": 1598 |
| }, |
| { |
| "epoch": 2.2329690769588466, |
| "grad_norm": 2.049090710717768, |
| "learning_rate": 1.868050645279576e-06, |
| "loss": 0.2368, |
| "step": 1599 |
| }, |
| { |
| "epoch": 2.234364101371774, |
| "grad_norm": 1.9669615346401133, |
| "learning_rate": 1.8617203233365427e-06, |
| "loss": 0.2306, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.235759125784701, |
| "grad_norm": 1.9525208909026137, |
| "learning_rate": 1.8553982908792e-06, |
| "loss": 0.2372, |
| "step": 1601 |
| }, |
| { |
| "epoch": 2.2371541501976284, |
| "grad_norm": 2.179736854299681, |
| "learning_rate": 1.8490845646066303e-06, |
| "loss": 0.2783, |
| "step": 1602 |
| }, |
| { |
| "epoch": 2.2385491746105557, |
| "grad_norm": 2.295070889351553, |
| "learning_rate": 1.8427791611959762e-06, |
| "loss": 0.2567, |
| "step": 1603 |
| }, |
| { |
| "epoch": 2.239944199023483, |
| "grad_norm": 1.8673932533442659, |
| "learning_rate": 1.8364820973024e-06, |
| "loss": 0.2154, |
| "step": 1604 |
| }, |
| { |
| "epoch": 2.2413392234364102, |
| "grad_norm": 2.1778592782550317, |
| "learning_rate": 1.8301933895590362e-06, |
| "loss": 0.2522, |
| "step": 1605 |
| }, |
| { |
| "epoch": 2.2427342478493375, |
| "grad_norm": 1.953126746209516, |
| "learning_rate": 1.8239130545769408e-06, |
| "loss": 0.2189, |
| "step": 1606 |
| }, |
| { |
| "epoch": 2.2441292722622648, |
| "grad_norm": 1.9964448323232684, |
| "learning_rate": 1.8176411089450618e-06, |
| "loss": 0.2524, |
| "step": 1607 |
| }, |
| { |
| "epoch": 2.2455242966751916, |
| "grad_norm": 1.9567449377882402, |
| "learning_rate": 1.8113775692301822e-06, |
| "loss": 0.2342, |
| "step": 1608 |
| }, |
| { |
| "epoch": 2.246919321088119, |
| "grad_norm": 2.1009476547919776, |
| "learning_rate": 1.8051224519768817e-06, |
| "loss": 0.2391, |
| "step": 1609 |
| }, |
| { |
| "epoch": 2.248314345501046, |
| "grad_norm": 1.9587113710355928, |
| "learning_rate": 1.7988757737074959e-06, |
| "loss": 0.2179, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.2497093699139734, |
| "grad_norm": 2.0040293159213354, |
| "learning_rate": 1.7926375509220695e-06, |
| "loss": 0.2222, |
| "step": 1611 |
| }, |
| { |
| "epoch": 2.2511043943269007, |
| "grad_norm": 2.0888632269335354, |
| "learning_rate": 1.7864078000983076e-06, |
| "loss": 0.2109, |
| "step": 1612 |
| }, |
| { |
| "epoch": 2.252499418739828, |
| "grad_norm": 2.039750099246385, |
| "learning_rate": 1.7801865376915451e-06, |
| "loss": 0.2215, |
| "step": 1613 |
| }, |
| { |
| "epoch": 2.2538944431527552, |
| "grad_norm": 2.0748993098994952, |
| "learning_rate": 1.7739737801346895e-06, |
| "loss": 0.2469, |
| "step": 1614 |
| }, |
| { |
| "epoch": 2.2552894675656825, |
| "grad_norm": 1.8716892318272644, |
| "learning_rate": 1.7677695438381831e-06, |
| "loss": 0.2573, |
| "step": 1615 |
| }, |
| { |
| "epoch": 2.2566844919786098, |
| "grad_norm": 2.09042965134009, |
| "learning_rate": 1.761573845189965e-06, |
| "loss": 0.2367, |
| "step": 1616 |
| }, |
| { |
| "epoch": 2.258079516391537, |
| "grad_norm": 1.7936694510013214, |
| "learning_rate": 1.7553867005554215e-06, |
| "loss": 0.2068, |
| "step": 1617 |
| }, |
| { |
| "epoch": 2.2594745408044643, |
| "grad_norm": 1.9479746253603065, |
| "learning_rate": 1.7492081262773397e-06, |
| "loss": 0.2304, |
| "step": 1618 |
| }, |
| { |
| "epoch": 2.260869565217391, |
| "grad_norm": 2.134206025310751, |
| "learning_rate": 1.7430381386758748e-06, |
| "loss": 0.2447, |
| "step": 1619 |
| }, |
| { |
| "epoch": 2.2622645896303184, |
| "grad_norm": 1.9220137353156832, |
| "learning_rate": 1.7368767540484965e-06, |
| "loss": 0.2228, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.2636596140432457, |
| "grad_norm": 1.9039789497758535, |
| "learning_rate": 1.7307239886699546e-06, |
| "loss": 0.2541, |
| "step": 1621 |
| }, |
| { |
| "epoch": 2.265054638456173, |
| "grad_norm": 2.2671396189025774, |
| "learning_rate": 1.7245798587922263e-06, |
| "loss": 0.2414, |
| "step": 1622 |
| }, |
| { |
| "epoch": 2.2664496628691, |
| "grad_norm": 1.8312831188481675, |
| "learning_rate": 1.7184443806444851e-06, |
| "loss": 0.2221, |
| "step": 1623 |
| }, |
| { |
| "epoch": 2.2678446872820275, |
| "grad_norm": 1.7794976279581878, |
| "learning_rate": 1.7123175704330514e-06, |
| "loss": 0.2194, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.2692397116949548, |
| "grad_norm": 1.910542117265865, |
| "learning_rate": 1.706199444341341e-06, |
| "loss": 0.2393, |
| "step": 1625 |
| }, |
| { |
| "epoch": 2.270634736107882, |
| "grad_norm": 2.0701674846292213, |
| "learning_rate": 1.7000900185298418e-06, |
| "loss": 0.2729, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.2720297605208093, |
| "grad_norm": 2.167518255074453, |
| "learning_rate": 1.6939893091360577e-06, |
| "loss": 0.2609, |
| "step": 1627 |
| }, |
| { |
| "epoch": 2.273424784933736, |
| "grad_norm": 2.0346558592689963, |
| "learning_rate": 1.6878973322744658e-06, |
| "loss": 0.2266, |
| "step": 1628 |
| }, |
| { |
| "epoch": 2.2748198093466634, |
| "grad_norm": 1.9628822161613053, |
| "learning_rate": 1.6818141040364816e-06, |
| "loss": 0.2307, |
| "step": 1629 |
| }, |
| { |
| "epoch": 2.2762148337595907, |
| "grad_norm": 1.9799083641231046, |
| "learning_rate": 1.6757396404904087e-06, |
| "loss": 0.2243, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.277609858172518, |
| "grad_norm": 1.9097000725178894, |
| "learning_rate": 1.6696739576813981e-06, |
| "loss": 0.2401, |
| "step": 1631 |
| }, |
| { |
| "epoch": 2.279004882585445, |
| "grad_norm": 1.9065958749954741, |
| "learning_rate": 1.6636170716314114e-06, |
| "loss": 0.2659, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.2803999069983725, |
| "grad_norm": 2.185791921800934, |
| "learning_rate": 1.657568998339175e-06, |
| "loss": 0.2615, |
| "step": 1633 |
| }, |
| { |
| "epoch": 2.2817949314112997, |
| "grad_norm": 2.2995275927534196, |
| "learning_rate": 1.6515297537801305e-06, |
| "loss": 0.2491, |
| "step": 1634 |
| }, |
| { |
| "epoch": 2.283189955824227, |
| "grad_norm": 1.9425924150562717, |
| "learning_rate": 1.6454993539064075e-06, |
| "loss": 0.2178, |
| "step": 1635 |
| }, |
| { |
| "epoch": 2.2845849802371543, |
| "grad_norm": 1.8938205464951827, |
| "learning_rate": 1.6394778146467672e-06, |
| "loss": 0.2665, |
| "step": 1636 |
| }, |
| { |
| "epoch": 2.2859800046500816, |
| "grad_norm": 2.1852392869275716, |
| "learning_rate": 1.6334651519065658e-06, |
| "loss": 0.2576, |
| "step": 1637 |
| }, |
| { |
| "epoch": 2.287375029063009, |
| "grad_norm": 2.09160013513525, |
| "learning_rate": 1.6274613815677176e-06, |
| "loss": 0.2468, |
| "step": 1638 |
| }, |
| { |
| "epoch": 2.2887700534759357, |
| "grad_norm": 2.2221912807762, |
| "learning_rate": 1.6214665194886474e-06, |
| "loss": 0.2001, |
| "step": 1639 |
| }, |
| { |
| "epoch": 2.290165077888863, |
| "grad_norm": 1.8322716677994173, |
| "learning_rate": 1.6154805815042457e-06, |
| "loss": 0.2098, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.29156010230179, |
| "grad_norm": 2.1005552341097244, |
| "learning_rate": 1.6095035834258365e-06, |
| "loss": 0.2459, |
| "step": 1641 |
| }, |
| { |
| "epoch": 2.2929551267147175, |
| "grad_norm": 1.9045088391363771, |
| "learning_rate": 1.6035355410411252e-06, |
| "loss": 0.2003, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.2943501511276447, |
| "grad_norm": 2.0139872656400786, |
| "learning_rate": 1.5975764701141611e-06, |
| "loss": 0.2358, |
| "step": 1643 |
| }, |
| { |
| "epoch": 2.295745175540572, |
| "grad_norm": 1.9262075355870194, |
| "learning_rate": 1.5916263863853e-06, |
| "loss": 0.2404, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.2971401999534993, |
| "grad_norm": 2.201421447614601, |
| "learning_rate": 1.585685305571159e-06, |
| "loss": 0.2669, |
| "step": 1645 |
| }, |
| { |
| "epoch": 2.2985352243664265, |
| "grad_norm": 2.1567312680551227, |
| "learning_rate": 1.5797532433645696e-06, |
| "loss": 0.2259, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.299930248779354, |
| "grad_norm": 2.078334907511813, |
| "learning_rate": 1.5738302154345475e-06, |
| "loss": 0.2349, |
| "step": 1647 |
| }, |
| { |
| "epoch": 2.3013252731922806, |
| "grad_norm": 2.079069425158578, |
| "learning_rate": 1.5679162374262414e-06, |
| "loss": 0.2349, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.302720297605208, |
| "grad_norm": 1.9252113061545926, |
| "learning_rate": 1.5620113249608943e-06, |
| "loss": 0.253, |
| "step": 1649 |
| }, |
| { |
| "epoch": 2.304115322018135, |
| "grad_norm": 2.0259959768639586, |
| "learning_rate": 1.5561154936358069e-06, |
| "loss": 0.2402, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.3055103464310625, |
| "grad_norm": 2.090859034549075, |
| "learning_rate": 1.5502287590242942e-06, |
| "loss": 0.2286, |
| "step": 1651 |
| }, |
| { |
| "epoch": 2.3069053708439897, |
| "grad_norm": 1.8312991160092411, |
| "learning_rate": 1.5443511366756375e-06, |
| "loss": 0.2153, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.308300395256917, |
| "grad_norm": 1.954872792366119, |
| "learning_rate": 1.53848264211505e-06, |
| "loss": 0.2437, |
| "step": 1653 |
| }, |
| { |
| "epoch": 2.3096954196698443, |
| "grad_norm": 1.7300626387812998, |
| "learning_rate": 1.5326232908436405e-06, |
| "loss": 0.2146, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.3110904440827715, |
| "grad_norm": 2.0097750787041124, |
| "learning_rate": 1.526773098338359e-06, |
| "loss": 0.2328, |
| "step": 1655 |
| }, |
| { |
| "epoch": 2.312485468495699, |
| "grad_norm": 1.9802225368429347, |
| "learning_rate": 1.5209320800519683e-06, |
| "loss": 0.2315, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.313880492908626, |
| "grad_norm": 2.081213044620488, |
| "learning_rate": 1.515100251412998e-06, |
| "loss": 0.2503, |
| "step": 1657 |
| }, |
| { |
| "epoch": 2.3152755173215533, |
| "grad_norm": 1.9855688760114059, |
| "learning_rate": 1.5092776278257027e-06, |
| "loss": 0.227, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.31667054173448, |
| "grad_norm": 1.924537538881194, |
| "learning_rate": 1.5034642246700203e-06, |
| "loss": 0.2401, |
| "step": 1659 |
| }, |
| { |
| "epoch": 2.3180655661474074, |
| "grad_norm": 2.1845890347247714, |
| "learning_rate": 1.4976600573015398e-06, |
| "loss": 0.2516, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.3194605905603347, |
| "grad_norm": 2.0681273481820384, |
| "learning_rate": 1.4918651410514479e-06, |
| "loss": 0.2487, |
| "step": 1661 |
| }, |
| { |
| "epoch": 2.320855614973262, |
| "grad_norm": 1.9894399584697364, |
| "learning_rate": 1.486079491226501e-06, |
| "loss": 0.2185, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.3222506393861893, |
| "grad_norm": 1.9836278430699281, |
| "learning_rate": 1.4803031231089782e-06, |
| "loss": 0.2273, |
| "step": 1663 |
| }, |
| { |
| "epoch": 2.3236456637991165, |
| "grad_norm": 1.856207145115784, |
| "learning_rate": 1.4745360519566382e-06, |
| "loss": 0.2257, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.325040688212044, |
| "grad_norm": 2.059552287079175, |
| "learning_rate": 1.4687782930026833e-06, |
| "loss": 0.2519, |
| "step": 1665 |
| }, |
| { |
| "epoch": 2.326435712624971, |
| "grad_norm": 1.9484607160245673, |
| "learning_rate": 1.4630298614557236e-06, |
| "loss": 0.2305, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.3278307370378983, |
| "grad_norm": 2.057365261689767, |
| "learning_rate": 1.4572907724997249e-06, |
| "loss": 0.2738, |
| "step": 1667 |
| }, |
| { |
| "epoch": 2.329225761450825, |
| "grad_norm": 1.9604906487353115, |
| "learning_rate": 1.4515610412939791e-06, |
| "loss": 0.222, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.3306207858637524, |
| "grad_norm": 1.8794624116326488, |
| "learning_rate": 1.445840682973062e-06, |
| "loss": 0.2429, |
| "step": 1669 |
| }, |
| { |
| "epoch": 2.3320158102766797, |
| "grad_norm": 2.003776327480136, |
| "learning_rate": 1.4401297126467884e-06, |
| "loss": 0.2113, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.333410834689607, |
| "grad_norm": 1.9226038150982783, |
| "learning_rate": 1.4344281454001751e-06, |
| "loss": 0.2057, |
| "step": 1671 |
| }, |
| { |
| "epoch": 2.3348058591025342, |
| "grad_norm": 2.2602193419865517, |
| "learning_rate": 1.4287359962934055e-06, |
| "loss": 0.2485, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.3362008835154615, |
| "grad_norm": 2.1461782426022595, |
| "learning_rate": 1.4230532803617814e-06, |
| "loss": 0.2534, |
| "step": 1673 |
| }, |
| { |
| "epoch": 2.337595907928389, |
| "grad_norm": 1.9316351213030138, |
| "learning_rate": 1.4173800126156916e-06, |
| "loss": 0.2418, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.338990932341316, |
| "grad_norm": 1.856527405239218, |
| "learning_rate": 1.411716208040566e-06, |
| "loss": 0.2237, |
| "step": 1675 |
| }, |
| { |
| "epoch": 2.3403859567542433, |
| "grad_norm": 2.073164314302376, |
| "learning_rate": 1.4060618815968375e-06, |
| "loss": 0.2487, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.3417809811671706, |
| "grad_norm": 2.025415555206487, |
| "learning_rate": 1.4004170482199054e-06, |
| "loss": 0.264, |
| "step": 1677 |
| }, |
| { |
| "epoch": 2.343176005580098, |
| "grad_norm": 1.8947228597498715, |
| "learning_rate": 1.3947817228200956e-06, |
| "loss": 0.209, |
| "step": 1678 |
| }, |
| { |
| "epoch": 2.3445710299930247, |
| "grad_norm": 2.006321036445722, |
| "learning_rate": 1.3891559202826133e-06, |
| "loss": 0.2622, |
| "step": 1679 |
| }, |
| { |
| "epoch": 2.345966054405952, |
| "grad_norm": 2.303900612419585, |
| "learning_rate": 1.3835396554675179e-06, |
| "loss": 0.2378, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.3473610788188792, |
| "grad_norm": 2.037346779671685, |
| "learning_rate": 1.37793294320967e-06, |
| "loss": 0.2608, |
| "step": 1681 |
| }, |
| { |
| "epoch": 2.3487561032318065, |
| "grad_norm": 1.9305248706544658, |
| "learning_rate": 1.3723357983186974e-06, |
| "loss": 0.2163, |
| "step": 1682 |
| }, |
| { |
| "epoch": 2.3501511276447338, |
| "grad_norm": 2.0140564946990276, |
| "learning_rate": 1.3667482355789607e-06, |
| "loss": 0.2248, |
| "step": 1683 |
| }, |
| { |
| "epoch": 2.351546152057661, |
| "grad_norm": 2.084958562843635, |
| "learning_rate": 1.3611702697495088e-06, |
| "loss": 0.2131, |
| "step": 1684 |
| }, |
| { |
| "epoch": 2.3529411764705883, |
| "grad_norm": 1.9332055631395932, |
| "learning_rate": 1.3556019155640416e-06, |
| "loss": 0.2329, |
| "step": 1685 |
| }, |
| { |
| "epoch": 2.3543362008835156, |
| "grad_norm": 1.8488896616988466, |
| "learning_rate": 1.350043187730868e-06, |
| "loss": 0.2395, |
| "step": 1686 |
| }, |
| { |
| "epoch": 2.355731225296443, |
| "grad_norm": 2.430410848237495, |
| "learning_rate": 1.34449410093287e-06, |
| "loss": 0.2343, |
| "step": 1687 |
| }, |
| { |
| "epoch": 2.3571262497093697, |
| "grad_norm": 1.8914353443219187, |
| "learning_rate": 1.3389546698274686e-06, |
| "loss": 0.2171, |
| "step": 1688 |
| }, |
| { |
| "epoch": 2.358521274122297, |
| "grad_norm": 2.1017255630100493, |
| "learning_rate": 1.333424909046574e-06, |
| "loss": 0.2495, |
| "step": 1689 |
| }, |
| { |
| "epoch": 2.359916298535224, |
| "grad_norm": 2.161429978690411, |
| "learning_rate": 1.327904833196556e-06, |
| "loss": 0.2586, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.3613113229481515, |
| "grad_norm": 1.9388415195895992, |
| "learning_rate": 1.3223944568582047e-06, |
| "loss": 0.215, |
| "step": 1691 |
| }, |
| { |
| "epoch": 2.3627063473610788, |
| "grad_norm": 1.9652199591809878, |
| "learning_rate": 1.3168937945866861e-06, |
| "loss": 0.2276, |
| "step": 1692 |
| }, |
| { |
| "epoch": 2.364101371774006, |
| "grad_norm": 2.0711223470034597, |
| "learning_rate": 1.311402860911507e-06, |
| "loss": 0.241, |
| "step": 1693 |
| }, |
| { |
| "epoch": 2.3654963961869333, |
| "grad_norm": 1.9743629822432749, |
| "learning_rate": 1.3059216703364814e-06, |
| "loss": 0.2167, |
| "step": 1694 |
| }, |
| { |
| "epoch": 2.3668914205998606, |
| "grad_norm": 1.9924830951212522, |
| "learning_rate": 1.3004502373396821e-06, |
| "loss": 0.2358, |
| "step": 1695 |
| }, |
| { |
| "epoch": 2.368286445012788, |
| "grad_norm": 1.9788689015808711, |
| "learning_rate": 1.2949885763734127e-06, |
| "loss": 0.2423, |
| "step": 1696 |
| }, |
| { |
| "epoch": 2.369681469425715, |
| "grad_norm": 1.965926856035357, |
| "learning_rate": 1.2895367018641658e-06, |
| "loss": 0.2244, |
| "step": 1697 |
| }, |
| { |
| "epoch": 2.3710764938386424, |
| "grad_norm": 1.932908429022025, |
| "learning_rate": 1.284094628212576e-06, |
| "loss": 0.2582, |
| "step": 1698 |
| }, |
| { |
| "epoch": 2.372471518251569, |
| "grad_norm": 1.9431876485395636, |
| "learning_rate": 1.278662369793398e-06, |
| "loss": 0.2202, |
| "step": 1699 |
| }, |
| { |
| "epoch": 2.3738665426644965, |
| "grad_norm": 2.078921253978772, |
| "learning_rate": 1.273239940955459e-06, |
| "loss": 0.2572, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.3752615670774238, |
| "grad_norm": 2.035585260712323, |
| "learning_rate": 1.267827356021618e-06, |
| "loss": 0.2617, |
| "step": 1701 |
| }, |
| { |
| "epoch": 2.376656591490351, |
| "grad_norm": 2.180569630743452, |
| "learning_rate": 1.2624246292887377e-06, |
| "loss": 0.2349, |
| "step": 1702 |
| }, |
| { |
| "epoch": 2.3780516159032783, |
| "grad_norm": 2.03793194210423, |
| "learning_rate": 1.2570317750276374e-06, |
| "loss": 0.258, |
| "step": 1703 |
| }, |
| { |
| "epoch": 2.3794466403162056, |
| "grad_norm": 2.082005999471457, |
| "learning_rate": 1.2516488074830586e-06, |
| "loss": 0.2477, |
| "step": 1704 |
| }, |
| { |
| "epoch": 2.380841664729133, |
| "grad_norm": 1.8400987966532008, |
| "learning_rate": 1.246275740873631e-06, |
| "loss": 0.2331, |
| "step": 1705 |
| }, |
| { |
| "epoch": 2.38223668914206, |
| "grad_norm": 2.2066646811850865, |
| "learning_rate": 1.2409125893918329e-06, |
| "loss": 0.2542, |
| "step": 1706 |
| }, |
| { |
| "epoch": 2.3836317135549874, |
| "grad_norm": 2.0611768857820496, |
| "learning_rate": 1.2355593672039462e-06, |
| "loss": 0.202, |
| "step": 1707 |
| }, |
| { |
| "epoch": 2.385026737967914, |
| "grad_norm": 1.914378896690255, |
| "learning_rate": 1.2302160884500337e-06, |
| "loss": 0.2609, |
| "step": 1708 |
| }, |
| { |
| "epoch": 2.3864217623808415, |
| "grad_norm": 2.054680958740492, |
| "learning_rate": 1.2248827672438868e-06, |
| "loss": 0.2035, |
| "step": 1709 |
| }, |
| { |
| "epoch": 2.3878167867937687, |
| "grad_norm": 2.0441738070012105, |
| "learning_rate": 1.2195594176729963e-06, |
| "loss": 0.2388, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.389211811206696, |
| "grad_norm": 2.1122807295618844, |
| "learning_rate": 1.2142460537985168e-06, |
| "loss": 0.2303, |
| "step": 1711 |
| }, |
| { |
| "epoch": 2.3906068356196233, |
| "grad_norm": 2.0401675272163056, |
| "learning_rate": 1.2089426896552265e-06, |
| "loss": 0.2269, |
| "step": 1712 |
| }, |
| { |
| "epoch": 2.3920018600325506, |
| "grad_norm": 2.029589904828905, |
| "learning_rate": 1.2036493392514847e-06, |
| "loss": 0.2637, |
| "step": 1713 |
| }, |
| { |
| "epoch": 2.393396884445478, |
| "grad_norm": 2.0658070957162638, |
| "learning_rate": 1.1983660165692078e-06, |
| "loss": 0.2096, |
| "step": 1714 |
| }, |
| { |
| "epoch": 2.394791908858405, |
| "grad_norm": 2.283039546249197, |
| "learning_rate": 1.1930927355638189e-06, |
| "loss": 0.2436, |
| "step": 1715 |
| }, |
| { |
| "epoch": 2.3961869332713324, |
| "grad_norm": 1.8422258840639667, |
| "learning_rate": 1.1878295101642185e-06, |
| "loss": 0.2319, |
| "step": 1716 |
| }, |
| { |
| "epoch": 2.3975819576842596, |
| "grad_norm": 2.0568494378113296, |
| "learning_rate": 1.182576354272748e-06, |
| "loss": 0.2276, |
| "step": 1717 |
| }, |
| { |
| "epoch": 2.398976982097187, |
| "grad_norm": 1.952539086426312, |
| "learning_rate": 1.1773332817651512e-06, |
| "loss": 0.2295, |
| "step": 1718 |
| }, |
| { |
| "epoch": 2.400372006510114, |
| "grad_norm": 1.9158228630392138, |
| "learning_rate": 1.1721003064905329e-06, |
| "loss": 0.2133, |
| "step": 1719 |
| }, |
| { |
| "epoch": 2.401767030923041, |
| "grad_norm": 1.8532051493631778, |
| "learning_rate": 1.1668774422713336e-06, |
| "loss": 0.2396, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.4031620553359683, |
| "grad_norm": 1.9414496664869467, |
| "learning_rate": 1.1616647029032818e-06, |
| "loss": 0.2383, |
| "step": 1721 |
| }, |
| { |
| "epoch": 2.4045570797488955, |
| "grad_norm": 2.056538780483839, |
| "learning_rate": 1.1564621021553617e-06, |
| "loss": 0.2294, |
| "step": 1722 |
| }, |
| { |
| "epoch": 2.405952104161823, |
| "grad_norm": 2.0066219434319716, |
| "learning_rate": 1.1512696537697804e-06, |
| "loss": 0.2628, |
| "step": 1723 |
| }, |
| { |
| "epoch": 2.40734712857475, |
| "grad_norm": 2.285413378087384, |
| "learning_rate": 1.1460873714619275e-06, |
| "loss": 0.2684, |
| "step": 1724 |
| }, |
| { |
| "epoch": 2.4087421529876774, |
| "grad_norm": 2.074862240076793, |
| "learning_rate": 1.140915268920339e-06, |
| "loss": 0.2423, |
| "step": 1725 |
| }, |
| { |
| "epoch": 2.4101371774006046, |
| "grad_norm": 1.9534626474259522, |
| "learning_rate": 1.13575335980666e-06, |
| "loss": 0.2245, |
| "step": 1726 |
| }, |
| { |
| "epoch": 2.411532201813532, |
| "grad_norm": 2.11850049560302, |
| "learning_rate": 1.130601657755616e-06, |
| "loss": 0.2221, |
| "step": 1727 |
| }, |
| { |
| "epoch": 2.4129272262264587, |
| "grad_norm": 2.0705807422285614, |
| "learning_rate": 1.125460176374965e-06, |
| "loss": 0.227, |
| "step": 1728 |
| }, |
| { |
| "epoch": 2.414322250639386, |
| "grad_norm": 1.843249546710679, |
| "learning_rate": 1.1203289292454728e-06, |
| "loss": 0.2523, |
| "step": 1729 |
| }, |
| { |
| "epoch": 2.4157172750523133, |
| "grad_norm": 2.136376082447783, |
| "learning_rate": 1.1152079299208724e-06, |
| "loss": 0.2486, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.4171122994652405, |
| "grad_norm": 2.1189074920710484, |
| "learning_rate": 1.1100971919278247e-06, |
| "loss": 0.2286, |
| "step": 1731 |
| }, |
| { |
| "epoch": 2.418507323878168, |
| "grad_norm": 2.014466795940267, |
| "learning_rate": 1.104996728765887e-06, |
| "loss": 0.2471, |
| "step": 1732 |
| }, |
| { |
| "epoch": 2.419902348291095, |
| "grad_norm": 2.123142992349427, |
| "learning_rate": 1.0999065539074793e-06, |
| "loss": 0.2395, |
| "step": 1733 |
| }, |
| { |
| "epoch": 2.4212973727040223, |
| "grad_norm": 2.092780801188859, |
| "learning_rate": 1.094826680797843e-06, |
| "loss": 0.2323, |
| "step": 1734 |
| }, |
| { |
| "epoch": 2.4226923971169496, |
| "grad_norm": 1.9743466639593539, |
| "learning_rate": 1.0897571228550097e-06, |
| "loss": 0.2472, |
| "step": 1735 |
| }, |
| { |
| "epoch": 2.424087421529877, |
| "grad_norm": 2.0603635239914055, |
| "learning_rate": 1.0846978934697666e-06, |
| "loss": 0.2422, |
| "step": 1736 |
| }, |
| { |
| "epoch": 2.425482445942804, |
| "grad_norm": 1.878454958518566, |
| "learning_rate": 1.0796490060056142e-06, |
| "loss": 0.2313, |
| "step": 1737 |
| }, |
| { |
| "epoch": 2.4268774703557314, |
| "grad_norm": 2.0047687132435628, |
| "learning_rate": 1.074610473798738e-06, |
| "loss": 0.2287, |
| "step": 1738 |
| }, |
| { |
| "epoch": 2.4282724947686587, |
| "grad_norm": 2.130778745894436, |
| "learning_rate": 1.0695823101579728e-06, |
| "loss": 0.2227, |
| "step": 1739 |
| }, |
| { |
| "epoch": 2.4296675191815855, |
| "grad_norm": 1.9959834221817423, |
| "learning_rate": 1.0645645283647616e-06, |
| "loss": 0.2395, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.431062543594513, |
| "grad_norm": 2.076441367338032, |
| "learning_rate": 1.0595571416731293e-06, |
| "loss": 0.228, |
| "step": 1741 |
| }, |
| { |
| "epoch": 2.43245756800744, |
| "grad_norm": 1.9727319013347187, |
| "learning_rate": 1.0545601633096414e-06, |
| "loss": 0.2213, |
| "step": 1742 |
| }, |
| { |
| "epoch": 2.4338525924203673, |
| "grad_norm": 2.0619132387341015, |
| "learning_rate": 1.049573606473369e-06, |
| "loss": 0.245, |
| "step": 1743 |
| }, |
| { |
| "epoch": 2.4352476168332946, |
| "grad_norm": 1.9808499296055042, |
| "learning_rate": 1.0445974843358563e-06, |
| "loss": 0.2079, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.436642641246222, |
| "grad_norm": 1.9924612913344897, |
| "learning_rate": 1.0396318100410868e-06, |
| "loss": 0.2241, |
| "step": 1745 |
| }, |
| { |
| "epoch": 2.438037665659149, |
| "grad_norm": 2.0195101754838714, |
| "learning_rate": 1.0346765967054472e-06, |
| "loss": 0.2574, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.4394326900720764, |
| "grad_norm": 2.210865355019898, |
| "learning_rate": 1.029731857417689e-06, |
| "loss": 0.2589, |
| "step": 1747 |
| }, |
| { |
| "epoch": 2.4408277144850032, |
| "grad_norm": 1.9888864102360722, |
| "learning_rate": 1.0247976052389018e-06, |
| "loss": 0.2377, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.4422227388979305, |
| "grad_norm": 1.9963526780225826, |
| "learning_rate": 1.0198738532024715e-06, |
| "loss": 0.2201, |
| "step": 1749 |
| }, |
| { |
| "epoch": 2.443617763310858, |
| "grad_norm": 1.9375954149029564, |
| "learning_rate": 1.0149606143140484e-06, |
| "loss": 0.2464, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.445012787723785, |
| "grad_norm": 1.9490012959029157, |
| "learning_rate": 1.0100579015515156e-06, |
| "loss": 0.2387, |
| "step": 1751 |
| }, |
| { |
| "epoch": 2.4464078121367123, |
| "grad_norm": 2.1421767553643263, |
| "learning_rate": 1.005165727864953e-06, |
| "loss": 0.2382, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.4478028365496396, |
| "grad_norm": 2.0852319018269254, |
| "learning_rate": 1.0002841061765989e-06, |
| "loss": 0.2303, |
| "step": 1753 |
| }, |
| { |
| "epoch": 2.449197860962567, |
| "grad_norm": 1.898437822666265, |
| "learning_rate": 9.954130493808201e-07, |
| "loss": 0.2343, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.450592885375494, |
| "grad_norm": 2.2328708419723142, |
| "learning_rate": 9.905525703440815e-07, |
| "loss": 0.2324, |
| "step": 1755 |
| }, |
| { |
| "epoch": 2.4519879097884214, |
| "grad_norm": 2.098948338494277, |
| "learning_rate": 9.85702681904902e-07, |
| "loss": 0.2128, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.4533829342013487, |
| "grad_norm": 2.0504505351704916, |
| "learning_rate": 9.808633968738297e-07, |
| "loss": 0.2389, |
| "step": 1757 |
| }, |
| { |
| "epoch": 2.454777958614276, |
| "grad_norm": 2.006806960911157, |
| "learning_rate": 9.760347280334064e-07, |
| "loss": 0.2539, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.456172983027203, |
| "grad_norm": 1.9356692454237607, |
| "learning_rate": 9.712166881381279e-07, |
| "loss": 0.2416, |
| "step": 1759 |
| }, |
| { |
| "epoch": 2.45756800744013, |
| "grad_norm": 2.0609982252557013, |
| "learning_rate": 9.664092899144156e-07, |
| "loss": 0.2459, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.4589630318530573, |
| "grad_norm": 1.8784646017465558, |
| "learning_rate": 9.616125460605857e-07, |
| "loss": 0.219, |
| "step": 1761 |
| }, |
| { |
| "epoch": 2.4603580562659846, |
| "grad_norm": 1.9827145437937714, |
| "learning_rate": 9.56826469246806e-07, |
| "loss": 0.2164, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.461753080678912, |
| "grad_norm": 1.9828280925435495, |
| "learning_rate": 9.520510721150722e-07, |
| "loss": 0.2205, |
| "step": 1763 |
| }, |
| { |
| "epoch": 2.463148105091839, |
| "grad_norm": 2.1573204674909725, |
| "learning_rate": 9.472863672791721e-07, |
| "loss": 0.2797, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.4645431295047664, |
| "grad_norm": 2.114781710817254, |
| "learning_rate": 9.425323673246461e-07, |
| "loss": 0.2311, |
| "step": 1765 |
| }, |
| { |
| "epoch": 2.4659381539176937, |
| "grad_norm": 2.073720068988035, |
| "learning_rate": 9.377890848087595e-07, |
| "loss": 0.2422, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.467333178330621, |
| "grad_norm": 2.1924520868805963, |
| "learning_rate": 9.330565322604729e-07, |
| "loss": 0.2557, |
| "step": 1767 |
| }, |
| { |
| "epoch": 2.468728202743548, |
| "grad_norm": 2.1314923038601443, |
| "learning_rate": 9.283347221803985e-07, |
| "loss": 0.2365, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.470123227156475, |
| "grad_norm": 2.1016166593533656, |
| "learning_rate": 9.236236670407772e-07, |
| "loss": 0.2611, |
| "step": 1769 |
| }, |
| { |
| "epoch": 2.4715182515694023, |
| "grad_norm": 2.1958806745804416, |
| "learning_rate": 9.189233792854424e-07, |
| "loss": 0.2471, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.4729132759823296, |
| "grad_norm": 2.0613588674481877, |
| "learning_rate": 9.142338713297838e-07, |
| "loss": 0.2462, |
| "step": 1771 |
| }, |
| { |
| "epoch": 2.474308300395257, |
| "grad_norm": 1.889535779389397, |
| "learning_rate": 9.095551555607169e-07, |
| "loss": 0.2399, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.475703324808184, |
| "grad_norm": 2.076673087167382, |
| "learning_rate": 9.048872443366529e-07, |
| "loss": 0.2505, |
| "step": 1773 |
| }, |
| { |
| "epoch": 2.4770983492211114, |
| "grad_norm": 1.980949495347227, |
| "learning_rate": 9.002301499874622e-07, |
| "loss": 0.2365, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.4784933736340387, |
| "grad_norm": 1.972433910616226, |
| "learning_rate": 8.955838848144449e-07, |
| "loss": 0.2229, |
| "step": 1775 |
| }, |
| { |
| "epoch": 2.479888398046966, |
| "grad_norm": 2.205742031503069, |
| "learning_rate": 8.909484610902958e-07, |
| "loss": 0.2521, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.481283422459893, |
| "grad_norm": 1.9908248881502195, |
| "learning_rate": 8.863238910590704e-07, |
| "loss": 0.2096, |
| "step": 1777 |
| }, |
| { |
| "epoch": 2.4826784468728205, |
| "grad_norm": 1.9781717394676646, |
| "learning_rate": 8.817101869361599e-07, |
| "loss": 0.2482, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.4840734712857477, |
| "grad_norm": 2.1777301806732723, |
| "learning_rate": 8.77107360908253e-07, |
| "loss": 0.2652, |
| "step": 1779 |
| }, |
| { |
| "epoch": 2.4854684956986746, |
| "grad_norm": 2.068171178796176, |
| "learning_rate": 8.725154251333012e-07, |
| "loss": 0.2339, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.486863520111602, |
| "grad_norm": 2.2553151451018096, |
| "learning_rate": 8.679343917404959e-07, |
| "loss": 0.2962, |
| "step": 1781 |
| }, |
| { |
| "epoch": 2.488258544524529, |
| "grad_norm": 1.859927320195907, |
| "learning_rate": 8.633642728302266e-07, |
| "loss": 0.2409, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.4896535689374564, |
| "grad_norm": 2.097136463059436, |
| "learning_rate": 8.588050804740527e-07, |
| "loss": 0.226, |
| "step": 1783 |
| }, |
| { |
| "epoch": 2.4910485933503836, |
| "grad_norm": 2.031361226173633, |
| "learning_rate": 8.542568267146761e-07, |
| "loss": 0.2163, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.492443617763311, |
| "grad_norm": 1.8371067572490631, |
| "learning_rate": 8.49719523565904e-07, |
| "loss": 0.2184, |
| "step": 1785 |
| }, |
| { |
| "epoch": 2.493838642176238, |
| "grad_norm": 2.1523570658199564, |
| "learning_rate": 8.451931830126148e-07, |
| "loss": 0.2385, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.4952336665891655, |
| "grad_norm": 1.951878992014691, |
| "learning_rate": 8.40677817010736e-07, |
| "loss": 0.2222, |
| "step": 1787 |
| }, |
| { |
| "epoch": 2.4966286910020927, |
| "grad_norm": 2.1398256136886715, |
| "learning_rate": 8.361734374872032e-07, |
| "loss": 0.2424, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.4980237154150196, |
| "grad_norm": 1.8387718429463848, |
| "learning_rate": 8.316800563399307e-07, |
| "loss": 0.2252, |
| "step": 1789 |
| }, |
| { |
| "epoch": 2.499418739827947, |
| "grad_norm": 2.0758591904095205, |
| "learning_rate": 8.271976854377861e-07, |
| "loss": 0.2155, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.500813764240874, |
| "grad_norm": 1.9301849276761391, |
| "learning_rate": 8.227263366205523e-07, |
| "loss": 0.264, |
| "step": 1791 |
| }, |
| { |
| "epoch": 2.5022087886538014, |
| "grad_norm": 2.1047743582073157, |
| "learning_rate": 8.182660216988964e-07, |
| "loss": 0.3259, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.5036038130667286, |
| "grad_norm": 2.4983792769873623, |
| "learning_rate": 8.138167524543445e-07, |
| "loss": 0.2514, |
| "step": 1793 |
| }, |
| { |
| "epoch": 2.504998837479656, |
| "grad_norm": 2.2120478894270224, |
| "learning_rate": 8.09378540639243e-07, |
| "loss": 0.2515, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.506393861892583, |
| "grad_norm": 2.0867139416681195, |
| "learning_rate": 8.049513979767304e-07, |
| "loss": 0.2479, |
| "step": 1795 |
| }, |
| { |
| "epoch": 2.5077888863055104, |
| "grad_norm": 1.9359798627052334, |
| "learning_rate": 8.00535336160711e-07, |
| "loss": 0.2003, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.5091839107184377, |
| "grad_norm": 2.085079199029155, |
| "learning_rate": 7.96130366855819e-07, |
| "loss": 0.2429, |
| "step": 1797 |
| }, |
| { |
| "epoch": 2.510578935131365, |
| "grad_norm": 2.175659897075208, |
| "learning_rate": 7.917365016973866e-07, |
| "loss": 0.2206, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.5119739595442923, |
| "grad_norm": 2.1209361970147884, |
| "learning_rate": 7.873537522914155e-07, |
| "loss": 0.2506, |
| "step": 1799 |
| }, |
| { |
| "epoch": 2.5133689839572195, |
| "grad_norm": 2.1173385860772993, |
| "learning_rate": 7.829821302145485e-07, |
| "loss": 0.2345, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.5147640083701464, |
| "grad_norm": 1.9789184114073841, |
| "learning_rate": 7.786216470140334e-07, |
| "loss": 0.2365, |
| "step": 1801 |
| }, |
| { |
| "epoch": 2.5161590327830736, |
| "grad_norm": 2.0832279626065007, |
| "learning_rate": 7.742723142076991e-07, |
| "loss": 0.233, |
| "step": 1802 |
| }, |
| { |
| "epoch": 2.517554057196001, |
| "grad_norm": 1.9853952576197482, |
| "learning_rate": 7.699341432839203e-07, |
| "loss": 0.2465, |
| "step": 1803 |
| }, |
| { |
| "epoch": 2.518949081608928, |
| "grad_norm": 1.9655292208069006, |
| "learning_rate": 7.656071457015879e-07, |
| "loss": 0.2094, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.5203441060218554, |
| "grad_norm": 2.072957528236977, |
| "learning_rate": 7.612913328900784e-07, |
| "loss": 0.2308, |
| "step": 1805 |
| }, |
| { |
| "epoch": 2.5217391304347827, |
| "grad_norm": 2.050593842636747, |
| "learning_rate": 7.569867162492283e-07, |
| "loss": 0.2009, |
| "step": 1806 |
| }, |
| { |
| "epoch": 2.52313415484771, |
| "grad_norm": 1.9505121319227763, |
| "learning_rate": 7.526933071492959e-07, |
| "loss": 0.2628, |
| "step": 1807 |
| }, |
| { |
| "epoch": 2.524529179260637, |
| "grad_norm": 2.233499694853933, |
| "learning_rate": 7.484111169309399e-07, |
| "loss": 0.2267, |
| "step": 1808 |
| }, |
| { |
| "epoch": 2.525924203673564, |
| "grad_norm": 2.0427774995742456, |
| "learning_rate": 7.441401569051848e-07, |
| "loss": 0.2307, |
| "step": 1809 |
| }, |
| { |
| "epoch": 2.5273192280864913, |
| "grad_norm": 2.1523563504281604, |
| "learning_rate": 7.398804383533886e-07, |
| "loss": 0.2172, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.5287142524994186, |
| "grad_norm": 2.107184691359107, |
| "learning_rate": 7.356319725272165e-07, |
| "loss": 0.2354, |
| "step": 1811 |
| }, |
| { |
| "epoch": 2.530109276912346, |
| "grad_norm": 1.9472623249394085, |
| "learning_rate": 7.313947706486136e-07, |
| "loss": 0.2439, |
| "step": 1812 |
| }, |
| { |
| "epoch": 2.531504301325273, |
| "grad_norm": 2.2547362177468027, |
| "learning_rate": 7.271688439097713e-07, |
| "loss": 0.2611, |
| "step": 1813 |
| }, |
| { |
| "epoch": 2.5328993257382004, |
| "grad_norm": 1.9790179073787875, |
| "learning_rate": 7.229542034730952e-07, |
| "loss": 0.1908, |
| "step": 1814 |
| }, |
| { |
| "epoch": 2.5342943501511277, |
| "grad_norm": 2.1718993566008304, |
| "learning_rate": 7.187508604711851e-07, |
| "loss": 0.239, |
| "step": 1815 |
| }, |
| { |
| "epoch": 2.535689374564055, |
| "grad_norm": 2.006119781462586, |
| "learning_rate": 7.145588260067943e-07, |
| "loss": 0.2547, |
| "step": 1816 |
| }, |
| { |
| "epoch": 2.5370843989769822, |
| "grad_norm": 1.9707372022899972, |
| "learning_rate": 7.103781111528074e-07, |
| "loss": 0.2386, |
| "step": 1817 |
| }, |
| { |
| "epoch": 2.5384794233899095, |
| "grad_norm": 2.1078105247147616, |
| "learning_rate": 7.062087269522105e-07, |
| "loss": 0.2083, |
| "step": 1818 |
| }, |
| { |
| "epoch": 2.5398744478028368, |
| "grad_norm": 1.8096422690505354, |
| "learning_rate": 7.020506844180608e-07, |
| "loss": 0.24, |
| "step": 1819 |
| }, |
| { |
| "epoch": 2.541269472215764, |
| "grad_norm": 2.2907856322308047, |
| "learning_rate": 6.979039945334543e-07, |
| "loss": 0.2422, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.542664496628691, |
| "grad_norm": 2.0400580939554462, |
| "learning_rate": 6.937686682515044e-07, |
| "loss": 0.2353, |
| "step": 1821 |
| }, |
| { |
| "epoch": 2.544059521041618, |
| "grad_norm": 2.20245634299469, |
| "learning_rate": 6.896447164953057e-07, |
| "loss": 0.2535, |
| "step": 1822 |
| }, |
| { |
| "epoch": 2.5454545454545454, |
| "grad_norm": 2.048512795877345, |
| "learning_rate": 6.855321501579077e-07, |
| "loss": 0.2345, |
| "step": 1823 |
| }, |
| { |
| "epoch": 2.5468495698674727, |
| "grad_norm": 2.107797822320567, |
| "learning_rate": 6.814309801022873e-07, |
| "loss": 0.24, |
| "step": 1824 |
| }, |
| { |
| "epoch": 2.5482445942804, |
| "grad_norm": 2.0167190988164805, |
| "learning_rate": 6.77341217161322e-07, |
| "loss": 0.2644, |
| "step": 1825 |
| }, |
| { |
| "epoch": 2.5496396186933272, |
| "grad_norm": 2.0434168772720875, |
| "learning_rate": 6.732628721377533e-07, |
| "loss": 0.2187, |
| "step": 1826 |
| }, |
| { |
| "epoch": 2.5510346431062545, |
| "grad_norm": 2.0283320717025077, |
| "learning_rate": 6.69195955804165e-07, |
| "loss": 0.2894, |
| "step": 1827 |
| }, |
| { |
| "epoch": 2.5524296675191813, |
| "grad_norm": 2.184425853980423, |
| "learning_rate": 6.651404789029553e-07, |
| "loss": 0.2351, |
| "step": 1828 |
| }, |
| { |
| "epoch": 2.5538246919321086, |
| "grad_norm": 1.769961391111206, |
| "learning_rate": 6.610964521463032e-07, |
| "loss": 0.2367, |
| "step": 1829 |
| }, |
| { |
| "epoch": 2.555219716345036, |
| "grad_norm": 2.057938269523395, |
| "learning_rate": 6.570638862161449e-07, |
| "loss": 0.2136, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.556614740757963, |
| "grad_norm": 1.9601470016366807, |
| "learning_rate": 6.530427917641447e-07, |
| "loss": 0.2161, |
| "step": 1831 |
| }, |
| { |
| "epoch": 2.5580097651708904, |
| "grad_norm": 2.0539405710497296, |
| "learning_rate": 6.490331794116633e-07, |
| "loss": 0.2535, |
| "step": 1832 |
| }, |
| { |
| "epoch": 2.5594047895838177, |
| "grad_norm": 1.7665824386687003, |
| "learning_rate": 6.450350597497335e-07, |
| "loss": 0.2094, |
| "step": 1833 |
| }, |
| { |
| "epoch": 2.560799813996745, |
| "grad_norm": 1.9943622708189017, |
| "learning_rate": 6.410484433390335e-07, |
| "loss": 0.2517, |
| "step": 1834 |
| }, |
| { |
| "epoch": 2.562194838409672, |
| "grad_norm": 1.9325462598814633, |
| "learning_rate": 6.370733407098517e-07, |
| "loss": 0.2051, |
| "step": 1835 |
| }, |
| { |
| "epoch": 2.5635898628225995, |
| "grad_norm": 2.034392642432849, |
| "learning_rate": 6.331097623620697e-07, |
| "loss": 0.2411, |
| "step": 1836 |
| }, |
| { |
| "epoch": 2.5649848872355268, |
| "grad_norm": 1.906608056494423, |
| "learning_rate": 6.291577187651255e-07, |
| "loss": 0.2229, |
| "step": 1837 |
| }, |
| { |
| "epoch": 2.566379911648454, |
| "grad_norm": 2.0666019144763244, |
| "learning_rate": 6.252172203579892e-07, |
| "loss": 0.2405, |
| "step": 1838 |
| }, |
| { |
| "epoch": 2.5677749360613813, |
| "grad_norm": 2.0323492259191775, |
| "learning_rate": 6.212882775491352e-07, |
| "loss": 0.2727, |
| "step": 1839 |
| }, |
| { |
| "epoch": 2.5691699604743086, |
| "grad_norm": 1.9711644816953129, |
| "learning_rate": 6.173709007165158e-07, |
| "loss": 0.2451, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.5705649848872354, |
| "grad_norm": 2.1149991971809627, |
| "learning_rate": 6.134651002075315e-07, |
| "loss": 0.2332, |
| "step": 1841 |
| }, |
| { |
| "epoch": 2.5719600093001627, |
| "grad_norm": 2.1440606333696532, |
| "learning_rate": 6.095708863390065e-07, |
| "loss": 0.2443, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.57335503371309, |
| "grad_norm": 1.898581256781591, |
| "learning_rate": 6.056882693971605e-07, |
| "loss": 0.2183, |
| "step": 1843 |
| }, |
| { |
| "epoch": 2.574750058126017, |
| "grad_norm": 2.0720450905291448, |
| "learning_rate": 6.018172596375776e-07, |
| "loss": 0.2642, |
| "step": 1844 |
| }, |
| { |
| "epoch": 2.5761450825389445, |
| "grad_norm": 1.9560353823505137, |
| "learning_rate": 5.979578672851843e-07, |
| "loss": 0.2464, |
| "step": 1845 |
| }, |
| { |
| "epoch": 2.5775401069518717, |
| "grad_norm": 2.0786260231432077, |
| "learning_rate": 5.941101025342239e-07, |
| "loss": 0.222, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.578935131364799, |
| "grad_norm": 2.2179763750347634, |
| "learning_rate": 5.902739755482201e-07, |
| "loss": 0.2893, |
| "step": 1847 |
| }, |
| { |
| "epoch": 2.580330155777726, |
| "grad_norm": 1.98667473908548, |
| "learning_rate": 5.864494964599615e-07, |
| "loss": 0.2155, |
| "step": 1848 |
| }, |
| { |
| "epoch": 2.581725180190653, |
| "grad_norm": 2.0169000385789437, |
| "learning_rate": 5.826366753714707e-07, |
| "loss": 0.2356, |
| "step": 1849 |
| }, |
| { |
| "epoch": 2.5831202046035804, |
| "grad_norm": 1.8149669738422527, |
| "learning_rate": 5.788355223539698e-07, |
| "loss": 0.2252, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.5845152290165077, |
| "grad_norm": 2.1280566049946517, |
| "learning_rate": 5.750460474478675e-07, |
| "loss": 0.221, |
| "step": 1851 |
| }, |
| { |
| "epoch": 2.585910253429435, |
| "grad_norm": 1.7836139149162966, |
| "learning_rate": 5.712682606627251e-07, |
| "loss": 0.2277, |
| "step": 1852 |
| }, |
| { |
| "epoch": 2.587305277842362, |
| "grad_norm": 2.1508036868898928, |
| "learning_rate": 5.675021719772262e-07, |
| "loss": 0.2428, |
| "step": 1853 |
| }, |
| { |
| "epoch": 2.5887003022552895, |
| "grad_norm": 2.012263006386255, |
| "learning_rate": 5.637477913391604e-07, |
| "loss": 0.2221, |
| "step": 1854 |
| }, |
| { |
| "epoch": 2.5900953266682167, |
| "grad_norm": 1.9813993494667828, |
| "learning_rate": 5.600051286653884e-07, |
| "loss": 0.258, |
| "step": 1855 |
| }, |
| { |
| "epoch": 2.591490351081144, |
| "grad_norm": 2.1602425598412798, |
| "learning_rate": 5.562741938418187e-07, |
| "loss": 0.2299, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.5928853754940713, |
| "grad_norm": 2.0750208195688846, |
| "learning_rate": 5.525549967233829e-07, |
| "loss": 0.2349, |
| "step": 1857 |
| }, |
| { |
| "epoch": 2.5942803999069985, |
| "grad_norm": 2.0263311031326383, |
| "learning_rate": 5.488475471340099e-07, |
| "loss": 0.2325, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.595675424319926, |
| "grad_norm": 2.078040762901248, |
| "learning_rate": 5.451518548665946e-07, |
| "loss": 0.2428, |
| "step": 1859 |
| }, |
| { |
| "epoch": 2.597070448732853, |
| "grad_norm": 1.8412450414831687, |
| "learning_rate": 5.414679296829806e-07, |
| "loss": 0.2332, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.59846547314578, |
| "grad_norm": 1.8369491681995171, |
| "learning_rate": 5.377957813139262e-07, |
| "loss": 0.199, |
| "step": 1861 |
| }, |
| { |
| "epoch": 2.599860497558707, |
| "grad_norm": 2.0738049963713356, |
| "learning_rate": 5.341354194590831e-07, |
| "loss": 0.3003, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.6012555219716345, |
| "grad_norm": 2.2015502071142685, |
| "learning_rate": 5.304868537869706e-07, |
| "loss": 0.2218, |
| "step": 1863 |
| }, |
| { |
| "epoch": 2.6026505463845617, |
| "grad_norm": 1.9042994634815842, |
| "learning_rate": 5.268500939349514e-07, |
| "loss": 0.228, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.604045570797489, |
| "grad_norm": 1.938132636974177, |
| "learning_rate": 5.232251495091989e-07, |
| "loss": 0.2143, |
| "step": 1865 |
| }, |
| { |
| "epoch": 2.6054405952104163, |
| "grad_norm": 1.9382048893357533, |
| "learning_rate": 5.196120300846835e-07, |
| "loss": 0.2221, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.6068356196233435, |
| "grad_norm": 1.9324006507947422, |
| "learning_rate": 5.160107452051361e-07, |
| "loss": 0.2491, |
| "step": 1867 |
| }, |
| { |
| "epoch": 2.6082306440362704, |
| "grad_norm": 2.0427761815474397, |
| "learning_rate": 5.124213043830278e-07, |
| "loss": 0.2021, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.6096256684491976, |
| "grad_norm": 2.0082833497287225, |
| "learning_rate": 5.088437170995481e-07, |
| "loss": 0.2141, |
| "step": 1869 |
| }, |
| { |
| "epoch": 2.611020692862125, |
| "grad_norm": 1.866320494961171, |
| "learning_rate": 5.052779928045737e-07, |
| "loss": 0.2257, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.612415717275052, |
| "grad_norm": 2.0142624931091833, |
| "learning_rate": 5.01724140916649e-07, |
| "loss": 0.2341, |
| "step": 1871 |
| }, |
| { |
| "epoch": 2.6138107416879794, |
| "grad_norm": 2.0232328561691997, |
| "learning_rate": 4.981821708229545e-07, |
| "loss": 0.2512, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.6152057661009067, |
| "grad_norm": 2.1058063752183744, |
| "learning_rate": 4.946520918792886e-07, |
| "loss": 0.2182, |
| "step": 1873 |
| }, |
| { |
| "epoch": 2.616600790513834, |
| "grad_norm": 2.1036496155790463, |
| "learning_rate": 4.911339134100401e-07, |
| "loss": 0.2533, |
| "step": 1874 |
| }, |
| { |
| "epoch": 2.6179958149267613, |
| "grad_norm": 2.1879285031785325, |
| "learning_rate": 4.87627644708163e-07, |
| "loss": 0.239, |
| "step": 1875 |
| }, |
| { |
| "epoch": 2.6193908393396885, |
| "grad_norm": 2.006704466585366, |
| "learning_rate": 4.841332950351535e-07, |
| "loss": 0.2441, |
| "step": 1876 |
| }, |
| { |
| "epoch": 2.620785863752616, |
| "grad_norm": 2.007471298063676, |
| "learning_rate": 4.806508736210253e-07, |
| "loss": 0.2365, |
| "step": 1877 |
| }, |
| { |
| "epoch": 2.622180888165543, |
| "grad_norm": 1.789418537428787, |
| "learning_rate": 4.771803896642812e-07, |
| "loss": 0.2165, |
| "step": 1878 |
| }, |
| { |
| "epoch": 2.6235759125784703, |
| "grad_norm": 1.9228615618742761, |
| "learning_rate": 4.737218523318965e-07, |
| "loss": 0.2542, |
| "step": 1879 |
| }, |
| { |
| "epoch": 2.6249709369913976, |
| "grad_norm": 1.970234498952072, |
| "learning_rate": 4.7027527075929e-07, |
| "loss": 0.2212, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.6263659614043244, |
| "grad_norm": 2.071241627988333, |
| "learning_rate": 4.6684065405029677e-07, |
| "loss": 0.2363, |
| "step": 1881 |
| }, |
| { |
| "epoch": 2.6277609858172517, |
| "grad_norm": 1.9258080559377377, |
| "learning_rate": 4.6341801127715303e-07, |
| "loss": 0.2257, |
| "step": 1882 |
| }, |
| { |
| "epoch": 2.629156010230179, |
| "grad_norm": 1.9922325956159916, |
| "learning_rate": 4.6000735148046316e-07, |
| "loss": 0.2529, |
| "step": 1883 |
| }, |
| { |
| "epoch": 2.6305510346431062, |
| "grad_norm": 2.0813559532867, |
| "learning_rate": 4.566086836691791e-07, |
| "loss": 0.2275, |
| "step": 1884 |
| }, |
| { |
| "epoch": 2.6319460590560335, |
| "grad_norm": 1.9067412469761382, |
| "learning_rate": 4.532220168205798e-07, |
| "loss": 0.24, |
| "step": 1885 |
| }, |
| { |
| "epoch": 2.633341083468961, |
| "grad_norm": 2.0801424081538906, |
| "learning_rate": 4.498473598802444e-07, |
| "loss": 0.2085, |
| "step": 1886 |
| }, |
| { |
| "epoch": 2.634736107881888, |
| "grad_norm": 2.0163287177348277, |
| "learning_rate": 4.464847217620266e-07, |
| "loss": 0.2322, |
| "step": 1887 |
| }, |
| { |
| "epoch": 2.636131132294815, |
| "grad_norm": 2.0311176630581267, |
| "learning_rate": 4.4313411134803584e-07, |
| "loss": 0.2464, |
| "step": 1888 |
| }, |
| { |
| "epoch": 2.637526156707742, |
| "grad_norm": 2.0628314868379554, |
| "learning_rate": 4.397955374886104e-07, |
| "loss": 0.2192, |
| "step": 1889 |
| }, |
| { |
| "epoch": 2.6389211811206694, |
| "grad_norm": 2.1050032920387385, |
| "learning_rate": 4.364690090022938e-07, |
| "loss": 0.2369, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.6403162055335967, |
| "grad_norm": 1.9286471955876885, |
| "learning_rate": 4.331545346758159e-07, |
| "loss": 0.2322, |
| "step": 1891 |
| }, |
| { |
| "epoch": 2.641711229946524, |
| "grad_norm": 2.0048011858542334, |
| "learning_rate": 4.2985212326406456e-07, |
| "loss": 0.2121, |
| "step": 1892 |
| }, |
| { |
| "epoch": 2.6431062543594512, |
| "grad_norm": 1.9806271015465544, |
| "learning_rate": 4.265617834900637e-07, |
| "loss": 0.2134, |
| "step": 1893 |
| }, |
| { |
| "epoch": 2.6445012787723785, |
| "grad_norm": 1.9317814362926222, |
| "learning_rate": 4.2328352404495346e-07, |
| "loss": 0.2035, |
| "step": 1894 |
| }, |
| { |
| "epoch": 2.6458963031853058, |
| "grad_norm": 2.0239142062051663, |
| "learning_rate": 4.2001735358796316e-07, |
| "loss": 0.2007, |
| "step": 1895 |
| }, |
| { |
| "epoch": 2.647291327598233, |
| "grad_norm": 1.7416542864642734, |
| "learning_rate": 4.167632807463895e-07, |
| "loss": 0.2022, |
| "step": 1896 |
| }, |
| { |
| "epoch": 2.6486863520111603, |
| "grad_norm": 1.9831774018825685, |
| "learning_rate": 4.135213141155769e-07, |
| "loss": 0.2296, |
| "step": 1897 |
| }, |
| { |
| "epoch": 2.6500813764240876, |
| "grad_norm": 2.116068772685149, |
| "learning_rate": 4.1029146225889103e-07, |
| "loss": 0.2356, |
| "step": 1898 |
| }, |
| { |
| "epoch": 2.651476400837015, |
| "grad_norm": 1.9642948045482904, |
| "learning_rate": 4.0707373370769634e-07, |
| "loss": 0.2321, |
| "step": 1899 |
| }, |
| { |
| "epoch": 2.652871425249942, |
| "grad_norm": 2.0222515744267042, |
| "learning_rate": 4.0386813696133564e-07, |
| "loss": 0.2363, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.654266449662869, |
| "grad_norm": 1.9783354095753165, |
| "learning_rate": 4.0067468048710756e-07, |
| "loss": 0.2194, |
| "step": 1901 |
| }, |
| { |
| "epoch": 2.6556614740757962, |
| "grad_norm": 1.9211770592630109, |
| "learning_rate": 3.974933727202412e-07, |
| "loss": 0.229, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.6570564984887235, |
| "grad_norm": 1.8639767908818732, |
| "learning_rate": 3.943242220638777e-07, |
| "loss": 0.2152, |
| "step": 1903 |
| }, |
| { |
| "epoch": 2.6584515229016508, |
| "grad_norm": 2.0239531405139997, |
| "learning_rate": 3.911672368890462e-07, |
| "loss": 0.2426, |
| "step": 1904 |
| }, |
| { |
| "epoch": 2.659846547314578, |
| "grad_norm": 2.1025627641545572, |
| "learning_rate": 3.8802242553464096e-07, |
| "loss": 0.2313, |
| "step": 1905 |
| }, |
| { |
| "epoch": 2.6612415717275053, |
| "grad_norm": 2.0146577001145123, |
| "learning_rate": 3.8488979630739996e-07, |
| "loss": 0.2298, |
| "step": 1906 |
| }, |
| { |
| "epoch": 2.6626365961404326, |
| "grad_norm": 2.0364327114333003, |
| "learning_rate": 3.8176935748188425e-07, |
| "loss": 0.1858, |
| "step": 1907 |
| }, |
| { |
| "epoch": 2.66403162055336, |
| "grad_norm": 1.7558229059450832, |
| "learning_rate": 3.78661117300454e-07, |
| "loss": 0.207, |
| "step": 1908 |
| }, |
| { |
| "epoch": 2.6654266449662867, |
| "grad_norm": 1.9582911012714252, |
| "learning_rate": 3.755650839732489e-07, |
| "loss": 0.2123, |
| "step": 1909 |
| }, |
| { |
| "epoch": 2.666821669379214, |
| "grad_norm": 1.8840012884467325, |
| "learning_rate": 3.7248126567816454e-07, |
| "loss": 0.2562, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.668216693792141, |
| "grad_norm": 2.061501597369465, |
| "learning_rate": 3.694096705608319e-07, |
| "loss": 0.2368, |
| "step": 1911 |
| }, |
| { |
| "epoch": 2.6696117182050685, |
| "grad_norm": 2.1513760140631075, |
| "learning_rate": 3.6635030673459413e-07, |
| "loss": 0.2218, |
| "step": 1912 |
| }, |
| { |
| "epoch": 2.6710067426179958, |
| "grad_norm": 2.065018071288163, |
| "learning_rate": 3.6330318228049e-07, |
| "loss": 0.2468, |
| "step": 1913 |
| }, |
| { |
| "epoch": 2.672401767030923, |
| "grad_norm": 1.9139784162080573, |
| "learning_rate": 3.6026830524722443e-07, |
| "loss": 0.2407, |
| "step": 1914 |
| }, |
| { |
| "epoch": 2.6737967914438503, |
| "grad_norm": 1.9445091905357286, |
| "learning_rate": 3.572456836511551e-07, |
| "loss": 0.2458, |
| "step": 1915 |
| }, |
| { |
| "epoch": 2.6751918158567776, |
| "grad_norm": 1.94322248049399, |
| "learning_rate": 3.5423532547626816e-07, |
| "loss": 0.2207, |
| "step": 1916 |
| }, |
| { |
| "epoch": 2.676586840269705, |
| "grad_norm": 1.9210010764056549, |
| "learning_rate": 3.5123723867415527e-07, |
| "loss": 0.1981, |
| "step": 1917 |
| }, |
| { |
| "epoch": 2.677981864682632, |
| "grad_norm": 1.9185403011532343, |
| "learning_rate": 3.4825143116399454e-07, |
| "loss": 0.2194, |
| "step": 1918 |
| }, |
| { |
| "epoch": 2.6793768890955594, |
| "grad_norm": 2.256457880823731, |
| "learning_rate": 3.452779108325316e-07, |
| "loss": 0.2209, |
| "step": 1919 |
| }, |
| { |
| "epoch": 2.6807719135084866, |
| "grad_norm": 1.9724953467337907, |
| "learning_rate": 3.4231668553405316e-07, |
| "loss": 0.222, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.6821669379214135, |
| "grad_norm": 2.0819922224865044, |
| "learning_rate": 3.39367763090373e-07, |
| "loss": 0.237, |
| "step": 1921 |
| }, |
| { |
| "epoch": 2.6835619623343407, |
| "grad_norm": 2.0287374188952945, |
| "learning_rate": 3.3643115129080695e-07, |
| "loss": 0.238, |
| "step": 1922 |
| }, |
| { |
| "epoch": 2.684956986747268, |
| "grad_norm": 2.012873321631083, |
| "learning_rate": 3.3350685789215133e-07, |
| "loss": 0.2264, |
| "step": 1923 |
| }, |
| { |
| "epoch": 2.6863520111601953, |
| "grad_norm": 2.100733946305469, |
| "learning_rate": 3.3059489061866625e-07, |
| "loss": 0.2447, |
| "step": 1924 |
| }, |
| { |
| "epoch": 2.6877470355731226, |
| "grad_norm": 2.1754655463163273, |
| "learning_rate": 3.276952571620556e-07, |
| "loss": 0.2521, |
| "step": 1925 |
| }, |
| { |
| "epoch": 2.68914205998605, |
| "grad_norm": 2.030678162445352, |
| "learning_rate": 3.248079651814395e-07, |
| "loss": 0.2302, |
| "step": 1926 |
| }, |
| { |
| "epoch": 2.690537084398977, |
| "grad_norm": 1.827439538967311, |
| "learning_rate": 3.2193302230334455e-07, |
| "loss": 0.1955, |
| "step": 1927 |
| }, |
| { |
| "epoch": 2.6919321088119044, |
| "grad_norm": 1.826638985874322, |
| "learning_rate": 3.190704361216751e-07, |
| "loss": 0.2122, |
| "step": 1928 |
| }, |
| { |
| "epoch": 2.693327133224831, |
| "grad_norm": 2.0614544727358597, |
| "learning_rate": 3.162202141976956e-07, |
| "loss": 0.266, |
| "step": 1929 |
| }, |
| { |
| "epoch": 2.6947221576377585, |
| "grad_norm": 2.20309115743746, |
| "learning_rate": 3.133823640600137e-07, |
| "loss": 0.2578, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.6961171820506857, |
| "grad_norm": 1.9253571598225367, |
| "learning_rate": 3.105568932045577e-07, |
| "loss": 0.2494, |
| "step": 1931 |
| }, |
| { |
| "epoch": 2.697512206463613, |
| "grad_norm": 2.082806360246401, |
| "learning_rate": 3.077438090945573e-07, |
| "loss": 0.2233, |
| "step": 1932 |
| }, |
| { |
| "epoch": 2.6989072308765403, |
| "grad_norm": 2.1368559158587876, |
| "learning_rate": 3.0494311916052234e-07, |
| "loss": 0.2472, |
| "step": 1933 |
| }, |
| { |
| "epoch": 2.7003022552894675, |
| "grad_norm": 2.1399120491029393, |
| "learning_rate": 3.021548308002248e-07, |
| "loss": 0.2454, |
| "step": 1934 |
| }, |
| { |
| "epoch": 2.701697279702395, |
| "grad_norm": 2.0184949022912746, |
| "learning_rate": 2.9937895137868046e-07, |
| "loss": 0.2334, |
| "step": 1935 |
| }, |
| { |
| "epoch": 2.703092304115322, |
| "grad_norm": 2.12813933018869, |
| "learning_rate": 2.9661548822812636e-07, |
| "loss": 0.2145, |
| "step": 1936 |
| }, |
| { |
| "epoch": 2.7044873285282494, |
| "grad_norm": 1.8207230076654493, |
| "learning_rate": 2.9386444864800355e-07, |
| "loss": 0.2533, |
| "step": 1937 |
| }, |
| { |
| "epoch": 2.7058823529411766, |
| "grad_norm": 1.9816052407797478, |
| "learning_rate": 2.911258399049394e-07, |
| "loss": 0.2681, |
| "step": 1938 |
| }, |
| { |
| "epoch": 2.707277377354104, |
| "grad_norm": 2.1675186182836783, |
| "learning_rate": 2.8839966923272286e-07, |
| "loss": 0.2283, |
| "step": 1939 |
| }, |
| { |
| "epoch": 2.708672401767031, |
| "grad_norm": 2.026735541307535, |
| "learning_rate": 2.8568594383229067e-07, |
| "loss": 0.2516, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.710067426179958, |
| "grad_norm": 2.1369075066967844, |
| "learning_rate": 2.8298467087170655e-07, |
| "loss": 0.2544, |
| "step": 1941 |
| }, |
| { |
| "epoch": 2.7114624505928853, |
| "grad_norm": 2.0393735418050447, |
| "learning_rate": 2.8029585748614196e-07, |
| "loss": 0.2399, |
| "step": 1942 |
| }, |
| { |
| "epoch": 2.7128574750058125, |
| "grad_norm": 2.0244415010257995, |
| "learning_rate": 2.7761951077785676e-07, |
| "loss": 0.242, |
| "step": 1943 |
| }, |
| { |
| "epoch": 2.71425249941874, |
| "grad_norm": 2.00987027151913, |
| "learning_rate": 2.749556378161833e-07, |
| "loss": 0.2226, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.715647523831667, |
| "grad_norm": 2.0689668056181496, |
| "learning_rate": 2.723042456375036e-07, |
| "loss": 0.2224, |
| "step": 1945 |
| }, |
| { |
| "epoch": 2.7170425482445943, |
| "grad_norm": 1.9797277784560094, |
| "learning_rate": 2.696653412452327e-07, |
| "loss": 0.2256, |
| "step": 1946 |
| }, |
| { |
| "epoch": 2.7184375726575216, |
| "grad_norm": 1.93324183395182, |
| "learning_rate": 2.6703893160980266e-07, |
| "loss": 0.2318, |
| "step": 1947 |
| }, |
| { |
| "epoch": 2.719832597070449, |
| "grad_norm": 1.9660338970378355, |
| "learning_rate": 2.6442502366863854e-07, |
| "loss": 0.2466, |
| "step": 1948 |
| }, |
| { |
| "epoch": 2.7212276214833757, |
| "grad_norm": 2.041748793231265, |
| "learning_rate": 2.618236243261452e-07, |
| "loss": 0.2324, |
| "step": 1949 |
| }, |
| { |
| "epoch": 2.722622645896303, |
| "grad_norm": 2.0989128634489678, |
| "learning_rate": 2.592347404536888e-07, |
| "loss": 0.2329, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.7240176703092303, |
| "grad_norm": 2.1530826233490403, |
| "learning_rate": 2.566583788895721e-07, |
| "loss": 0.2358, |
| "step": 1951 |
| }, |
| { |
| "epoch": 2.7254126947221575, |
| "grad_norm": 1.9728780674087554, |
| "learning_rate": 2.5409454643902543e-07, |
| "loss": 0.2383, |
| "step": 1952 |
| }, |
| { |
| "epoch": 2.726807719135085, |
| "grad_norm": 2.0580855265191556, |
| "learning_rate": 2.5154324987418434e-07, |
| "loss": 0.2428, |
| "step": 1953 |
| }, |
| { |
| "epoch": 2.728202743548012, |
| "grad_norm": 2.010908338058876, |
| "learning_rate": 2.4900449593406984e-07, |
| "loss": 0.2235, |
| "step": 1954 |
| }, |
| { |
| "epoch": 2.7295977679609393, |
| "grad_norm": 1.9669057953101938, |
| "learning_rate": 2.4647829132457446e-07, |
| "loss": 0.2263, |
| "step": 1955 |
| }, |
| { |
| "epoch": 2.7309927923738666, |
| "grad_norm": 1.820511916019583, |
| "learning_rate": 2.439646427184428e-07, |
| "loss": 0.2199, |
| "step": 1956 |
| }, |
| { |
| "epoch": 2.732387816786794, |
| "grad_norm": 2.1325860915646766, |
| "learning_rate": 2.4146355675525145e-07, |
| "loss": 0.2324, |
| "step": 1957 |
| }, |
| { |
| "epoch": 2.733782841199721, |
| "grad_norm": 1.96368327913333, |
| "learning_rate": 2.389750400413965e-07, |
| "loss": 0.2493, |
| "step": 1958 |
| }, |
| { |
| "epoch": 2.7351778656126484, |
| "grad_norm": 1.9907140449319412, |
| "learning_rate": 2.364990991500743e-07, |
| "loss": 0.2276, |
| "step": 1959 |
| }, |
| { |
| "epoch": 2.7365728900255757, |
| "grad_norm": 2.199391943371498, |
| "learning_rate": 2.340357406212601e-07, |
| "loss": 0.2066, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.7379679144385025, |
| "grad_norm": 1.8746179327096653, |
| "learning_rate": 2.315849709616963e-07, |
| "loss": 0.1987, |
| "step": 1961 |
| }, |
| { |
| "epoch": 2.73936293885143, |
| "grad_norm": 1.8545723296564522, |
| "learning_rate": 2.2914679664487237e-07, |
| "loss": 0.1947, |
| "step": 1962 |
| }, |
| { |
| "epoch": 2.740757963264357, |
| "grad_norm": 2.0292994633820993, |
| "learning_rate": 2.2672122411100727e-07, |
| "loss": 0.2357, |
| "step": 1963 |
| }, |
| { |
| "epoch": 2.7421529876772843, |
| "grad_norm": 1.9882351397477116, |
| "learning_rate": 2.2430825976703485e-07, |
| "loss": 0.2226, |
| "step": 1964 |
| }, |
| { |
| "epoch": 2.7435480120902116, |
| "grad_norm": 2.0461627757851364, |
| "learning_rate": 2.2190790998658561e-07, |
| "loss": 0.2651, |
| "step": 1965 |
| }, |
| { |
| "epoch": 2.744943036503139, |
| "grad_norm": 2.1018494441005884, |
| "learning_rate": 2.1952018110996843e-07, |
| "loss": 0.2186, |
| "step": 1966 |
| }, |
| { |
| "epoch": 2.746338060916066, |
| "grad_norm": 2.1297808725292278, |
| "learning_rate": 2.1714507944415708e-07, |
| "loss": 0.2523, |
| "step": 1967 |
| }, |
| { |
| "epoch": 2.7477330853289934, |
| "grad_norm": 1.9107037042938857, |
| "learning_rate": 2.1478261126276989e-07, |
| "loss": 0.2196, |
| "step": 1968 |
| }, |
| { |
| "epoch": 2.7491281097419202, |
| "grad_norm": 2.288966469409701, |
| "learning_rate": 2.1243278280605517e-07, |
| "loss": 0.2388, |
| "step": 1969 |
| }, |
| { |
| "epoch": 2.7505231341548475, |
| "grad_norm": 2.0113282340570344, |
| "learning_rate": 2.1009560028087627e-07, |
| "loss": 0.2187, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.7519181585677748, |
| "grad_norm": 2.0577383008463404, |
| "learning_rate": 2.0777106986069162e-07, |
| "loss": 0.2341, |
| "step": 1971 |
| }, |
| { |
| "epoch": 2.753313182980702, |
| "grad_norm": 1.979246023089912, |
| "learning_rate": 2.0545919768554078e-07, |
| "loss": 0.2139, |
| "step": 1972 |
| }, |
| { |
| "epoch": 2.7547082073936293, |
| "grad_norm": 2.1145076730283408, |
| "learning_rate": 2.0315998986202902e-07, |
| "loss": 0.2503, |
| "step": 1973 |
| }, |
| { |
| "epoch": 2.7561032318065566, |
| "grad_norm": 2.038214733990826, |
| "learning_rate": 2.0087345246330714e-07, |
| "loss": 0.2273, |
| "step": 1974 |
| }, |
| { |
| "epoch": 2.757498256219484, |
| "grad_norm": 2.085222579466815, |
| "learning_rate": 1.985995915290595e-07, |
| "loss": 0.2365, |
| "step": 1975 |
| }, |
| { |
| "epoch": 2.758893280632411, |
| "grad_norm": 1.961930537056812, |
| "learning_rate": 1.9633841306548717e-07, |
| "loss": 0.2116, |
| "step": 1976 |
| }, |
| { |
| "epoch": 2.7602883050453384, |
| "grad_norm": 1.9746414569427433, |
| "learning_rate": 1.9408992304529252e-07, |
| "loss": 0.2496, |
| "step": 1977 |
| }, |
| { |
| "epoch": 2.7616833294582657, |
| "grad_norm": 1.9416879339980468, |
| "learning_rate": 1.9185412740765962e-07, |
| "loss": 0.1972, |
| "step": 1978 |
| }, |
| { |
| "epoch": 2.763078353871193, |
| "grad_norm": 1.9526605625268896, |
| "learning_rate": 1.8963103205824397e-07, |
| "loss": 0.223, |
| "step": 1979 |
| }, |
| { |
| "epoch": 2.76447337828412, |
| "grad_norm": 2.034376339460357, |
| "learning_rate": 1.8742064286915329e-07, |
| "loss": 0.2364, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.765868402697047, |
| "grad_norm": 1.928888877253871, |
| "learning_rate": 1.8522296567893282e-07, |
| "loss": 0.249, |
| "step": 1981 |
| }, |
| { |
| "epoch": 2.7672634271099743, |
| "grad_norm": 2.179741053262633, |
| "learning_rate": 1.830380062925513e-07, |
| "loss": 0.2226, |
| "step": 1982 |
| }, |
| { |
| "epoch": 2.7686584515229016, |
| "grad_norm": 2.1545101813981384, |
| "learning_rate": 1.8086577048138432e-07, |
| "loss": 0.2366, |
| "step": 1983 |
| }, |
| { |
| "epoch": 2.770053475935829, |
| "grad_norm": 1.8941469814993368, |
| "learning_rate": 1.787062639831988e-07, |
| "loss": 0.2195, |
| "step": 1984 |
| }, |
| { |
| "epoch": 2.771448500348756, |
| "grad_norm": 2.129029126050336, |
| "learning_rate": 1.7655949250213743e-07, |
| "loss": 0.2158, |
| "step": 1985 |
| }, |
| { |
| "epoch": 2.7728435247616834, |
| "grad_norm": 1.9373194190810024, |
| "learning_rate": 1.7442546170870654e-07, |
| "loss": 0.2227, |
| "step": 1986 |
| }, |
| { |
| "epoch": 2.7742385491746107, |
| "grad_norm": 2.022970081615447, |
| "learning_rate": 1.7230417723975766e-07, |
| "loss": 0.2392, |
| "step": 1987 |
| }, |
| { |
| "epoch": 2.775633573587538, |
| "grad_norm": 2.0136815131628034, |
| "learning_rate": 1.7019564469847372e-07, |
| "loss": 0.2143, |
| "step": 1988 |
| }, |
| { |
| "epoch": 2.7770285980004648, |
| "grad_norm": 1.8758190104617118, |
| "learning_rate": 1.6809986965435675e-07, |
| "loss": 0.2052, |
| "step": 1989 |
| }, |
| { |
| "epoch": 2.778423622413392, |
| "grad_norm": 2.1315337163239265, |
| "learning_rate": 1.660168576432092e-07, |
| "loss": 0.2435, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.7798186468263193, |
| "grad_norm": 2.126578038961216, |
| "learning_rate": 1.6394661416711977e-07, |
| "loss": 0.2229, |
| "step": 1991 |
| }, |
| { |
| "epoch": 2.7812136712392466, |
| "grad_norm": 1.9817111978138984, |
| "learning_rate": 1.6188914469445372e-07, |
| "loss": 0.2236, |
| "step": 1992 |
| }, |
| { |
| "epoch": 2.782608695652174, |
| "grad_norm": 2.020978049322112, |
| "learning_rate": 1.5984445465983156e-07, |
| "loss": 0.2337, |
| "step": 1993 |
| }, |
| { |
| "epoch": 2.784003720065101, |
| "grad_norm": 1.9219890439740848, |
| "learning_rate": 1.5781254946412029e-07, |
| "loss": 0.2174, |
| "step": 1994 |
| }, |
| { |
| "epoch": 2.7853987444780284, |
| "grad_norm": 1.9668911299656417, |
| "learning_rate": 1.5579343447441663e-07, |
| "loss": 0.216, |
| "step": 1995 |
| }, |
| { |
| "epoch": 2.7867937688909556, |
| "grad_norm": 1.8683512982901405, |
| "learning_rate": 1.5378711502403164e-07, |
| "loss": 0.192, |
| "step": 1996 |
| }, |
| { |
| "epoch": 2.788188793303883, |
| "grad_norm": 1.96884738892342, |
| "learning_rate": 1.5179359641247948e-07, |
| "loss": 0.2368, |
| "step": 1997 |
| }, |
| { |
| "epoch": 2.78958381771681, |
| "grad_norm": 2.0694150339442574, |
| "learning_rate": 1.4981288390546188e-07, |
| "loss": 0.2447, |
| "step": 1998 |
| }, |
| { |
| "epoch": 2.7909788421297375, |
| "grad_norm": 2.0875718503144425, |
| "learning_rate": 1.4784498273485436e-07, |
| "loss": 0.2278, |
| "step": 1999 |
| }, |
| { |
| "epoch": 2.7923738665426647, |
| "grad_norm": 2.080303164509914, |
| "learning_rate": 1.458898980986917e-07, |
| "loss": 0.2392, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.793768890955592, |
| "grad_norm": 2.1332960074779685, |
| "learning_rate": 1.4394763516115573e-07, |
| "loss": 0.2309, |
| "step": 2001 |
| }, |
| { |
| "epoch": 2.795163915368519, |
| "grad_norm": 2.1368019185944966, |
| "learning_rate": 1.4201819905256043e-07, |
| "loss": 0.2307, |
| "step": 2002 |
| }, |
| { |
| "epoch": 2.796558939781446, |
| "grad_norm": 2.1158557952062895, |
| "learning_rate": 1.4010159486933906e-07, |
| "loss": 0.2509, |
| "step": 2003 |
| }, |
| { |
| "epoch": 2.7979539641943734, |
| "grad_norm": 2.0558983826210717, |
| "learning_rate": 1.3819782767403034e-07, |
| "loss": 0.2195, |
| "step": 2004 |
| }, |
| { |
| "epoch": 2.7993489886073006, |
| "grad_norm": 1.9261173004873333, |
| "learning_rate": 1.363069024952668e-07, |
| "loss": 0.2365, |
| "step": 2005 |
| }, |
| { |
| "epoch": 2.800744013020228, |
| "grad_norm": 1.9189366322448516, |
| "learning_rate": 1.344288243277575e-07, |
| "loss": 0.2437, |
| "step": 2006 |
| }, |
| { |
| "epoch": 2.802139037433155, |
| "grad_norm": 2.010005253000098, |
| "learning_rate": 1.3256359813227758e-07, |
| "loss": 0.2238, |
| "step": 2007 |
| }, |
| { |
| "epoch": 2.8035340618460824, |
| "grad_norm": 1.8560267537251662, |
| "learning_rate": 1.3071122883565657e-07, |
| "loss": 0.2472, |
| "step": 2008 |
| }, |
| { |
| "epoch": 2.8049290862590093, |
| "grad_norm": 2.0899891570381963, |
| "learning_rate": 1.288717213307622e-07, |
| "loss": 0.2465, |
| "step": 2009 |
| }, |
| { |
| "epoch": 2.8063241106719365, |
| "grad_norm": 2.0680954666113034, |
| "learning_rate": 1.2704508047649e-07, |
| "loss": 0.2487, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.807719135084864, |
| "grad_norm": 2.003905225505703, |
| "learning_rate": 1.2523131109774822e-07, |
| "loss": 0.2023, |
| "step": 2011 |
| }, |
| { |
| "epoch": 2.809114159497791, |
| "grad_norm": 1.711566864626143, |
| "learning_rate": 1.234304179854473e-07, |
| "loss": 0.2171, |
| "step": 2012 |
| }, |
| { |
| "epoch": 2.8105091839107184, |
| "grad_norm": 2.1299908201771305, |
| "learning_rate": 1.2164240589648436e-07, |
| "loss": 0.2289, |
| "step": 2013 |
| }, |
| { |
| "epoch": 2.8119042083236456, |
| "grad_norm": 2.0155206237844103, |
| "learning_rate": 1.1986727955373588e-07, |
| "loss": 0.2267, |
| "step": 2014 |
| }, |
| { |
| "epoch": 2.813299232736573, |
| "grad_norm": 2.0292985224449844, |
| "learning_rate": 1.1810504364603737e-07, |
| "loss": 0.2363, |
| "step": 2015 |
| }, |
| { |
| "epoch": 2.8146942571495, |
| "grad_norm": 2.064222853753681, |
| "learning_rate": 1.163557028281792e-07, |
| "loss": 0.2097, |
| "step": 2016 |
| }, |
| { |
| "epoch": 2.8160892815624274, |
| "grad_norm": 2.0007115496451053, |
| "learning_rate": 1.146192617208891e-07, |
| "loss": 0.1964, |
| "step": 2017 |
| }, |
| { |
| "epoch": 2.8174843059753547, |
| "grad_norm": 1.8324329173352072, |
| "learning_rate": 1.128957249108209e-07, |
| "loss": 0.2091, |
| "step": 2018 |
| }, |
| { |
| "epoch": 2.818879330388282, |
| "grad_norm": 1.9879711284900963, |
| "learning_rate": 1.1118509695054236e-07, |
| "loss": 0.2097, |
| "step": 2019 |
| }, |
| { |
| "epoch": 2.8202743548012092, |
| "grad_norm": 1.9307293719213443, |
| "learning_rate": 1.094873823585263e-07, |
| "loss": 0.2057, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.8216693792141365, |
| "grad_norm": 1.8756053670010457, |
| "learning_rate": 1.0780258561913281e-07, |
| "loss": 0.2375, |
| "step": 2021 |
| }, |
| { |
| "epoch": 2.8230644036270633, |
| "grad_norm": 2.2006935107651433, |
| "learning_rate": 1.0613071118260321e-07, |
| "loss": 0.2365, |
| "step": 2022 |
| }, |
| { |
| "epoch": 2.8244594280399906, |
| "grad_norm": 2.2031758088252467, |
| "learning_rate": 1.0447176346504439e-07, |
| "loss": 0.2507, |
| "step": 2023 |
| }, |
| { |
| "epoch": 2.825854452452918, |
| "grad_norm": 2.0331823697534164, |
| "learning_rate": 1.0282574684841784e-07, |
| "loss": 0.2503, |
| "step": 2024 |
| }, |
| { |
| "epoch": 2.827249476865845, |
| "grad_norm": 2.069416903471262, |
| "learning_rate": 1.011926656805301e-07, |
| "loss": 0.2135, |
| "step": 2025 |
| }, |
| { |
| "epoch": 2.8286445012787724, |
| "grad_norm": 1.8308454776524232, |
| "learning_rate": 9.957252427501951e-08, |
| "loss": 0.2098, |
| "step": 2026 |
| }, |
| { |
| "epoch": 2.8300395256916997, |
| "grad_norm": 1.8951158726710682, |
| "learning_rate": 9.796532691134453e-08, |
| "loss": 0.2391, |
| "step": 2027 |
| }, |
| { |
| "epoch": 2.831434550104627, |
| "grad_norm": 1.9942746715013322, |
| "learning_rate": 9.637107783477484e-08, |
| "loss": 0.2211, |
| "step": 2028 |
| }, |
| { |
| "epoch": 2.832829574517554, |
| "grad_norm": 2.074390628687549, |
| "learning_rate": 9.478978125637583e-08, |
| "loss": 0.2056, |
| "step": 2029 |
| }, |
| { |
| "epoch": 2.834224598930481, |
| "grad_norm": 2.0197859477607314, |
| "learning_rate": 9.322144135300137e-08, |
| "loss": 0.2356, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.8356196233434083, |
| "grad_norm": 2.0090938538458474, |
| "learning_rate": 9.166606226728103e-08, |
| "loss": 0.2069, |
| "step": 2031 |
| }, |
| { |
| "epoch": 2.8370146477563356, |
| "grad_norm": 1.784604391867768, |
| "learning_rate": 9.012364810761121e-08, |
| "loss": 0.2401, |
| "step": 2032 |
| }, |
| { |
| "epoch": 2.838409672169263, |
| "grad_norm": 2.3621317749274486, |
| "learning_rate": 8.859420294814014e-08, |
| "loss": 0.235, |
| "step": 2033 |
| }, |
| { |
| "epoch": 2.83980469658219, |
| "grad_norm": 1.9238377220229415, |
| "learning_rate": 8.70777308287618e-08, |
| "loss": 0.2546, |
| "step": 2034 |
| }, |
| { |
| "epoch": 2.8411997209951174, |
| "grad_norm": 2.2386775669658077, |
| "learning_rate": 8.557423575510037e-08, |
| "loss": 0.2552, |
| "step": 2035 |
| }, |
| { |
| "epoch": 2.8425947454080447, |
| "grad_norm": 2.164637819234815, |
| "learning_rate": 8.408372169850521e-08, |
| "loss": 0.245, |
| "step": 2036 |
| }, |
| { |
| "epoch": 2.843989769820972, |
| "grad_norm": 2.1267060866199334, |
| "learning_rate": 8.26061925960353e-08, |
| "loss": 0.2627, |
| "step": 2037 |
| }, |
| { |
| "epoch": 2.8453847942338992, |
| "grad_norm": 2.175840094462751, |
| "learning_rate": 8.114165235045268e-08, |
| "loss": 0.2387, |
| "step": 2038 |
| }, |
| { |
| "epoch": 2.8467798186468265, |
| "grad_norm": 2.041608543433892, |
| "learning_rate": 7.969010483020845e-08, |
| "loss": 0.228, |
| "step": 2039 |
| }, |
| { |
| "epoch": 2.8481748430597538, |
| "grad_norm": 1.8317288740398268, |
| "learning_rate": 7.825155386943784e-08, |
| "loss": 0.2349, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.849569867472681, |
| "grad_norm": 2.076140243471252, |
| "learning_rate": 7.682600326794353e-08, |
| "loss": 0.2427, |
| "step": 2041 |
| }, |
| { |
| "epoch": 2.850964891885608, |
| "grad_norm": 1.930151853189304, |
| "learning_rate": 7.541345679118961e-08, |
| "loss": 0.2302, |
| "step": 2042 |
| }, |
| { |
| "epoch": 2.852359916298535, |
| "grad_norm": 2.198044174123011, |
| "learning_rate": 7.401391817029257e-08, |
| "loss": 0.227, |
| "step": 2043 |
| }, |
| { |
| "epoch": 2.8537549407114624, |
| "grad_norm": 2.025855994758739, |
| "learning_rate": 7.262739110200923e-08, |
| "loss": 0.2281, |
| "step": 2044 |
| }, |
| { |
| "epoch": 2.8551499651243897, |
| "grad_norm": 2.076594409784964, |
| "learning_rate": 7.125387924872552e-08, |
| "loss": 0.2552, |
| "step": 2045 |
| }, |
| { |
| "epoch": 2.856544989537317, |
| "grad_norm": 1.9583920141971498, |
| "learning_rate": 6.98933862384521e-08, |
| "loss": 0.2123, |
| "step": 2046 |
| }, |
| { |
| "epoch": 2.857940013950244, |
| "grad_norm": 2.1218586078077317, |
| "learning_rate": 6.854591566480884e-08, |
| "loss": 0.1864, |
| "step": 2047 |
| }, |
| { |
| "epoch": 2.8593350383631715, |
| "grad_norm": 1.9283990614176407, |
| "learning_rate": 6.721147108701864e-08, |
| "loss": 0.2352, |
| "step": 2048 |
| }, |
| { |
| "epoch": 2.8607300627760983, |
| "grad_norm": 2.0442219825500283, |
| "learning_rate": 6.589005602989862e-08, |
| "loss": 0.2299, |
| "step": 2049 |
| }, |
| { |
| "epoch": 2.8621250871890256, |
| "grad_norm": 2.296981149362681, |
| "learning_rate": 6.458167398384896e-08, |
| "loss": 0.2346, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.863520111601953, |
| "grad_norm": 2.0309143626789488, |
| "learning_rate": 6.328632840484294e-08, |
| "loss": 0.2003, |
| "step": 2051 |
| }, |
| { |
| "epoch": 2.86491513601488, |
| "grad_norm": 1.8135178412153783, |
| "learning_rate": 6.200402271442085e-08, |
| "loss": 0.2241, |
| "step": 2052 |
| }, |
| { |
| "epoch": 2.8663101604278074, |
| "grad_norm": 2.057910995863822, |
| "learning_rate": 6.073476029967884e-08, |
| "loss": 0.2304, |
| "step": 2053 |
| }, |
| { |
| "epoch": 2.8677051848407347, |
| "grad_norm": 2.013113048138251, |
| "learning_rate": 5.947854451326007e-08, |
| "loss": 0.2461, |
| "step": 2054 |
| }, |
| { |
| "epoch": 2.869100209253662, |
| "grad_norm": 2.0255907815051835, |
| "learning_rate": 5.823537867334694e-08, |
| "loss": 0.2136, |
| "step": 2055 |
| }, |
| { |
| "epoch": 2.870495233666589, |
| "grad_norm": 1.9199247970965672, |
| "learning_rate": 5.7005266063650534e-08, |
| "loss": 0.2243, |
| "step": 2056 |
| }, |
| { |
| "epoch": 2.8718902580795165, |
| "grad_norm": 2.156967447355663, |
| "learning_rate": 5.5788209933403944e-08, |
| "loss": 0.2805, |
| "step": 2057 |
| }, |
| { |
| "epoch": 2.8732852824924437, |
| "grad_norm": 1.9305083322744667, |
| "learning_rate": 5.4584213497351766e-08, |
| "loss": 0.2263, |
| "step": 2058 |
| }, |
| { |
| "epoch": 2.874680306905371, |
| "grad_norm": 1.976227997733238, |
| "learning_rate": 5.339327993574339e-08, |
| "loss": 0.2099, |
| "step": 2059 |
| }, |
| { |
| "epoch": 2.8760753313182983, |
| "grad_norm": 1.9714973369866737, |
| "learning_rate": 5.221541239432415e-08, |
| "loss": 0.1981, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.8774703557312256, |
| "grad_norm": 2.036301034568653, |
| "learning_rate": 5.1050613984324756e-08, |
| "loss": 0.1994, |
| "step": 2061 |
| }, |
| { |
| "epoch": 2.8788653801441524, |
| "grad_norm": 2.1703868645790667, |
| "learning_rate": 4.989888778245744e-08, |
| "loss": 0.2516, |
| "step": 2062 |
| }, |
| { |
| "epoch": 2.8802604045570797, |
| "grad_norm": 1.9606077688525048, |
| "learning_rate": 4.8760236830903697e-08, |
| "loss": 0.2377, |
| "step": 2063 |
| }, |
| { |
| "epoch": 2.881655428970007, |
| "grad_norm": 2.0531863218211885, |
| "learning_rate": 4.763466413730822e-08, |
| "loss": 0.2451, |
| "step": 2064 |
| }, |
| { |
| "epoch": 2.883050453382934, |
| "grad_norm": 2.0517963200516096, |
| "learning_rate": 4.65221726747711e-08, |
| "loss": 0.2393, |
| "step": 2065 |
| }, |
| { |
| "epoch": 2.8844454777958615, |
| "grad_norm": 2.157910418174818, |
| "learning_rate": 4.542276538183954e-08, |
| "loss": 0.2225, |
| "step": 2066 |
| }, |
| { |
| "epoch": 2.8858405022087887, |
| "grad_norm": 2.0408160528092667, |
| "learning_rate": 4.433644516249891e-08, |
| "loss": 0.246, |
| "step": 2067 |
| }, |
| { |
| "epoch": 2.887235526621716, |
| "grad_norm": 2.093617123831178, |
| "learning_rate": 4.326321488616836e-08, |
| "loss": 0.2508, |
| "step": 2068 |
| }, |
| { |
| "epoch": 2.888630551034643, |
| "grad_norm": 2.1884025061010144, |
| "learning_rate": 4.220307738768859e-08, |
| "loss": 0.2237, |
| "step": 2069 |
| }, |
| { |
| "epoch": 2.89002557544757, |
| "grad_norm": 1.9657784524140614, |
| "learning_rate": 4.11560354673185e-08, |
| "loss": 0.246, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.8914205998604974, |
| "grad_norm": 1.8686482185482467, |
| "learning_rate": 4.0122091890726354e-08, |
| "loss": 0.2291, |
| "step": 2071 |
| }, |
| { |
| "epoch": 2.8928156242734246, |
| "grad_norm": 1.9914728882718662, |
| "learning_rate": 3.9101249388981965e-08, |
| "loss": 0.2018, |
| "step": 2072 |
| }, |
| { |
| "epoch": 2.894210648686352, |
| "grad_norm": 1.9919379835112403, |
| "learning_rate": 3.809351065854894e-08, |
| "loss": 0.2586, |
| "step": 2073 |
| }, |
| { |
| "epoch": 2.895605673099279, |
| "grad_norm": 2.0949017501890173, |
| "learning_rate": 3.709887836128023e-08, |
| "loss": 0.2614, |
| "step": 2074 |
| }, |
| { |
| "epoch": 2.8970006975122065, |
| "grad_norm": 2.0514498538853267, |
| "learning_rate": 3.611735512440706e-08, |
| "loss": 0.2514, |
| "step": 2075 |
| }, |
| { |
| "epoch": 2.8983957219251337, |
| "grad_norm": 2.1514559104030906, |
| "learning_rate": 3.5148943540536105e-08, |
| "loss": 0.2265, |
| "step": 2076 |
| }, |
| { |
| "epoch": 2.899790746338061, |
| "grad_norm": 1.9415093712506253, |
| "learning_rate": 3.4193646167640646e-08, |
| "loss": 0.2521, |
| "step": 2077 |
| }, |
| { |
| "epoch": 2.9011857707509883, |
| "grad_norm": 2.2275104214143084, |
| "learning_rate": 3.325146552905223e-08, |
| "loss": 0.228, |
| "step": 2078 |
| }, |
| { |
| "epoch": 2.9025807951639155, |
| "grad_norm": 1.9328981753760448, |
| "learning_rate": 3.2322404113457886e-08, |
| "loss": 0.2535, |
| "step": 2079 |
| }, |
| { |
| "epoch": 2.903975819576843, |
| "grad_norm": 2.0722145311097027, |
| "learning_rate": 3.1406464374890144e-08, |
| "loss": 0.2293, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.90537084398977, |
| "grad_norm": 1.9769610016608057, |
| "learning_rate": 3.0503648732722046e-08, |
| "loss": 0.2477, |
| "step": 2081 |
| }, |
| { |
| "epoch": 2.906765868402697, |
| "grad_norm": 2.050363313420139, |
| "learning_rate": 2.9613959571660468e-08, |
| "loss": 0.2157, |
| "step": 2082 |
| }, |
| { |
| "epoch": 2.908160892815624, |
| "grad_norm": 1.9951482436366297, |
| "learning_rate": 2.8737399241740016e-08, |
| "loss": 0.2189, |
| "step": 2083 |
| }, |
| { |
| "epoch": 2.9095559172285514, |
| "grad_norm": 2.081109307380316, |
| "learning_rate": 2.7873970058316934e-08, |
| "loss": 0.2289, |
| "step": 2084 |
| }, |
| { |
| "epoch": 2.9109509416414787, |
| "grad_norm": 1.9864262205196008, |
| "learning_rate": 2.7023674302061875e-08, |
| "loss": 0.2243, |
| "step": 2085 |
| }, |
| { |
| "epoch": 2.912345966054406, |
| "grad_norm": 1.9215362456747311, |
| "learning_rate": 2.6186514218954905e-08, |
| "loss": 0.2063, |
| "step": 2086 |
| }, |
| { |
| "epoch": 2.9137409904673333, |
| "grad_norm": 2.0398028688026666, |
| "learning_rate": 2.5362492020280517e-08, |
| "loss": 0.2177, |
| "step": 2087 |
| }, |
| { |
| "epoch": 2.9151360148802605, |
| "grad_norm": 1.9687613018790115, |
| "learning_rate": 2.4551609882619288e-08, |
| "loss": 0.2492, |
| "step": 2088 |
| }, |
| { |
| "epoch": 2.9165310392931874, |
| "grad_norm": 2.2202058691346918, |
| "learning_rate": 2.3753869947843457e-08, |
| "loss": 0.2173, |
| "step": 2089 |
| }, |
| { |
| "epoch": 2.9179260637061146, |
| "grad_norm": 2.070982150627721, |
| "learning_rate": 2.296927432311358e-08, |
| "loss": 0.2468, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.919321088119042, |
| "grad_norm": 2.0497098444086728, |
| "learning_rate": 2.2197825080867432e-08, |
| "loss": 0.2247, |
| "step": 2091 |
| }, |
| { |
| "epoch": 2.920716112531969, |
| "grad_norm": 2.1761110942805066, |
| "learning_rate": 2.1439524258819456e-08, |
| "loss": 0.2491, |
| "step": 2092 |
| }, |
| { |
| "epoch": 2.9221111369448964, |
| "grad_norm": 1.8677894906476675, |
| "learning_rate": 2.0694373859954653e-08, |
| "loss": 0.2173, |
| "step": 2093 |
| }, |
| { |
| "epoch": 2.9235061613578237, |
| "grad_norm": 2.0166862523401874, |
| "learning_rate": 1.99623758525197e-08, |
| "loss": 0.2368, |
| "step": 2094 |
| }, |
| { |
| "epoch": 2.924901185770751, |
| "grad_norm": 1.9058734582520152, |
| "learning_rate": 1.9243532170023504e-08, |
| "loss": 0.221, |
| "step": 2095 |
| }, |
| { |
| "epoch": 2.9262962101836782, |
| "grad_norm": 2.008476408905422, |
| "learning_rate": 1.8537844711227215e-08, |
| "loss": 0.2049, |
| "step": 2096 |
| }, |
| { |
| "epoch": 2.9276912345966055, |
| "grad_norm": 1.9038713013473556, |
| "learning_rate": 1.7845315340140334e-08, |
| "loss": 0.2097, |
| "step": 2097 |
| }, |
| { |
| "epoch": 2.929086259009533, |
| "grad_norm": 1.7358824031830948, |
| "learning_rate": 1.7165945886018498e-08, |
| "loss": 0.1912, |
| "step": 2098 |
| }, |
| { |
| "epoch": 2.93048128342246, |
| "grad_norm": 2.002733171009674, |
| "learning_rate": 1.6499738143354594e-08, |
| "loss": 0.2142, |
| "step": 2099 |
| }, |
| { |
| "epoch": 2.9318763078353873, |
| "grad_norm": 2.0905328726237693, |
| "learning_rate": 1.584669387187765e-08, |
| "loss": 0.2267, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.9332713322483146, |
| "grad_norm": 1.9108917257839144, |
| "learning_rate": 1.520681479654562e-08, |
| "loss": 0.219, |
| "step": 2101 |
| }, |
| { |
| "epoch": 2.9346663566612414, |
| "grad_norm": 1.979846709755045, |
| "learning_rate": 1.4580102607541502e-08, |
| "loss": 0.2275, |
| "step": 2102 |
| }, |
| { |
| "epoch": 2.9360613810741687, |
| "grad_norm": 2.0046838206360227, |
| "learning_rate": 1.3966558960269994e-08, |
| "loss": 0.2082, |
| "step": 2103 |
| }, |
| { |
| "epoch": 2.937456405487096, |
| "grad_norm": 1.680760440099357, |
| "learning_rate": 1.3366185475351957e-08, |
| "loss": 0.2073, |
| "step": 2104 |
| }, |
| { |
| "epoch": 2.9388514299000232, |
| "grad_norm": 1.8009860986680282, |
| "learning_rate": 1.2778983738620521e-08, |
| "loss": 0.2326, |
| "step": 2105 |
| }, |
| { |
| "epoch": 2.9402464543129505, |
| "grad_norm": 2.0317108503401413, |
| "learning_rate": 1.2204955301116095e-08, |
| "loss": 0.2094, |
| "step": 2106 |
| }, |
| { |
| "epoch": 2.941641478725878, |
| "grad_norm": 1.8968224765396366, |
| "learning_rate": 1.164410167908414e-08, |
| "loss": 0.2212, |
| "step": 2107 |
| }, |
| { |
| "epoch": 2.943036503138805, |
| "grad_norm": 2.186384787215692, |
| "learning_rate": 1.109642435396907e-08, |
| "loss": 0.2125, |
| "step": 2108 |
| }, |
| { |
| "epoch": 2.9444315275517323, |
| "grad_norm": 1.9506984878795102, |
| "learning_rate": 1.0561924772412024e-08, |
| "loss": 0.2243, |
| "step": 2109 |
| }, |
| { |
| "epoch": 2.945826551964659, |
| "grad_norm": 2.378673020739392, |
| "learning_rate": 1.0040604346245319e-08, |
| "loss": 0.2682, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.9472215763775864, |
| "grad_norm": 1.724154583426404, |
| "learning_rate": 9.532464452491341e-09, |
| "loss": 0.2187, |
| "step": 2111 |
| }, |
| { |
| "epoch": 2.9486166007905137, |
| "grad_norm": 2.030830581414915, |
| "learning_rate": 9.037506433355325e-09, |
| "loss": 0.2265, |
| "step": 2112 |
| }, |
| { |
| "epoch": 2.950011625203441, |
| "grad_norm": 1.988165972381228, |
| "learning_rate": 8.555731596224803e-09, |
| "loss": 0.2115, |
| "step": 2113 |
| }, |
| { |
| "epoch": 2.9514066496163682, |
| "grad_norm": 2.0277716193856565, |
| "learning_rate": 8.087141213665717e-09, |
| "loss": 0.2384, |
| "step": 2114 |
| }, |
| { |
| "epoch": 2.9528016740292955, |
| "grad_norm": 1.9367303791899442, |
| "learning_rate": 7.631736523416867e-09, |
| "loss": 0.2296, |
| "step": 2115 |
| }, |
| { |
| "epoch": 2.9541966984422228, |
| "grad_norm": 2.0304044614481076, |
| "learning_rate": 7.1895187283899104e-09, |
| "loss": 0.1834, |
| "step": 2116 |
| }, |
| { |
| "epoch": 2.95559172285515, |
| "grad_norm": 2.1117935403333057, |
| "learning_rate": 6.760488996662706e-09, |
| "loss": 0.2321, |
| "step": 2117 |
| }, |
| { |
| "epoch": 2.9569867472680773, |
| "grad_norm": 1.9903261162086323, |
| "learning_rate": 6.3446484614798635e-09, |
| "loss": 0.197, |
| "step": 2118 |
| }, |
| { |
| "epoch": 2.9583817716810046, |
| "grad_norm": 1.9522993896671246, |
| "learning_rate": 5.941998221247192e-09, |
| "loss": 0.2359, |
| "step": 2119 |
| }, |
| { |
| "epoch": 2.959776796093932, |
| "grad_norm": 2.0867324978745687, |
| "learning_rate": 5.552539339528373e-09, |
| "loss": 0.2762, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.961171820506859, |
| "grad_norm": 1.850158984024511, |
| "learning_rate": 5.176272845045516e-09, |
| "loss": 0.2286, |
| "step": 2121 |
| }, |
| { |
| "epoch": 2.962566844919786, |
| "grad_norm": 1.9371278211250633, |
| "learning_rate": 4.813199731671381e-09, |
| "loss": 0.2036, |
| "step": 2122 |
| }, |
| { |
| "epoch": 2.963961869332713, |
| "grad_norm": 1.8464454139037667, |
| "learning_rate": 4.463320958432716e-09, |
| "loss": 0.1943, |
| "step": 2123 |
| }, |
| { |
| "epoch": 2.9653568937456405, |
| "grad_norm": 1.8850080704487493, |
| "learning_rate": 4.1266374495024795e-09, |
| "loss": 0.219, |
| "step": 2124 |
| }, |
| { |
| "epoch": 2.9667519181585678, |
| "grad_norm": 1.9760584152176828, |
| "learning_rate": 3.803150094200403e-09, |
| "loss": 0.2182, |
| "step": 2125 |
| }, |
| { |
| "epoch": 2.968146942571495, |
| "grad_norm": 1.9903736085094599, |
| "learning_rate": 3.4928597469885416e-09, |
| "loss": 0.2141, |
| "step": 2126 |
| }, |
| { |
| "epoch": 2.9695419669844223, |
| "grad_norm": 1.9826949238836484, |
| "learning_rate": 3.1957672274723907e-09, |
| "loss": 0.23, |
| "step": 2127 |
| }, |
| { |
| "epoch": 2.9709369913973496, |
| "grad_norm": 2.111435765657799, |
| "learning_rate": 2.9118733203942207e-09, |
| "loss": 0.2378, |
| "step": 2128 |
| }, |
| { |
| "epoch": 2.972332015810277, |
| "grad_norm": 2.0857714315335563, |
| "learning_rate": 2.6411787756353e-09, |
| "loss": 0.2335, |
| "step": 2129 |
| }, |
| { |
| "epoch": 2.9737270402232037, |
| "grad_norm": 1.9118431896872314, |
| "learning_rate": 2.3836843082108987e-09, |
| "loss": 0.2453, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.975122064636131, |
| "grad_norm": 1.8350015472662786, |
| "learning_rate": 2.1393905982691752e-09, |
| "loss": 0.2151, |
| "step": 2131 |
| }, |
| { |
| "epoch": 2.976517089049058, |
| "grad_norm": 2.0193930356279313, |
| "learning_rate": 1.9082982910911817e-09, |
| "loss": 0.2255, |
| "step": 2132 |
| }, |
| { |
| "epoch": 2.9779121134619855, |
| "grad_norm": 1.9778096181618947, |
| "learning_rate": 1.6904079970853083e-09, |
| "loss": 0.2194, |
| "step": 2133 |
| }, |
| { |
| "epoch": 2.9793071378749127, |
| "grad_norm": 1.869492540650723, |
| "learning_rate": 1.4857202917900604e-09, |
| "loss": 0.2221, |
| "step": 2134 |
| }, |
| { |
| "epoch": 2.98070216228784, |
| "grad_norm": 2.142769641662634, |
| "learning_rate": 1.2942357158701734e-09, |
| "loss": 0.2172, |
| "step": 2135 |
| }, |
| { |
| "epoch": 2.9820971867007673, |
| "grad_norm": 2.054017180212049, |
| "learning_rate": 1.1159547751143918e-09, |
| "loss": 0.2406, |
| "step": 2136 |
| }, |
| { |
| "epoch": 2.9834922111136946, |
| "grad_norm": 1.9105149879043084, |
| "learning_rate": 9.508779404360235e-10, |
| "loss": 0.2521, |
| "step": 2137 |
| }, |
| { |
| "epoch": 2.984887235526622, |
| "grad_norm": 2.112972558040102, |
| "learning_rate": 7.990056478707209e-10, |
| "loss": 0.2252, |
| "step": 2138 |
| }, |
| { |
| "epoch": 2.986282259939549, |
| "grad_norm": 1.898422373087025, |
| "learning_rate": 6.603382985759244e-10, |
| "loss": 0.2268, |
| "step": 2139 |
| }, |
| { |
| "epoch": 2.9876772843524764, |
| "grad_norm": 2.2452219566881477, |
| "learning_rate": 5.348762588286427e-10, |
| "loss": 0.2651, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.9890723087654036, |
| "grad_norm": 2.1778533368799264, |
| "learning_rate": 4.2261986002600783e-10, |
| "loss": 0.2521, |
| "step": 2141 |
| }, |
| { |
| "epoch": 2.9904673331783305, |
| "grad_norm": 2.1442535651439116, |
| "learning_rate": 3.235693986830546e-10, |
| "loss": 0.2588, |
| "step": 2142 |
| }, |
| { |
| "epoch": 2.9918623575912577, |
| "grad_norm": 2.119886361748237, |
| "learning_rate": 2.3772513643327555e-10, |
| "loss": 0.2408, |
| "step": 2143 |
| }, |
| { |
| "epoch": 2.993257382004185, |
| "grad_norm": 1.921028784949589, |
| "learning_rate": 1.650873000258457e-10, |
| "loss": 0.2254, |
| "step": 2144 |
| }, |
| { |
| "epoch": 2.9946524064171123, |
| "grad_norm": 2.0573904423649356, |
| "learning_rate": 1.0565608132728778e-10, |
| "loss": 0.2305, |
| "step": 2145 |
| }, |
| { |
| "epoch": 2.9960474308300395, |
| "grad_norm": 2.019354134088264, |
| "learning_rate": 5.943163732036183e-11, |
| "loss": 0.2517, |
| "step": 2146 |
| }, |
| { |
| "epoch": 2.997442455242967, |
| "grad_norm": 2.110601779062811, |
| "learning_rate": 2.6414090102400147e-11, |
| "loss": 0.2434, |
| "step": 2147 |
| }, |
| { |
| "epoch": 2.998837479655894, |
| "grad_norm": 2.144832780412135, |
| "learning_rate": 6.6035268864173e-12, |
| "loss": 0.2413, |
| "step": 2148 |
| }, |
| { |
| "epoch": 2.998837479655894, |
| "step": 2148, |
| "total_flos": 3.265949006466908e+17, |
| "train_loss": 0.4850710421606371, |
| "train_runtime": 18100.1171, |
| "train_samples_per_second": 2.851, |
| "train_steps_per_second": 0.119 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2148, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 400, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.265949006466908e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|